object.c
1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 
5 #include <kuroko/memory.h>
6 #include <kuroko/object.h>
7 #include <kuroko/value.h>
8 #include <kuroko/vm.h>
9 #include <kuroko/table.h>
10 
11 #include "private.h"
12 
13 #define ALLOCATE_OBJECT(type, objectType) \
14  (type*)allocateObject(sizeof(type), objectType)
15 
16 #ifndef KRK_DISABLE_THREADS
17 static volatile int _stringLock = 0;
18 static volatile int _objectLock = 0;
19 #endif
20 
21 static KrkObj * allocateObject(size_t size, KrkObjType type) {
22  KrkObj * object = (KrkObj*)krk_reallocate(NULL, 0, size);
23  memset(object,0,size);
24  object->type = type;
25 
26  _obtain_lock(_objectLock);
27  object->next = vm.objects;
28  krk_currentThread.scratchSpace[2] = OBJECT_VAL(object);
29  vm.objects = object;
30  _release_lock(_objectLock);
31 
32  object->hash = (uint32_t)((intptr_t)(object) >> 4 | ((intptr_t)object & 0xf) << 28);
33 
34  return object;
35 }
36 
37 size_t krk_codepointToBytes(krk_integer_type value, unsigned char * out) {
38  if (value > 0xFFFF) {
39  out[0] = (0xF0 | (value >> 18));
40  out[1] = (0x80 | ((value >> 12) & 0x3F));
41  out[2] = (0x80 | ((value >> 6) & 0x3F));
42  out[3] = (0x80 | ((value) & 0x3F));
43  return 4;
44  } else if (value > 0x7FF) {
45  out[0] = (0xE0 | (value >> 12));
46  out[1] = (0x80 | ((value >> 6) & 0x3F));
47  out[2] = (0x80 | (value & 0x3F));
48  return 3;
49  } else if (value > 0x7F) {
50  out[0] = (0xC0 | (value >> 6));
51  out[1] = (0x80 | (value & 0x3F));
52  return 2;
53  } else {
54  out[0] = (unsigned char)value;
55  return 1;
56  }
57 }
58 
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1

/**
 * Feed one byte to the incremental UTF-8 decoder.
 *
 * @param state Decoder state, updated in place. UTF8_ACCEPT means a complete
 *              codepoint has just been produced (or nothing has been read
 *              yet), UTF8_REJECT means the input was malformed, and any other
 *              value means more continuation bytes are expected.
 * @param codep Codepoint accumulator, updated in place; holds the decoded
 *              codepoint whenever the function returns UTF8_ACCEPT.
 * @param byte  The next input byte.
 * @return The new value of @p state.
 */
static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
	/* State entered after a leading byte, indexed by the byte's top five bits. */
	static const uint8_t leadState[32] = {
		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xxxxxxx */
		1,1,1,1,1,1,1,1,                 /* 10xxxxxx */
		2,2,2,2,                         /* 110xxxxx */
		3,3,                             /* 1110xxxx */
		4,                               /* 11110xxx */
		1                                /* 11111xxx */
	};

	/* Payload bits carried by a leading byte, indexed the same way. */
	static const uint8_t leadMask[32] = {
		0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
		0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
		0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
		0x1F,0x1F,0x1F,0x1F,
		0x0F,0x0F,
		0x07,
		0x00
	};

	/* State reached after consuming one continuation byte in each state. */
	static const uint8_t afterContinuation[5] = { 0, 1, 0, 2, 3 };

	if (*state == UTF8_ACCEPT) {
		/* A sequence may not start with a continuation byte or with C0/C1. */
		if (byte >= 0x80 && byte <= 0xC1) {
			*state = UTF8_REJECT;
			return *state;
		}
		uint32_t group = byte >> 3;
		*codep = byte & leadMask[group];
		*state = leadState[group];
		return *state;
	}

	/* In any other state only a continuation byte (10xxxxxx) is acceptable. */
	if (!(byte >= 0x80 && byte < 0xC0)) {
		*state = UTF8_REJECT;
		return *state;
	}
	*codep = (*codep << 6) | (byte & 0x3F);
	*state = afterContinuation[*state];
	return *state;
}
104 
105 static int checkString(const char * chars, size_t length, size_t *codepointCount) {
106  uint32_t state = 0;
107  uint32_t codepoint = 0;
108  unsigned char * end = (unsigned char *)chars + length;
109  uint32_t maxCodepoint = 0;
110  for (unsigned char * c = (unsigned char *)chars; c < end; ++c) {
111  if (!decode(&state, &codepoint, *c)) {
112  if (codepoint > maxCodepoint) maxCodepoint = codepoint;
113  (*codepointCount)++;
114  } else if (state == UTF8_REJECT) {
115  _release_lock(_stringLock);
116  krk_runtimeError(vm.exceptions->valueError, "Invalid UTF-8 sequence in string.");
117  *codepointCount = 0;
118  return -1;
119  }
120  }
121  if (maxCodepoint > 0xFFFF) {
122  return KRK_OBJ_FLAGS_STRING_UCS4;
123  } else if (maxCodepoint > 0xFF) {
124  return KRK_OBJ_FLAGS_STRING_UCS2;
125  } else if (maxCodepoint > 0x7F) {
126  return KRK_OBJ_FLAGS_STRING_UCS1;
127  } else {
128  return KRK_OBJ_FLAGS_STRING_ASCII;
129  }
130 }
131 
132 #define GENREADY(size,type) \
133  static void _readyUCS ## size (KrkString * string) { \
134  uint32_t state = 0; \
135  uint32_t codepoint = 0; \
136  unsigned char * end = (unsigned char *)string->chars + string->length; \
137  string->codes = malloc(sizeof(type) * string->codesLength); \
138  type *outPtr = (type *)string->codes; \
139  for (unsigned char * c = (unsigned char *)string->chars; c < end; ++c) { \
140  if (!decode(&state, &codepoint, *c)) { \
141  *(outPtr++) = (type)codepoint; \
142  } else if (state == UTF8_REJECT) { \
143  state = 0; \
144  } \
145  } \
146  }
147 GENREADY(1,uint8_t)
148 GENREADY(2,uint16_t)
149 GENREADY(4,uint32_t)
150 #undef GENREADY
151 
152 void * krk_unicodeString(KrkString * string) {
153  if (string->codes) return string->codes;
154  else if ((string->obj.flags & KRK_OBJ_FLAGS_STRING_MASK) == KRK_OBJ_FLAGS_STRING_UCS1) _readyUCS1(string);
155  else if ((string->obj.flags & KRK_OBJ_FLAGS_STRING_MASK) == KRK_OBJ_FLAGS_STRING_UCS2) _readyUCS2(string);
156  else if ((string->obj.flags & KRK_OBJ_FLAGS_STRING_MASK) == KRK_OBJ_FLAGS_STRING_UCS4) _readyUCS4(string);
157  else krk_runtimeError(vm.exceptions->valueError, "Internal string error.");
158  return string->codes;
159 }
160 
161 uint32_t krk_unicodeCodepoint(KrkString * string, size_t index) {
162  krk_unicodeString(string);
163  switch (string->obj.flags & KRK_OBJ_FLAGS_STRING_MASK) {
164  case KRK_OBJ_FLAGS_STRING_ASCII:
165  case KRK_OBJ_FLAGS_STRING_UCS1: return ((uint8_t*)string->codes)[index];
166  case KRK_OBJ_FLAGS_STRING_UCS2: return ((uint16_t*)string->codes)[index];
167  case KRK_OBJ_FLAGS_STRING_UCS4: return ((uint32_t*)string->codes)[index];
168  default:
169  krk_runtimeError(vm.exceptions->valueError, "Internal string error.");
170  return 0;
171  }
172 }
173 
174 static KrkString * allocateString(char * chars, size_t length, uint32_t hash) {
175  size_t codesLength = 0;
176  int type = checkString(chars,length,&codesLength);
177  if (type == -1) {
178  return krk_copyString("",0);
179  }
180  KrkString * string = ALLOCATE_OBJECT(KrkString, KRK_OBJ_STRING);
181  string->length = length;
182  string->chars = chars;
183  string->obj.hash = hash;
184  string->obj.flags |= KRK_OBJ_FLAGS_VALID_HASH | type;
185  string->codesLength = codesLength;
186  string->codes = NULL;
187  if (type == KRK_OBJ_FLAGS_STRING_ASCII) string->codes = string->chars;
188  krk_push(OBJECT_VAL(string));
189  krk_tableSet(&vm.strings, OBJECT_VAL(string), NONE_VAL());
190  krk_pop();
191  _release_lock(_stringLock);
192  return string;
193 }
194 
/**
 * Hash @p length bytes starting at @p key.
 *
 * Each byte is folded into the running value with krk_hash_advance(); the
 * original author describes this as the "sdbm" hash, taken from a public
 * domain clone of ndbm.
 */
static uint32_t hashString(const char * key, size_t length) {
	uint32_t running = 0;
	size_t position = 0;
	while (position < length) {
		krk_hash_advance(running,key[position]);
		position++;
	}
	return running;
}
204 
205 KrkString * krk_takeString(char * chars, size_t length) {
206  uint32_t hash = hashString(chars, length);
207  _obtain_lock(_stringLock);
208  KrkString * interned = krk_tableFindString(&vm.strings, chars, length, hash);
209  if (interned != NULL) {
210  free(chars); /* This string isn't owned by us yet, so free, not FREE_ARRAY */
211  _release_lock(_stringLock);
212  return interned;
213  }
214 
215  /* Part of taking ownership of this string is that we track its memory usage */
216  krk_gcTakeBytes(chars, length + 1);
217  KrkString * result = allocateString(chars, length, hash);
218  return result;
219 }
220 
221 KrkString * krk_copyString(const char * chars, size_t length) {
222  uint32_t hash = hashString(chars, length);
223  _obtain_lock(_stringLock);
224  KrkString * interned = krk_tableFindString(&vm.strings, chars ? chars : "", length, hash);
225  if (interned) {
226  _release_lock(_stringLock);
227  return interned;
228  }
229  char * heapChars = ALLOCATE(char, length + 1);
230  memcpy(heapChars, chars ? chars : "", length);
231  heapChars[length] = '\0';
232  KrkString * result = allocateString(heapChars, length, hash);
233  if (result->chars != heapChars) free(heapChars);
234  _release_lock(_stringLock);
235  return result;
236 }
237 
238 KrkString * krk_takeStringVetted(char * chars, size_t length, size_t codesLength, KrkStringType type, uint32_t hash) {
239  _obtain_lock(_stringLock);
240  KrkString * interned = krk_tableFindString(&vm.strings, chars, length, hash);
241  if (interned != NULL) {
242  FREE_ARRAY(char, chars, length + 1);
243  _release_lock(_stringLock);
244  return interned;
245  }
246  KrkString * string = ALLOCATE_OBJECT(KrkString, KRK_OBJ_STRING);
247  string->length = length;
248  string->chars = chars;
249  string->obj.hash = hash;
250  string->obj.flags |= KRK_OBJ_FLAGS_VALID_HASH | type;
251  string->codesLength = codesLength;
252  string->codes = NULL;
253  if (type == KRK_OBJ_FLAGS_STRING_ASCII) string->codes = string->chars;
254  krk_push(OBJECT_VAL(string));
255  krk_tableSet(&vm.strings, OBJECT_VAL(string), NONE_VAL());
256  krk_pop();
257  _release_lock(_stringLock);
258  return string;
259 }
260 
262  KrkCodeObject * codeobject = ALLOCATE_OBJECT(KrkCodeObject, KRK_OBJ_CODEOBJECT);
263  codeobject->requiredArgs = 0;
264  codeobject->keywordArgs = 0;
265  codeobject->potentialPositionals = 0;
266  codeobject->upvalueCount = 0;
267  codeobject->name = NULL;
268  codeobject->docstring = NULL;
269  codeobject->localNameCount = 0;
270  codeobject->localNames = NULL;
272  krk_initValueArray(&codeobject->keywordArgNames);
273  krk_initChunk(&codeobject->chunk);
274  return codeobject;
275 }
276 
277 KrkNative * krk_newNative(NativeFn function, const char * name, int type) {
278  KrkNative * native = ALLOCATE_OBJECT(KrkNative, KRK_OBJ_NATIVE);
279  native->function = function;
280  native->obj.flags = type;
281  native->name = name;
282  native->doc = NULL;
283  return native;
284 }
285 
287  KrkUpvalue ** upvalues = ALLOCATE(KrkUpvalue*, function->upvalueCount);
288  for (size_t i = 0; i < function->upvalueCount; ++i) {
289  upvalues[i] = NULL;
290  }
291  KrkClosure * closure = ALLOCATE_OBJECT(KrkClosure, KRK_OBJ_CLOSURE);
292  closure->function = function;
293  closure->upvalues = upvalues;
294  closure->upvalueCount = function->upvalueCount;
295  closure->annotations = krk_dict_of(0,NULL,0);
296  closure->globalsOwner = globals;
297  if (IS_INSTANCE(globals)) {
298  if (AS_INSTANCE(globals)->_class == vm.baseClasses->dictClass) {
299  closure->globalsTable = AS_DICT(globals);
300  } else {
301  closure->globalsTable = &AS_INSTANCE(globals)->fields;
302  }
303  } else {
304  fprintf(stderr, "Invalid globals context: %s\n", krk_typeName(globals));
305  abort();
306  }
307  krk_initTable(&closure->fields);
308  return closure;
309 }
310 
312  KrkUpvalue * upvalue = ALLOCATE_OBJECT(KrkUpvalue, KRK_OBJ_UPVALUE);
313  upvalue->location = slot;
314  upvalue->next = NULL;
315  upvalue->closed = NONE_VAL();
316  upvalue->owner = &krk_currentThread;
317  return upvalue;
318 }
319 
320 KrkClass * krk_newClass(KrkString * name, KrkClass * baseClass) {
321  KrkClass * _class = ALLOCATE_OBJECT(KrkClass, KRK_OBJ_CLASS);
322  _class->name = name;
323  _class->allocSize = sizeof(KrkInstance);
324  krk_initTable(&_class->methods);
325  krk_initTable(&_class->subclasses);
326 
327  if (baseClass) {
328  _class->base = baseClass;
329  _class->allocSize = baseClass->allocSize;
330  _class->_ongcscan = baseClass->_ongcscan;
331  _class->_ongcsweep = baseClass->_ongcsweep;
332 
333  krk_tableSet(&baseClass->subclasses, OBJECT_VAL(_class), NONE_VAL());
334  }
335 
336  return _class;
337 }
338 
340  KrkInstance * instance = (KrkInstance*)allocateObject(_class->allocSize, KRK_OBJ_INSTANCE);
341  instance->_class = _class;
342  krk_initTable(&instance->fields);
343  return instance;
344 }
345 
347  KrkBoundMethod * bound = ALLOCATE_OBJECT(KrkBoundMethod, KRK_OBJ_BOUND_METHOD);
348  bound->receiver = receiver;
349  bound->method = method;
350  return bound;
351 }
352 
353 KrkTuple * krk_newTuple(size_t length) {
354  KrkTuple * tuple = ALLOCATE_OBJECT(KrkTuple, KRK_OBJ_TUPLE);
355  krk_initValueArray(&tuple->values);
356  krk_push(OBJECT_VAL(tuple));
357  tuple->values.capacity = length;
358  tuple->values.values = GROW_ARRAY(KrkValue,NULL,0,length);
359  krk_pop();
360  return tuple;
361 }
362 
363 KrkBytes * krk_newBytes(size_t length, uint8_t * source) {
364  KrkBytes * bytes = ALLOCATE_OBJECT(KrkBytes, KRK_OBJ_BYTES);
365  bytes->length = length;
366  bytes->bytes = NULL;
367  krk_push(OBJECT_VAL(bytes));
368  bytes->bytes = ALLOCATE(uint8_t, length);
369  bytes->obj.hash = -1;
370  if (source) {
371  memcpy(bytes->bytes, source, length);
372  }
373  krk_pop();
374  return bytes;
375 }
376 
KrkValue krk_runtimeError(KrkClass *type, const char *fmt,...)
Produce and raise an exception with a formatted message.
Definition: exceptions.c:445
Functions for dealing with garbage collection and memory allocation.
void * krk_reallocate(void *ptr, size_t old, size_t new)
Resize an allocated heap object.
Definition: memory.c:154
void krk_gcTakeBytes(const void *ptr, size_t size)
Assume ownership of size bytes at ptr.
Definition: memory.c:146
Struct definitions for core object types.
KrkStringType
String compact storage type.
Definition: object.h:81
struct KrkInstance KrkInstance
An object of a class.
KrkObjType
Union tag for heap objects.
Definition: object.h:21
Internal header.
A function that has been attached to an object to serve as a method.
Definition: object.h:269
KrkBoundMethod * krk_newBoundMethod(KrkValue receiver, KrkObj *method)
Create a new bound method.
Definition: object.c:346
KrkValue receiver
Object to pass as implicit first argument.
Definition: object.h:271
KrkObj * method
Function to call.
Definition: object.h:272
Immutable sequence of bytes.
Definition: object.h:105
KrkObj obj
Base.
Definition: object.h:106
size_t length
Length of data in bytes.
Definition: object.h:107
uint8_t * bytes
Pointer to separately-stored bytes data.
Definition: object.h:108
KrkBytes * krk_newBytes(size_t length, uint8_t *source)
Create a new byte array.
Definition: object.c:363
void krk_initChunk(KrkChunk *chunk)
Initialize an opcode chunk.
Definition: chunk.c:7
Type object.
Definition: object.h:189
KrkCleanupCallback _ongcsweep
C function to call when the garbage collector is discarding an instance of this class.
Definition: object.h:198
KrkClass * krk_newClass(KrkString *name, KrkClass *base)
Create a new class object.
Definition: object.c:320
KrkCleanupCallback _ongcscan
C function to call when the garbage collector visits an instance of this class in the scan phase.
Definition: object.h:197
KrkString * name
Name of the class.
Definition: object.h:193
struct KrkClass * base
Pointer to base class implementation.
Definition: object.h:195
KrkTable subclasses
Set of classes that subclass this class.
Definition: object.h:199
size_t allocSize
Size to allocate when creating instances of this class.
Definition: object.h:196
KrkTable methods
General attributes table.
Definition: object.h:192
Function object.
Definition: object.h:169
KrkCodeObject * function
The codeobject containing the bytecode run when this function is called.
Definition: object.h:171
KrkValue globalsOwner
Owner of the globals table for this function.
Definition: object.h:176
size_t upvalueCount
Number of entries in upvalues.
Definition: object.h:173
KrkTable * globalsTable
Pointer to globals table with owner object.
Definition: object.h:177
KrkClosure * krk_newClosure(KrkCodeObject *function, KrkValue globals)
Create a new function object.
Definition: object.c:286
KrkValue annotations
Dictionary of type hints.
Definition: object.h:174
KrkUpvalue ** upvalues
Array of upvalues collected from the surrounding context when the closure was created.
Definition: object.h:172
KrkTable fields
Object attributes table.
Definition: object.h:175
Code object.
Definition: object.h:144
unsigned short potentialPositionals
Precalculated positional arguments for complex argument processing.
Definition: object.h:148
KrkChunk chunk
Bytecode data.
Definition: object.h:151
size_t upvalueCount
Number of upvalues this function collects as a closure.
Definition: object.h:150
KrkValueArray positionalArgNames
Array of names for positional arguments (and *args)
Definition: object.h:154
KrkLocalEntry * localNames
Stores the names of local variables used in the function, for debugging.
Definition: object.h:158
unsigned short keywordArgs
Arity of keyword (default) arguments.
Definition: object.h:147
unsigned short requiredArgs
Arity of required (non-default) arguments.
Definition: object.h:146
KrkString * docstring
Docstring attached to the function.
Definition: object.h:153
KrkValueArray keywordArgNames
Array of names for keyword-only arguments (and **kwargs)
Definition: object.h:155
KrkCodeObject * krk_newCodeObject(void)
Create a new, uninitialized code object.
Definition: object.c:261
size_t localNameCount
Number of entries in localNames.
Definition: object.h:157
KrkString * name
Name of the function.
Definition: object.h:152
KrkValue krk_dict_of(int argc, const KrkValue argv[], int hasKw)
Create a dict object.
Definition: obj_dict.c:11
An object of a class.
Definition: object.h:255
KrkInstance * krk_newInstance(KrkClass *_class)
Create a new instance of the given class.
Definition: object.c:339
KrkClass * _class
Type.
Definition: object.h:257
KrkTable fields
Attributes table.
Definition: object.h:258
Managed binding to a C function.
Definition: object.h:283
const char * doc
Docstring to supply from __doc__.
Definition: object.h:287
const char * name
Name to use when repring.
Definition: object.h:286
KrkObj obj
Base.
Definition: object.h:284
NativeFn function
C function pointer.
Definition: object.h:285
KrkNative * krk_newNative(NativeFn function, const char *name, int type)
Create a native function binding object.
Definition: object.c:277
The most basic object type.
Definition: object.h:41
uint32_t hash
Cached hash value for table keys.
Definition: object.h:44
uint16_t flags
General object flags, mostly related to garbage collection.
Definition: object.h:43
Immutable sequence of Unicode codepoints.
Definition: object.h:93
uint32_t krk_unicodeCodepoint(KrkString *string, size_t index)
Obtain the codepoint at a given index in a string.
Definition: object.c:161
void * krk_unicodeString(KrkString *string)
Ensure that a codepoint representation of a string is available.
Definition: object.c:152
KrkObj obj
Base.
Definition: object.h:94
KrkString * krk_copyString(const char *chars, size_t length)
Obtain a string object representation of the given C string.
Definition: object.c:221
KrkString * krk_takeString(char *chars, size_t length)
Yield ownership of a C string to the GC and obtain a string object.
Definition: object.c:205
char * chars
UTF8 canonical data.
Definition: object.h:97
void * codes
Codepoint data.
Definition: object.h:98
KrkString * krk_takeStringVetted(char *chars, size_t length, size_t codesLength, KrkStringType type, uint32_t hash)
Like krk_takeString but for when the caller has already calculated code lengths, hash,...
Definition: object.c:238
size_t krk_codepointToBytes(krk_integer_type value, unsigned char *out)
Convert an integer codepoint to a UTF-8 byte representation.
Definition: object.c:37
void krk_initTable(KrkTable *table)
Initialize a hash table.
Definition: table.c:14
int krk_tableSet(KrkTable *table, KrkValue key, KrkValue value)
Assign a value to a key in a table.
Definition: table.c:145
struct KrkString * krk_tableFindString(KrkTable *table, const char *chars, size_t length, uint32_t hash)
Find a character sequence in the string interning table.
Definition: table.c:232
KrkValue scratchSpace[KRK_THREAD_SCRATCH_SIZE]
Definition: vm.h:177
Immutable sequence of arbitrary values.
Definition: object.h:297
KrkValueArray values
Stores the length, capacity, and actual values of the tuple.
Definition: object.h:299
KrkTuple * krk_newTuple(size_t length)
Create a new tuple.
Definition: object.c:353
Storage for values referenced from nested functions.
Definition: object.h:115
int location
Stack offset or -1 if closed.
Definition: object.h:117
KrkValue closed
Heap storage for closed value.
Definition: object.h:118
struct KrkThreadState * owner
The thread that owns the stack this upvalue belongs in.
Definition: object.h:120
struct KrkUpvalue * next
Invasive linked list pointer to next upvalue.
Definition: object.h:119
KrkUpvalue * krk_newUpvalue(int slot)
Create an upvalue slot.
Definition: object.c:311
size_t capacity
Definition: value.h:71
KrkValue * values
Definition: value.h:73
void krk_initValueArray(KrkValueArray *array)
Initialize a value array.
Definition: value.c:11
Stack reference or primitive value.
const char * krk_typeName(KrkValue value)
Get the name of the type of a value.
Definition: vm.c:1023
Implementation of a generic hash table.
Definitions for primitive stack references.
Core API for the bytecode virtual machine.
#define vm
Convenience macro for namespacing.
Definition: vm.h:267
KrkValue krk_pop(void)
Pop the top of the stack.
Definition: vm.c:170
threadLocal KrkThreadState krk_currentThread
Thread-local VM state.
void krk_push(KrkValue value)
Push a stack value.
Definition: vm.c:157