object.c
1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 
5 #include <kuroko/memory.h>
6 #include <kuroko/object.h>
7 #include <kuroko/value.h>
8 #include <kuroko/vm.h>
9 #include <kuroko/table.h>
10 #include <kuroko/threads.h>
11 
12 #include "private.h"
13 
14 #define ALLOCATE_OBJECT(type, objectType) \
15  (type*)allocateObject(sizeof(type), objectType)
16 
17 #ifndef KRK_DISABLE_THREADS
18 static volatile int _stringLock = 0;
19 static volatile int _objectLock = 0;
20 #endif
21 
22 static KrkObj * allocateObject(size_t size, KrkObjType type) {
23  KrkObj * object = (KrkObj*)krk_reallocate(NULL, 0, size);
24  memset(object,0,size);
25  object->type = type;
26 
27  _obtain_lock(_objectLock);
28  object->next = vm.objects;
29  krk_currentThread.scratchSpace[2] = OBJECT_VAL(object);
30  vm.objects = object;
31  _release_lock(_objectLock);
32 
33  object->hash = (uint32_t)((intptr_t)(object) >> 4 | ((intptr_t)object & 0xf) << 28);
34 
35  return object;
36 }
37 
38 size_t krk_codepointToBytes(krk_integer_type value, unsigned char * out) {
39  if (value > 0xFFFF) {
40  out[0] = (0xF0 | (value >> 18));
41  out[1] = (0x80 | ((value >> 12) & 0x3F));
42  out[2] = (0x80 | ((value >> 6) & 0x3F));
43  out[3] = (0x80 | ((value) & 0x3F));
44  return 4;
45  } else if (value > 0x7FF) {
46  out[0] = (0xE0 | (value >> 12));
47  out[1] = (0x80 | ((value >> 6) & 0x3F));
48  out[2] = (0x80 | (value & 0x3F));
49  return 3;
50  } else if (value > 0x7F) {
51  out[0] = (0xC0 | (value >> 6));
52  out[1] = (0x80 | (value & 0x3F));
53  return 2;
54  } else {
55  out[0] = (unsigned char)value;
56  return 1;
57  }
58 }
59 
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1

/**
 * @brief Feed one byte into an incremental UTF-8 decoder.
 *
 * @param state Decoder state, updated in place. UTF8_ACCEPT means a complete
 *              codepoint is available in @p codep; UTF8_REJECT means the input
 *              was malformed; other values count a sequence still in progress.
 * @param codep Accumulator for the codepoint being assembled.
 * @param byte  Next input byte (expected to be in the range 0..255).
 * @return The new value of @p state.
 */
static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
    /* State entered after a lead byte, indexed by its top five bits. */
    static const int leadState[32] = {
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xxxxxxx: complete on its own */
        1,1,1,1,1,1,1,1,                 /* 10xxxxxx: stray continuation  */
        2,2,2,2,                         /* 110xxxxx: one more byte       */
        3,3,                             /* 1110xxxx: two more bytes      */
        4,                               /* 11110xxx: three more bytes    */
        1                                /* 11111xxx: never valid         */
    };

    /* Payload bits carried by a lead byte, same indexing as above. */
    static const int leadPayload[32] = {
        0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
        0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
        0x1F,0x1F,0x1F,0x1F,
        0x0F,0x0F,
        0x07,
        0x00
    };

    /* State reached after consuming one continuation byte in each state. */
    static const int afterContinuation[5] = { 0, 1, 0, 2, 3 };

    if (*state != UTF8_ACCEPT) {
        /* Mid-sequence: only 10xxxxxx bytes are acceptable here. */
        int isContinuation = (byte >= 0x80 && byte < 0xC0);
        if (!isContinuation) {
            *state = UTF8_REJECT;
            return *state;
        }
        *codep = (*codep << 6) | (byte & 0x3F);
        *state = afterContinuation[*state];
        return *state;
    }

    /* Start of a sequence: continuation bytes and the overlong two-byte
     * leads 0xC0/0xC1 cannot begin a codepoint. */
    if (byte >= 0x80 && byte <= 0xC1) {
        *state = UTF8_REJECT;
        return *state;
    }

    *codep = byte & leadPayload[byte >> 3];
    *state = leadState[byte >> 3];
    return *state;
}
105 
106 static int checkString(const char * chars, size_t length, size_t *codepointCount) {
107  uint32_t state = 0;
108  uint32_t codepoint = 0;
109  unsigned char * end = (unsigned char *)chars + length;
110  uint32_t maxCodepoint = 0;
111  for (unsigned char * c = (unsigned char *)chars; c < end; ++c) {
112  if (!decode(&state, &codepoint, *c)) {
113  if (codepoint > maxCodepoint) maxCodepoint = codepoint;
114  (*codepointCount)++;
115  } else if (state == UTF8_REJECT) {
116  _release_lock(_stringLock);
117  krk_runtimeError(vm.exceptions->valueError, "Invalid UTF-8 sequence in string.");
118  *codepointCount = 0;
119  return -1;
120  }
121  }
122  if (maxCodepoint > 0xFFFF) {
123  return KRK_OBJ_FLAGS_STRING_UCS4;
124  } else if (maxCodepoint > 0xFF) {
125  return KRK_OBJ_FLAGS_STRING_UCS2;
126  } else if (maxCodepoint > 0x7F) {
127  return KRK_OBJ_FLAGS_STRING_UCS1;
128  } else {
129  return KRK_OBJ_FLAGS_STRING_ASCII;
130  }
131 }
132 
133 #define GENREADY(size,type) \
134  static void _readyUCS ## size (KrkString * string) { \
135  uint32_t state = 0; \
136  uint32_t codepoint = 0; \
137  unsigned char * end = (unsigned char *)string->chars + string->length; \
138  string->codes = malloc(sizeof(type) * string->codesLength); \
139  type *outPtr = (type *)string->codes; \
140  for (unsigned char * c = (unsigned char *)string->chars; c < end; ++c) { \
141  if (!decode(&state, &codepoint, *c)) { \
142  *(outPtr++) = (type)codepoint; \
143  } else if (state == UTF8_REJECT) { \
144  state = 0; \
145  } \
146  } \
147  }
148 GENREADY(1,uint8_t)
149 GENREADY(2,uint16_t)
150 GENREADY(4,uint32_t)
151 #undef GENREADY
152 
153 void * krk_unicodeString(KrkString * string) {
154  if (string->codes) return string->codes;
155  else if ((string->obj.flags & KRK_OBJ_FLAGS_STRING_MASK) == KRK_OBJ_FLAGS_STRING_UCS1) _readyUCS1(string);
156  else if ((string->obj.flags & KRK_OBJ_FLAGS_STRING_MASK) == KRK_OBJ_FLAGS_STRING_UCS2) _readyUCS2(string);
157  else if ((string->obj.flags & KRK_OBJ_FLAGS_STRING_MASK) == KRK_OBJ_FLAGS_STRING_UCS4) _readyUCS4(string);
158  else krk_runtimeError(vm.exceptions->valueError, "Internal string error.");
159  return string->codes;
160 }
161 
162 uint32_t krk_unicodeCodepoint(KrkString * string, size_t index) {
163  krk_unicodeString(string);
164  switch (string->obj.flags & KRK_OBJ_FLAGS_STRING_MASK) {
165  case KRK_OBJ_FLAGS_STRING_ASCII:
166  case KRK_OBJ_FLAGS_STRING_UCS1: return ((uint8_t*)string->codes)[index];
167  case KRK_OBJ_FLAGS_STRING_UCS2: return ((uint16_t*)string->codes)[index];
168  case KRK_OBJ_FLAGS_STRING_UCS4: return ((uint32_t*)string->codes)[index];
169  default:
170  krk_runtimeError(vm.exceptions->valueError, "Internal string error.");
171  return 0;
172  }
173 }
174 
175 extern int krk_tableSetExact(KrkTable * table, KrkValue key, KrkValue value);
176 
177 static KrkString * allocateString(char * chars, size_t length, uint32_t hash) {
178  size_t codesLength = 0;
179  int type = checkString(chars,length,&codesLength);
180  if (type == -1) {
181  return krk_copyString("",0);
182  }
183  KrkString * string = ALLOCATE_OBJECT(KrkString, KRK_OBJ_STRING);
184  string->length = length;
185  string->chars = chars;
186  string->obj.hash = hash;
187  string->obj.flags |= KRK_OBJ_FLAGS_VALID_HASH | type;
188  string->codesLength = codesLength;
189  string->codes = NULL;
190  if (type == KRK_OBJ_FLAGS_STRING_ASCII) string->codes = string->chars;
191  krk_push(OBJECT_VAL(string));
192  krk_tableSetExact(&vm.strings, OBJECT_VAL(string), NONE_VAL());
193  krk_pop();
194  _release_lock(_stringLock);
195  return string;
196 }
197 
/**
 * @brief Hash a byte sequence for the string interning table.
 *
 * Folds each byte into the running value with krk_hash_advance(), starting
 * from zero. This is the so-called "sdbm" hash, which comes from a piece of
 * public domain code from a clone of ndbm.
 *
 * @param key    Bytes to hash; not dereferenced when @p length is 0.
 * @param length Number of bytes in @p key.
 * @return The computed hash.
 */
static uint32_t hashString(const char * key, size_t length) {
    uint32_t hash = 0;
    size_t position = 0;
    while (position < length) {
        krk_hash_advance(hash,key[position]);
        position++;
    }
    return hash;
}
207 
208 KrkString * krk_takeString(char * chars, size_t length) {
209  uint32_t hash = hashString(chars, length);
210  _obtain_lock(_stringLock);
211  KrkString * interned = krk_tableFindString(&vm.strings, chars, length, hash);
212  if (interned != NULL) {
213  free(chars); /* This string isn't owned by us yet, so free, not KRK_FREE_ARRAY */
214  _release_lock(_stringLock);
215  return interned;
216  }
217 
218  /* Part of taking ownership of this string is that we track its memory usage */
219  krk_gcTakeBytes(chars, length + 1);
220  KrkString * result = allocateString(chars, length, hash);
221  return result;
222 }
223 
224 KrkString * krk_copyString(const char * chars, size_t length) {
225  uint32_t hash = hashString(chars, length);
226  _obtain_lock(_stringLock);
227  KrkString * interned = krk_tableFindString(&vm.strings, chars ? chars : "", length, hash);
228  if (interned) {
229  _release_lock(_stringLock);
230  return interned;
231  }
232  char * heapChars = KRK_ALLOCATE(char, length + 1);
233  memcpy(heapChars, chars ? chars : "", length);
234  heapChars[length] = '\0';
235  KrkString * result = allocateString(heapChars, length, hash);
236  if (result->chars != heapChars) free(heapChars);
237  _release_lock(_stringLock);
238  return result;
239 }
240 
241 KrkString * krk_takeStringVetted(char * chars, size_t length, size_t codesLength, KrkStringType type, uint32_t hash) {
242  _obtain_lock(_stringLock);
243  KrkString * interned = krk_tableFindString(&vm.strings, chars, length, hash);
244  if (interned != NULL) {
245  KRK_FREE_ARRAY(char, chars, length + 1);
246  _release_lock(_stringLock);
247  return interned;
248  }
249  KrkString * string = ALLOCATE_OBJECT(KrkString, KRK_OBJ_STRING);
250  string->length = length;
251  string->chars = chars;
252  string->obj.hash = hash;
253  string->obj.flags |= KRK_OBJ_FLAGS_VALID_HASH | type;
254  string->codesLength = codesLength;
255  string->codes = NULL;
256  if (type == KRK_OBJ_FLAGS_STRING_ASCII) string->codes = string->chars;
257  krk_push(OBJECT_VAL(string));
258  krk_tableSetExact(&vm.strings, OBJECT_VAL(string), NONE_VAL());
259  krk_pop();
260  _release_lock(_stringLock);
261  return string;
262 }
263 
265  KrkCodeObject * codeobject = ALLOCATE_OBJECT(KrkCodeObject, KRK_OBJ_CODEOBJECT);
266  codeobject->requiredArgs = 0;
267  codeobject->keywordArgs = 0;
268  codeobject->potentialPositionals = 0;
269  codeobject->upvalueCount = 0;
270  codeobject->name = NULL;
271  codeobject->docstring = NULL;
272  codeobject->localNameCount = 0;
273  codeobject->localNames = NULL;
275  krk_initValueArray(&codeobject->keywordArgNames);
276  krk_initChunk(&codeobject->chunk);
277  codeobject->jumpTargets = NONE_VAL();
278  return codeobject;
279 }
280 
281 KrkNative * krk_newNative(NativeFn function, const char * name, int type) {
282  KrkNative * native = ALLOCATE_OBJECT(KrkNative, KRK_OBJ_NATIVE);
283  native->function = function;
284  native->obj.flags = type;
285  native->name = name;
286  native->doc = NULL;
287  return native;
288 }
289 
291  KrkUpvalue ** upvalues = KRK_ALLOCATE(KrkUpvalue*, function->upvalueCount);
292  for (size_t i = 0; i < function->upvalueCount; ++i) {
293  upvalues[i] = NULL;
294  }
295  KrkClosure * closure = ALLOCATE_OBJECT(KrkClosure, KRK_OBJ_CLOSURE);
296  closure->function = function;
297  closure->upvalues = upvalues;
298  closure->upvalueCount = function->upvalueCount;
299  closure->annotations = krk_dict_of(0,NULL,0);
300  closure->globalsOwner = globals;
301  if (IS_INSTANCE(globals)) {
302  if (AS_INSTANCE(globals)->_class == vm.baseClasses->dictClass) {
303  closure->globalsTable = AS_DICT(globals);
304  } else {
305  closure->globalsTable = &AS_INSTANCE(globals)->fields;
306  }
307  } else {
308  fprintf(stderr, "Invalid globals context: %s\n", krk_typeName(globals));
309  abort();
310  }
311  krk_initTable(&closure->fields);
312  return closure;
313 }
314 
316  KrkUpvalue * upvalue = ALLOCATE_OBJECT(KrkUpvalue, KRK_OBJ_UPVALUE);
317  upvalue->location = slot;
318  upvalue->next = NULL;
319  upvalue->closed = NONE_VAL();
320  upvalue->owner = &krk_currentThread;
321  return upvalue;
322 }
323 
324 KrkClass * krk_newClass(KrkString * name, KrkClass * baseClass) {
325  KrkClass * _class = ALLOCATE_OBJECT(KrkClass, KRK_OBJ_CLASS);
326  _class->name = name;
327  _class->allocSize = sizeof(KrkInstance);
328  krk_initTable(&_class->methods);
329  krk_initTable(&_class->subclasses);
330 
331  if (baseClass) {
332  _class->base = baseClass;
333  _class->allocSize = baseClass->allocSize;
334  _class->_ongcscan = baseClass->_ongcscan;
335  _class->_ongcsweep = baseClass->_ongcsweep;
336 
337  krk_tableSet(&baseClass->subclasses, OBJECT_VAL(_class), NONE_VAL());
338  }
339 
340  return _class;
341 }
342 
344  KrkInstance * instance = (KrkInstance*)allocateObject(_class->allocSize, KRK_OBJ_INSTANCE);
345  instance->_class = _class;
346  krk_initTable(&instance->fields);
347  return instance;
348 }
349 
351  KrkBoundMethod * bound = ALLOCATE_OBJECT(KrkBoundMethod, KRK_OBJ_BOUND_METHOD);
352  bound->receiver = receiver;
353  bound->method = method;
354  return bound;
355 }
356 
357 KrkTuple * krk_newTuple(size_t length) {
358  KrkTuple * tuple = ALLOCATE_OBJECT(KrkTuple, KRK_OBJ_TUPLE);
359  krk_initValueArray(&tuple->values);
360  krk_push(OBJECT_VAL(tuple));
361  tuple->values.capacity = length;
362  tuple->values.values = KRK_GROW_ARRAY(KrkValue,NULL,0,length);
363  krk_pop();
364  return tuple;
365 }
366 
367 KrkBytes * krk_newBytes(size_t length, uint8_t * source) {
368  KrkBytes * bytes = ALLOCATE_OBJECT(KrkBytes, KRK_OBJ_BYTES);
369  bytes->length = length;
370  bytes->bytes = NULL;
371  krk_push(OBJECT_VAL(bytes));
372  bytes->bytes = KRK_ALLOCATE(uint8_t, length);
373  bytes->obj.hash = -1;
374  if (source) {
375  memcpy(bytes->bytes, source, length);
376  }
377  krk_pop();
378  return bytes;
379 }
380 
KrkValue krk_runtimeError(KrkClass *type, const char *fmt,...)
Produce and raise an exception with a formatted message.
Definition: exceptions.c:460
Functions for dealing with garbage collection and memory allocation.
void * krk_reallocate(void *ptr, size_t old, size_t new)
Resize an allocated heap object.
Definition: memory.c:157
void krk_gcTakeBytes(const void *ptr, size_t size)
Assume ownership of size bytes at ptr.
Definition: memory.c:149
Struct definitions for core object types.
KrkStringType
String compact storage type.
Definition: object.h:81
struct KrkInstance KrkInstance
An object of a class.
KrkObjType
Union tag for heap objects.
Definition: object.h:21
Internal header.
A function that has been attached to an object to serve as a method.
Definition: object.h:295
KrkBoundMethod * krk_newBoundMethod(KrkValue receiver, KrkObj *method)
Create a new bound method.
Definition: object.c:350
KrkValue receiver
Object to pass as implicit first argument.
Definition: object.h:297
KrkObj * method
Function to call.
Definition: object.h:298
Immutable sequence of bytes.
Definition: object.h:105
KrkObj obj
Base.
Definition: object.h:106
size_t length
Length of data in bytes.
Definition: object.h:107
uint8_t * bytes
Pointer to separately-stored bytes data.
Definition: object.h:108
KrkBytes * krk_newBytes(size_t length, uint8_t *source)
Create a new byte array.
Definition: object.c:367
void krk_initChunk(KrkChunk *chunk)
Initialize an opcode chunk.
Definition: chunk.c:7
Type object.
Definition: object.h:215
KrkCleanupCallback _ongcsweep
C function to call when the garbage collector is discarding an instance of this class.
Definition: object.h:224
KrkClass * krk_newClass(KrkString *name, KrkClass *base)
Create a new class object.
Definition: object.c:324
KrkCleanupCallback _ongcscan
C function to call when the garbage collector visits an instance of this class in the scan phase.
Definition: object.h:223
KrkString * name
Name of the class.
Definition: object.h:219
struct KrkClass * base
Pointer to base class implementation.
Definition: object.h:221
KrkTable subclasses
Set of classes that subclass this class.
Definition: object.h:225
size_t allocSize
Size to allocate when creating instances of this class.
Definition: object.h:222
KrkTable methods
General attributes table.
Definition: object.h:218
Function object.
Definition: object.h:195
KrkCodeObject * function
The codeobject containing the bytecode run when this function is called.
Definition: object.h:197
KrkValue globalsOwner
Owner of the globals table for this function.
Definition: object.h:202
size_t upvalueCount
Number of entries in upvalues.
Definition: object.h:199
KrkTable * globalsTable
Pointer to globals table with owner object.
Definition: object.h:203
KrkClosure * krk_newClosure(KrkCodeObject *function, KrkValue globals)
Create a new function object.
Definition: object.c:290
KrkValue annotations
Dictionary of type hints.
Definition: object.h:200
KrkUpvalue ** upvalues
Array of upvalues collected from the surrounding context when the closure was created.
Definition: object.h:198
KrkTable fields
Object attributes table.
Definition: object.h:201
Code object.
Definition: object.h:163
unsigned short potentialPositionals
Precalculated positional arguments for complex argument processing.
Definition: object.h:167
KrkChunk chunk
Bytecode data.
Definition: object.h:170
size_t upvalueCount
Number of upvalues this function collects as a closure.
Definition: object.h:169
KrkValueArray positionalArgNames
Array of names for positional arguments (and *args)
Definition: object.h:173
KrkValue jumpTargets
Possibly a set of jump targets...
Definition: object.h:182
KrkLocalEntry * localNames
Stores the names of local variables used in the function, for debugging.
Definition: object.h:177
unsigned short keywordArgs
Arity of keyword (default) arguments.
Definition: object.h:166
unsigned short requiredArgs
Arity of required (non-default) arguments.
Definition: object.h:165
KrkString * docstring
Docstring attached to the function.
Definition: object.h:172
KrkValueArray keywordArgNames
Array of names for keyword-only arguments (and **kwargs)
Definition: object.h:174
KrkCodeObject * krk_newCodeObject(void)
Create a new, uninitialized code object.
Definition: object.c:264
size_t localNameCount
Number of entries in localNames.
Definition: object.h:176
KrkString * name
Name of the function.
Definition: object.h:171
KrkValue krk_dict_of(int argc, const KrkValue argv[], int hasKw)
Create a dict object.
Definition: obj_dict.c:19
An object of a class.
Definition: object.h:281
KrkInstance * krk_newInstance(KrkClass *_class)
Create a new instance of the given class.
Definition: object.c:343
KrkClass * _class
Type.
Definition: object.h:283
KrkTable fields
Attributes table.
Definition: object.h:284
Managed binding to a C function.
Definition: object.h:309
const char * doc
Docstring to supply from __doc__.
Definition: object.h:313
const char * name
Name to use when repring.
Definition: object.h:312
KrkObj obj
Base.
Definition: object.h:310
NativeFn function
C function pointer.
Definition: object.h:311
KrkNative * krk_newNative(NativeFn function, const char *name, int type)
Create a native function binding object.
Definition: object.c:281
The most basic object type.
Definition: object.h:41
uint32_t hash
Cached hash value for table keys.
Definition: object.h:44
uint16_t flags
General object flags, mostly related to garbage collection.
Definition: object.h:43
Immutable sequence of Unicode codepoints.
Definition: object.h:93
uint32_t krk_unicodeCodepoint(KrkString *string, size_t index)
Obtain the codepoint at a given index in a string.
Definition: object.c:162
void * krk_unicodeString(KrkString *string)
Ensure that a codepoint representation of a string is available.
Definition: object.c:153
KrkObj obj
Base.
Definition: object.h:94
KrkString * krk_copyString(const char *chars, size_t length)
Obtain a string object representation of the given C string.
Definition: object.c:224
KrkString * krk_takeString(char *chars, size_t length)
Yield ownership of a C string to the GC and obtain a string object.
Definition: object.c:208
char * chars
UTF8 canonical data.
Definition: object.h:97
void * codes
Codepoint data.
Definition: object.h:98
KrkString * krk_takeStringVetted(char *chars, size_t length, size_t codesLength, KrkStringType type, uint32_t hash)
Like krk_takeString but for when the caller has already calculated code lengths, hash,...
Definition: object.c:241
size_t krk_codepointToBytes(krk_integer_type value, unsigned char *out)
Convert an integer codepoint to a UTF-8 byte representation.
Definition: object.c:38
Simple hash table of arbitrary keys to values.
Definition: table.h:28
void krk_initTable(KrkTable *table)
Initialize a hash table.
Definition: table.c:33
int krk_tableSet(KrkTable *table, KrkValue key, KrkValue value)
Assign a value to a key in a table.
Definition: table.c:148
struct KrkString * krk_tableFindString(KrkTable *table, const char *chars, size_t length, uint32_t hash)
Find a character sequence in the string interning table.
Definition: table.c:260
KrkValue scratchSpace[KRK_THREAD_SCRATCH_SIZE]
Definition: vm.h:169
Immutable sequence of arbitrary values.
Definition: object.h:323
KrkValueArray values
Stores the length, capacity, and actual values of the tuple.
Definition: object.h:325
KrkTuple * krk_newTuple(size_t length)
Create a new tuple.
Definition: object.c:357
Storage for values referenced from nested functions.
Definition: object.h:115
int location
Stack offset or -1 if closed.
Definition: object.h:117
KrkValue closed
Heap storage for closed value.
Definition: object.h:118
struct KrkThreadState * owner
The thread that owns the stack this upvalue belongs in.
Definition: object.h:120
struct KrkUpvalue * next
Invasive linked list pointer to next upvalue.
Definition: object.h:119
KrkUpvalue * krk_newUpvalue(int slot)
Create an upvalue slot.
Definition: object.c:315
size_t capacity
Definition: value.h:76
KrkValue * values
Definition: value.h:78
void krk_initValueArray(KrkValueArray *array)
Initialize a value array.
Definition: value.c:11
Stack reference or primitive value.
const char * krk_typeName(KrkValue value)
Get the name of the type of a value.
Definition: vm.c:984
Implementation of a generic hash table.
Convenience header for providing atomic operations to threads.
Definitions for primitive stack references.
Core API for the bytecode virtual machine.
krk_threadLocal KrkThreadState krk_currentThread
Thread-local VM state.
#define vm
Convenience macro for namespacing.
Definition: vm.h:257
KrkValue krk_pop(void)
Pop the top of the stack.
Definition: vm.c:131
void krk_push(KrkValue value)
Push a stack value.
Definition: vm.c:118