Single-pass bytecode compiler. More...
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <kuroko/kuroko.h>
#include <kuroko/compiler.h>
#include <kuroko/memory.h>
#include <kuroko/scanner.h>
#include <kuroko/object.h>
#include <kuroko/debug.h>
#include <kuroko/vm.h>
#include <kuroko/util.h>
#include "private.h"
#include "opcode_enum.h"
Go to the source code of this file.
Data Structures | |
struct | Parser |
Token parser state. More... | |
struct | ParseRule |
Parse rule table entry. More... | |
struct | Local |
Local variable reference. More... | |
struct | Upvalue |
Closure upvalue reference. More... | |
struct | IndexWithNext |
Linked list of indices. More... | |
struct | LoopExit |
Tracks 'break' and 'continue' statements. More... | |
struct | Compiler |
Subcompiler state. More... | |
struct | ClassCompiler |
Class compilation context. More... | |
struct | ChunkRecorder |
Bytecode emitter backtracking breadcrumb. More... | |
struct | RewindState |
Compiler emit and parse state prior to this expression. More... | |
struct | GlobalState |
Macros | |
#define | OPTIONS_FLAG_COMPILE_TIME_BUILTINS (1 << 0) |
#define | OPTIONS_FLAG_NO_IMPLICIT_SELF (1 << 1) |
#define | currentChunk() (&state->current->codeobject->chunk) |
#define | EMIT_OPERAND_OP(opc, arg) |
#define | WRITE(s) |
#define | codeobject_from_chunk_pointer(ptr) ((KrkCodeObject*)((char *)(ptr) - offsetof(KrkCodeObject, chunk))) |
#define | raiseSyntaxError(token, ...) do { if (state->parser.hadError) break; krk_runtimeError(vm.exceptions->syntaxError, __VA_ARGS__); finishError(state,token); } while (0) |
#define | error(...) raiseSyntaxError(&state->parser.previous, __VA_ARGS__) |
#define | errorAtCurrent(...) raiseSyntaxError(&state->parser.current, __VA_ARGS__) |
#define | advance() _advance(state) |
#define | skipToEnd() _skipToEnd(state) |
#define | startEatingWhitespace() _startEatingWhitespace(state) |
#define | stopEatingWhitespace() _stopEatingWhitespace(state) |
#define | consume(...) _consume(state,__VA_ARGS__) |
#define | check(t) _check(state,t) |
#define | match(t) _match(state,t) |
#define | syntheticToken(t) _syntheticToken(state,t) |
#define | emitByte(b) _emitByte(state,b) |
#define | emitBytes(a, b) _emitBytes(state,a,b) |
#define | emitConstant(v) _emitConstant(state,v) |
#define | emitJump(o) _emitJump(state,o) |
#define | patchJump(o) _patchJump(state,o) |
#define | ATTACH_PROPERTY(propName, how, propValue) |
#define | EXIT_JUMP_MAX 64 |
#define | PUSH_CHAR(c) krk_pushStringBuilder(&sb, c) |
#define | PUSH_HEX(n, type) _pushHex(state, isBytes, &sb, c, end, n, type) |
#define | OP_NONE_LONG -1 |
#define | DO_VARIABLE(opset, opget, opdel) |
#define | RULE(token, a, b, c) [TOKEN_ ## token] = {a, b, c} |
Typedefs | |
typedef void(* | ParseFn) (struct GlobalState *, int, struct RewindState *) |
Subexpression parser function. More... | |
typedef struct Compiler | Compiler |
Subcompiler state. More... | |
typedef struct ClassCompiler | ClassCompiler |
Class compilation context. More... | |
typedef struct ChunkRecorder | ChunkRecorder |
Bytecode emitter backtracking breadcrumb. More... | |
typedef struct RewindState | RewindState |
Compiler emit and parse state prior to this expression. More... | |
typedef struct GlobalState | GlobalState |
Enumerations | |
enum | Precedence { PREC_NONE , PREC_ASSIGNMENT , PREC_COMMA , PREC_MUST_ASSIGN , PREC_CAN_ASSIGN , PREC_DEL_TARGET , PREC_TERNARY , PREC_OR , PREC_AND , PREC_NOT , PREC_COMPARISON , PREC_BITOR , PREC_BITXOR , PREC_BITAND , PREC_SHIFT , PREC_SUM , PREC_TERM , PREC_FACTOR , PREC_EXPONENT , PREC_PRIMARY } |
Parse precedence ladder. More... | |
enum | ExpressionType { EXPR_NORMAL , EXPR_CAN_ASSIGN , EXPR_ASSIGN_TARGET , EXPR_DEL_TARGET , EXPR_METHOD_CALL , EXPR_CLASS_PARAMETERS } |
Expression type. More... | |
enum | FunctionType { TYPE_FUNCTION , TYPE_MODULE , TYPE_METHOD , TYPE_INIT , TYPE_LAMBDA , TYPE_STATIC , TYPE_CLASS , TYPE_CLASSMETHOD , TYPE_COROUTINE , TYPE_COROUTINE_METHOD } |
Function compilation type. More... | |
Functions | |
_noexport void | _createAndBind_compilerClass (void) |
KrkCodeObject * | krk_compile (const char *src, const char *fileName) |
Compile a source string to bytecode. More... | |
Variables | |
ParseRule | krk_parseRules [] |
Parse rules table. More... | |
Detailed Description
Single-pass bytecode compiler.
Kuroko's compiler is still very reminiscent of its CLox roots and uses the same parsing strategy, so if you have read the third chapter of Bob's "Crafting Interpreters", this should should be fairly easy to understand. One important thing that Kuroko's compiler does differently is implement rewinding, which allows for conservative reparsing and recompilation of subexpressions that have already been parsed. This is used to compile ternaries, multiple assignments, and the expression value in generator and comprehension expressions.
Kuroko has several levels of parse precedence, including three different levels indicative of assignments. Most expressions start from the TERNARY or COMMA level, but top-level expression statements and assignment values start at the highest level of ASSIGNMENT, which allows for multiple assignment targets. Expressions parsed from the MUST_ASSIGN level are assignment targets in a multiple assignment. Expression parsed from the CAN_ASSIGN level are single assignment targets.
String compilation manages escape sequence processing, so string tokens received from the scanner are not directly converted to string constants. F-strings are compiled as expressions generating a regular string.
Kuroko's bytecode supports variable operand sizes using paired "short" and "long" opcodes. To ease the output of these opcodes, the EMIT_OPERAND_OP macro will generate the appropriate opcode given an operand.
Definition in file compiler.c.
Macro Definition Documentation
◆ ATTACH_PROPERTY
#define ATTACH_PROPERTY | ( | propName, | |
how, | |||
propValue | |||
) |
Definition at line 1772 of file compiler.c.
◆ DO_VARIABLE
#define DO_VARIABLE | ( | opset, | |
opget, | |||
opdel | |||
) |
Definition at line 3125 of file compiler.c.
◆ EMIT_OPERAND_OP
#define EMIT_OPERAND_OP | ( | opc, | |
arg | |||
) |
Definition at line 300 of file compiler.c.
◆ WRITE
#define WRITE | ( | s | ) |
Typedef Documentation
◆ ChunkRecorder
typedef struct ChunkRecorder ChunkRecorder |
Bytecode emitter backtracking breadcrumb.
Records the state of the bytecode emitter so it may be rewound when an expression needs to be re-parsed. Allows us to implement backtracking to compile the inner expression of a comprehension, the left hand side of a ternary, or the targets list of a complex assignment.
We rewind the bytecode, line mapping, and constants table, so that we don't keep around duplicate constants or debug info.
◆ ClassCompiler
typedef struct ClassCompiler ClassCompiler |
Class compilation context.
Allows for things like super to be bound correctly. Also allows us to establish qualified names for functions and nested class definitions.
◆ Compiler
Subcompiler state.
Each function is compiled in its own context, with its own codeobject, locals, type, scopes, etc.
◆ ParseFn
typedef void(* ParseFn) (struct GlobalState *, int, struct RewindState *) |
Subexpression parser function.
Used by the parse rule table for infix and prefix expression parser functions. The argument passed is the ExpressionType to compile the expression as.
Definition at line 116 of file compiler.c.
◆ RewindState
typedef struct RewindState RewindState |
Compiler emit and parse state prior to this expression.
Used to rewind the parser for ternary and comma expressions.
Enumeration Type Documentation
◆ ExpressionType
enum ExpressionType |
Expression type.
Determines how an expression should be compiled.
Definition at line 97 of file compiler.c.
◆ FunctionType
enum FunctionType |
Function compilation type.
Determines the context of the function being compiled, as different kinds of functions have different semantics.
Definition at line 160 of file compiler.c.
◆ Precedence
enum Precedence |
Parse precedence ladder.
Lower values (values listed first) bind more loosely than higher values (values listed later).
Definition at line 69 of file compiler.c.
Function Documentation
◆ krk_compile()
KrkCodeObject* krk_compile | ( | const char * | src, |
const char * | fileName | ||
) |
Compile a source string to bytecode.
Compile a string to a code object.
Parses source code and compiles it to bytecode.
Raises SyntaxError on error and returns NULL if the source failed to compile.
- Parameters
-
src Code string to compile. Should be a module body or repl line. fileName Used to describe where the source string came from in error messages.
- Returns
- A compiled code object, or NULL on error.
- Exceptions
-
SyntaxError if src
could not be compiled.
Definition at line 4129 of file compiler.c.
Variable Documentation
◆ krk_parseRules
ParseRule krk_parseRules[] |
Parse rules table.
Each parse rule consists of a token, an prefix rule, an infix rule, and a parse precedence. We also stringify the token name for use in debugging and error messages. The rule table is indexed by token types and is constructed with a designated initializer, so the order in this file is not important; it's probably best to order either visually or syntactically related elements together.
Definition at line 4189 of file compiler.c.