compiler.c File Reference

Single-pass bytecode compiler. More...

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <kuroko/kuroko.h>
#include <kuroko/compiler.h>
#include <kuroko/memory.h>
#include <kuroko/scanner.h>
#include <kuroko/object.h>
#include <kuroko/debug.h>
#include <kuroko/vm.h>
#include <kuroko/util.h>
#include "private.h"
#include "opcode_enum.h"
Include dependency graph for compiler.c:

Go to the source code of this file.

Data Structures

struct  Parser
 Token parser state. More...
 
struct  ParseRule
 Parse rule table entry. More...
 
struct  Local
 Local variable reference. More...
 
struct  Upvalue
 Closure upvalue reference. More...
 
struct  IndexWithNext
 Linked list of indices. More...
 
struct  LoopExit
 Tracks 'break' and 'continue' statements. More...
 
struct  Compiler
 Subcompiler state. More...
 
struct  ClassCompiler
 Class compilation context. More...
 
struct  ChunkRecorder
 Bytecode emitter backtracking breadcrumb. More...
 
struct  RewindState
 Compiler emit and parse state prior to this expression. More...
 
struct  GlobalState
 

Macros

#define OPTIONS_FLAG_COMPILE_TIME_BUILTINS   (1 << 0)
 
#define OPTIONS_FLAG_NO_IMPLICIT_SELF   (1 << 1)
 
#define currentChunk()   (&state->current->codeobject->chunk)
 
#define EMIT_OPERAND_OP(opc, arg)
 
#define WRITE(s)
 
#define codeobject_from_chunk_pointer(ptr)   ((KrkCodeObject*)((char *)(ptr) - offsetof(KrkCodeObject, chunk)))
 
#define raiseSyntaxError(token, ...)   do { if (state->parser.hadError) break; krk_runtimeError(vm.exceptions->syntaxError, __VA_ARGS__); finishError(state,token); } while (0)
 
#define error(...)   raiseSyntaxError(&state->parser.previous, __VA_ARGS__)
 
#define errorAtCurrent(...)   raiseSyntaxError(&state->parser.current, __VA_ARGS__)
 
#define advance()   _advance(state)
 
#define skipToEnd()   _skipToEnd(state)
 
#define startEatingWhitespace()   _startEatingWhitespace(state)
 
#define stopEatingWhitespace()   _stopEatingWhitespace(state)
 
#define consume(...)   _consume(state,__VA_ARGS__)
 
#define check(t)   _check(state,t)
 
#define match(t)   _match(state,t)
 
#define syntheticToken(t)   _syntheticToken(state,t)
 
#define emitByte(b)   _emitByte(state,b)
 
#define emitBytes(a, b)   _emitBytes(state,a,b)
 
#define emitConstant(v)   _emitConstant(state,v)
 
#define emitJump(o)   _emitJump(state,o)
 
#define patchJump(o)   _patchJump(state,o)
 
#define ATTACH_PROPERTY(propName, how, propValue)
 
#define EXIT_JUMP_MAX   64
 
#define PUSH_CHAR(c)   krk_pushStringBuilder(&sb, c)
 
#define PUSH_HEX(n, type)   _pushHex(state, isBytes, &sb, c, end, n, type)
 
#define OP_NONE_LONG   -1
 
#define DO_VARIABLE(opset, opget, opdel)
 
#define RULE(token, a, b, c)   [TOKEN_ ## token] = {a, b, c}
 

Typedefs

typedef void(* ParseFn) (struct GlobalState *, int, struct RewindState *)
 Subexpression parser function. More...
 
typedef struct Compiler Compiler
 Subcompiler state. More...
 
typedef struct ClassCompiler ClassCompiler
 Class compilation context. More...
 
typedef struct ChunkRecorder ChunkRecorder
 Bytecode emitter backtracking breadcrumb. More...
 
typedef struct RewindState RewindState
 Compiler emit and parse state prior to this expression. More...
 
typedef struct GlobalState GlobalState
 

Enumerations

enum  Precedence {
  PREC_NONE , PREC_ASSIGNMENT , PREC_COMMA , PREC_MUST_ASSIGN ,
  PREC_CAN_ASSIGN , PREC_DEL_TARGET , PREC_TERNARY , PREC_OR ,
  PREC_AND , PREC_NOT , PREC_COMPARISON , PREC_BITOR ,
  PREC_BITXOR , PREC_BITAND , PREC_SHIFT , PREC_SUM ,
  PREC_TERM , PREC_FACTOR , PREC_EXPONENT , PREC_PRIMARY
}
 Parse precedence ladder. More...
 
enum  ExpressionType {
  EXPR_NORMAL , EXPR_CAN_ASSIGN , EXPR_ASSIGN_TARGET , EXPR_DEL_TARGET ,
  EXPR_METHOD_CALL , EXPR_CLASS_PARAMETERS
}
 Expression type. More...
 
enum  FunctionType {
  TYPE_FUNCTION , TYPE_MODULE , TYPE_METHOD , TYPE_INIT ,
  TYPE_LAMBDA , TYPE_STATIC , TYPE_CLASS , TYPE_CLASSMETHOD ,
  TYPE_COROUTINE , TYPE_COROUTINE_METHOD
}
 Function compilation type. More...
 

Functions

_noexport void _createAndBind_compilerClass (void)
 
KrkCodeObjectkrk_compile (const char *src, const char *fileName)
 Compile a source string to bytecode. More...
 

Variables

ParseRule krk_parseRules []
 Parse rules table. More...
 

Detailed Description

Single-pass bytecode compiler.

Kuroko's compiler is still very reminiscent of its CLox roots and uses the same parsing strategy, so if you have read the third chapter of Bob's "Crafting Interpreters", this should should be fairly easy to understand. One important thing that Kuroko's compiler does differently is implement rewinding, which allows for conservative reparsing and recompilation of subexpressions that have already been parsed. This is used to compile ternaries, multiple assignments, and the expression value in generator and comprehension expressions.

Kuroko has several levels of parse precedence, including three different levels indicative of assignments. Most expressions start from the TERNARY or COMMA level, but top-level expression statements and assignment values start at the highest level of ASSIGNMENT, which allows for multiple assignment targets. Expressions parsed from the MUST_ASSIGN level are assignment targets in a multiple assignment. Expression parsed from the CAN_ASSIGN level are single assignment targets.

String compilation manages escape sequence processing, so string tokens received from the scanner are not directly converted to string constants. F-strings are compiled as expressions generating a regular string.

Kuroko's bytecode supports variable operand sizes using paired "short" and "long" opcodes. To ease the output of these opcodes, the EMIT_OPERAND_OP macro will generate the appropriate opcode given an operand.

Definition in file compiler.c.

Macro Definition Documentation

◆ ATTACH_PROPERTY

#define ATTACH_PROPERTY (   propName,
  how,
  propValue 
)
Value:
do { \
KrkToken val_tok = syntheticToken(propValue); \
size_t val_ind = nonidentifierTokenConstant(state, &val_tok); \
EMIT_OPERAND_OP(how, val_ind); \
KrkToken name_tok = syntheticToken(propName); \
size_t name_ind = identifierConstant(state, &name_tok); \
EMIT_OPERAND_OP(OP_SET_NAME, name_ind); \
emitByte(OP_POP); \
} while (0)

Definition at line 1772 of file compiler.c.

◆ DO_VARIABLE

#define DO_VARIABLE (   opset,
  opget,
  opdel 
)
Value:
do { \
if (exprType == EXPR_ASSIGN_TARGET) { \
if (matchComplexEnd(state)) { \
EMIT_OPERAND_OP(opset, arg); \
break; \
} \
exprType = EXPR_NORMAL; \
} \
if (exprType == EXPR_CAN_ASSIGN && match(TOKEN_EQUAL)) { \
parsePrecedence(state, PREC_ASSIGNMENT); \
EMIT_OPERAND_OP(opset, arg); \
} else if (exprType == EXPR_CAN_ASSIGN && matchAssignment(state)) { \
EMIT_OPERAND_OP(opget, arg); \
assignmentValue(state); \
EMIT_OPERAND_OP(opset, arg); \
} else if (exprType == EXPR_DEL_TARGET && checkEndOfDel(state)) {\
if (opdel == OP_NONE) { emitByte(OP_NONE); EMIT_OPERAND_OP(opset, arg); } \
else { EMIT_OPERAND_OP(opdel, arg); } \
} else { \
EMIT_OPERAND_OP(opget, arg); \
} } while (0)
@ EXPR_NORMAL
Definition: compiler.c:98
@ EXPR_ASSIGN_TARGET
Definition: compiler.c:100
@ EXPR_DEL_TARGET
Definition: compiler.c:101
@ EXPR_CAN_ASSIGN
Definition: compiler.c:99
@ PREC_ASSIGNMENT
Definition: compiler.c:71

Definition at line 3125 of file compiler.c.

◆ EMIT_OPERAND_OP

#define EMIT_OPERAND_OP (   opc,
  arg 
)
Value:
do { if (arg < 256) { emitBytes(opc, arg); } \
else { emitBytes(opc ## _LONG, arg >> 16); emitBytes(arg >> 8, arg); } } while (0)

Definition at line 300 of file compiler.c.

◆ WRITE

#define WRITE (   s)
Value:
do { \
size_t len = strlen(s); \
if (writer - len < space) goto _exit; \
writer -= len; \
memcpy(writer, s, len); \
} while (0)

Typedef Documentation

◆ ChunkRecorder

typedef struct ChunkRecorder ChunkRecorder

Bytecode emitter backtracking breadcrumb.

Records the state of the bytecode emitter so it may be rewound when an expression needs to be re-parsed. Allows us to implement backtracking to compile the inner expression of a comprehension, the left hand side of a ternary, or the targets list of a complex assignment.

We rewind the bytecode, line mapping, and constants table, so that we don't keep around duplicate constants or debug info.

◆ ClassCompiler

typedef struct ClassCompiler ClassCompiler

Class compilation context.

Allows for things like super to be bound correctly. Also allows us to establish qualified names for functions and nested class definitions.

◆ Compiler

typedef struct Compiler Compiler

Subcompiler state.

Each function is compiled in its own context, with its own codeobject, locals, type, scopes, etc.

◆ ParseFn

typedef void(* ParseFn) (struct GlobalState *, int, struct RewindState *)

Subexpression parser function.

Used by the parse rule table for infix and prefix expression parser functions. The argument passed is the ExpressionType to compile the expression as.

Definition at line 116 of file compiler.c.

◆ RewindState

typedef struct RewindState RewindState

Compiler emit and parse state prior to this expression.

Used to rewind the parser for ternary and comma expressions.

Enumeration Type Documentation

◆ ExpressionType

Expression type.

Determines how an expression should be compiled.

Enumerator
EXPR_NORMAL 

This expression can not be an assignment target.

EXPR_CAN_ASSIGN 

This expression may be an assignment target, check for assignment operators at the end.

EXPR_ASSIGN_TARGET 

This expression is definitely an assignment target or chained to one.

EXPR_DEL_TARGET 

This expression is in the target list of a 'del' statement.

EXPR_METHOD_CALL 

This expression is the parameter list of a method call; only used by dot and call

Definition at line 97 of file compiler.c.

◆ FunctionType

Function compilation type.

Determines the context of the function being compiled, as different kinds of functions have different semantics.

Enumerator
TYPE_FUNCTION 

Normal 'def' function.

TYPE_MODULE 

Top level of a script.

TYPE_METHOD 

Class method with self binding.

TYPE_INIT 

Class __init__

TYPE_LAMBDA 

Lambda expression body, must be a single expression.

TYPE_STATIC 

Static class method, no self binding.

TYPE_CLASS 

Class body, not a normal series of declarations.

TYPE_CLASSMETHOD 

Class method, binds first argument to the class.

TYPE_COROUTINE 

await def function.

TYPE_COROUTINE_METHOD 

await def class method.

Definition at line 160 of file compiler.c.

◆ Precedence

enum Precedence

Parse precedence ladder.

Lower values (values listed first) bind more loosely than higher values (values listed later).

Enumerator
PREC_ASSIGNMENT 

=

PREC_COMMA 

,

PREC_MUST_ASSIGN 

Multple assignment target

PREC_CAN_ASSIGN 

Single assignment target, inside parens

PREC_DEL_TARGET 

Like above, but del target list

PREC_TERNARY 

TrueBranch if Condition else FalseBranch

PREC_OR 

or

PREC_AND 

and

PREC_NOT 

not

PREC_COMPARISON 

< > <= >= in not in

PREC_BITOR 

|

PREC_BITXOR 

^

PREC_BITAND 

&

PREC_SHIFT 

<< >>

PREC_SUM 

+ -

PREC_TERM 

* / %

PREC_FACTOR 

+ - ~ !

PREC_EXPONENT 

**

PREC_PRIMARY 

. () []

Definition at line 69 of file compiler.c.

Function Documentation

◆ krk_compile()

KrkCodeObject* krk_compile ( const char *  src,
const char *  fileName 
)

Compile a source string to bytecode.

Compile a string to a code object.

Parses source code and compiles it to bytecode.

Raises SyntaxError on error and returns NULL if the source failed to compile.

Parameters
srcCode string to compile. Should be a module body or repl line.
fileNameUsed to describe where the source string came from in error messages.
Returns
A compiled code object, or NULL on error.
Exceptions
SyntaxErrorif src could not be compiled.

Definition at line 4129 of file compiler.c.

Variable Documentation

◆ krk_parseRules

ParseRule krk_parseRules[]

Parse rules table.

Each parse rule consists of a token, an prefix rule, an infix rule, and a parse precedence. We also stringify the token name for use in debugging and error messages. The rule table is indexed by token types and is constructed with a designated initializer, so the order in this file is not important; it's probably best to order either visually or syntactically related elements together.

Definition at line 4189 of file compiler.c.