Code Labs 18 min read

Crafting Interpreters: A Runnable, Semantics-First Cheat Sheet (Python)

Notebook companion to Crafting Interpreters. Chapters are distilled into a core semantic idea and presented via a runnable Python micro-example. Sections conclude with an end-to-end tree-walk and bytecode VM executions of the same tiny program.


Crafting Interpreters Cheat Sheet

A notebook companion for Crafting Interpreters. Each chapter is self-contained, with a definition cell and a runnable cell right below it.

This is a conceptual companion, not a line-by-line reproduction of the book’s implementations. Some chapters use smaller substitute mechanisms when that keeps the core idea cleaner in notebook form. For example, where the book uses a Pratt parser for expression parsing, this notebook may use a smaller hand-written parser that demonstrates precedence and associativity directly without reproducing the exact architecture.

Download & run locally: crafting-interpreters-cheat-sheet.ipynb

Notebook usage

  • Run cells top to bottom.
  • Each chapter appears as implementation -> example.
  • The notebook targets Python 3.10+ because several examples use match / case.

Chapter 4. Scanning

Lexing turns characters into tokens so later phases can reason about symbols.

Note: the book implements a manual scanner with explicit state and lookahead; this version uses a compact regex-based tokenizer to illustrate tokenization.

def ch4_scanning() -> None:
    import re
    token = re.compile(r"(?P<NUM>\d+)|(?P<ID>[A-Za-z_]\w*)|(?P<OP>[=+;])|(?P<WS>\s+)")
    def scan(s: str): return [(m.lastgroup, m.group()) for m in token.finditer(s) if m.lastgroup != "WS"]
    print(scan("var x=1+2;"))
ch4_scanning()
[('ID', 'var'), ('ID', 'x'), ('OP', '='), ('NUM', '1'), ('OP', '+'), ('NUM', '2'), ('OP', ';')]

Chapter 5. Representing Code

ASTs turn code into data, which makes evaluation a tree walk.

def ch5_ast() -> None:
    def ev(e):
        match e:
            case ("lit", v): return v
            case ("+", a, b): return ev(a) + ev(b)
            case _: raise ValueError(e)
    expr = ("+", ("lit", 1), ("+", ("lit", 2), ("lit", 3)))
    print(ev(expr))
ch5_ast()
6

Chapter 6. Parsing Expressions

Parsing chooses structure and precedence.

Note: Crafting Interpreters uses a Pratt parser here; this notebook instead uses a tiny recursive-descent precedence parser so the shape of parsing remains explicit in a smaller cell.

def ch6_structure() -> None:
    import re

    token_re = re.compile(r"\d+|[()+*]")
    toks = token_re.findall("1+2*3+(4*5)")
    i = 0

    def peek():
        return toks[i] if i < len(toks) else None

    def eat(tok=None):
        nonlocal i
        t = peek()
        if tok is not None and t != tok:
            raise SyntaxError((tok, t))
        i += 1
        return t

    def primary():
        t = peek()
        if t is None:
            raise SyntaxError("unexpected EOF")
        if t.isdigit():
            eat()
            return ("lit", int(t))
        if t == "(":
            eat("(")
            e = expr()
            eat(")")
            return e
        raise SyntaxError(t)

    def term():
        e = primary()
        while peek() == "*":
            eat("*")
            e = ("*", e, primary())
        return e

    def expr():
        e = term()
        while peek() == "+":
            eat("+")
            e = ("+", e, term())
        return e

    ast = expr()
    if peek() is not None:
        raise SyntaxError(f"trailing token {peek()}")

    def ev(e):
        match e:
            case ("lit", v): return v
            case ("+", a, b): return ev(a) + ev(b)
            case ("*", a, b): return ev(a) * ev(b)
            case _: raise ValueError(e)

    print(toks)
    print(ast)
    print(ev(ast))
ch6_structure()
['1', '+', '2', '*', '3', '+', '(', '4', '*', '5', ')']
('+', ('+', ('lit', 1), ('*', ('lit', 2), ('lit', 3))), ('*', ('lit', 4), ('lit', 5)))
27

Chapter 7. Evaluating Expressions

Evaluation is runtime meaning with dynamic checks.

def ch7_eval_semantics() -> None:
    def ev(e):
        match e:
            case ("lit", v): return v
            case ("+", a, b):
                x, y = ev(a), ev(b)
                if type(x) != type(y): raise TypeError("no mixed +")
                return x + y
            case _: raise ValueError(e)
    print(ev(("+", ("lit", 1), ("lit", 2))))
    try: ev(("+", ("lit", 1), ("lit", "s")))
    except Exception as ex: print(type(ex).__name__, str(ex))
ch7_eval_semantics()
3
TypeError no mixed +

Chapter 8. Statements and State

Statements introduce mutation through environments.

Note: this uses a single flat environment; the book introduces nested environments aligned with block structure.

def ch8_state() -> None:
    env: dict[str, object] = {}
    def eval_(e):
        match e:
            case ("lit", v): return v
            case ("var", n): return env[n]
            case _: raise ValueError(e)
    def exec_(s):
        match s:
            case ("var", n, e): env[n] = eval_(e)
            case ("set", n, e): env[n] = eval_(e)
            case _: raise ValueError(s)
    exec_(("var", "x", ("lit", 0)))
    exec_(("set", "x", ("lit", 3)))
    print(env)
ch8_state()
{'x': 3}

Chapter 9. Control Flow

Control flow is who runs next.

def ch9_control_flow() -> None:
    prog = [("inc", "x"), ("jnlt", "x", 3, 0)]  # jump to 0 while x < 3
    env, pc = {"x": 0}, 0
    while pc < len(prog):
        op, *a = prog[pc]
        if op == "inc":
            env[a[0]] += 1
            pc += 1
        elif op == "jnlt":
            name, bound, target = a
            pc = target if env[name] < bound else pc + 1
        else:
            raise ValueError(op)
    print(env["x"])
ch9_control_flow()
3

Chapter 9. Syntactic Sugar

Surface syntax often desugars into a smaller core.

Note: this models control flow with explicit jumps; I think the book first implements structured control flow in the AST interpreter before lowering.

def ch9_sugar() -> None:
    def desugar(stmt):
        match stmt:
            case ("for", init, cond, step, body):
                return [init, ("while", cond, body + [step])]
            case _:
                return [stmt]

    def run(block):
        env = {}

        def ev(e):
            match e:
                case ("lit", v): return v
                case ("var", n): return env[n]
                case ("+", a, b): return ev(a) + ev(b)
                case ("<", a, b): return ev(a) < ev(b)
                case _: raise ValueError(e)

        def exec_block(xs):
            for s in xs:
                exec_stmt(s)

        def exec_stmt(s):
            match s:
                case ("var", n, e): env[n] = ev(e)
                case ("set", n, e): env[n] = ev(e)
                case ("print", e): print(ev(e))
                case ("while", c, b):
                    while ev(c):
                        exec_block(b)
                case _: raise ValueError(s)

        exec_block(block)

    sugared = (
        "for",
        ("var", "i", ("lit", 0)),
        ("<", ("var", "i"), ("lit", 3)),
        ("set", "i", ("+", ("var", "i"), ("lit", 1))),
        [("print", ("var", "i"))],
    )

    core = desugar(sugared)
    print(core)
    run(core)
ch9_sugar()
[('var', 'i', ('lit', 0)), ('while', ('<', ('var', 'i'), ('lit', 3)), [('print', ('var', 'i')), ('set', 'i', ('+', ('var', 'i'), ('lit', 1)))])]
0
1
2

Chapter 10. Functions

Functions are first-class values and closures capture scope.

def ch10_functions() -> None:
    class Env(dict):
        def __init__(self, parent=None):
            super().__init__()
            self.parent = parent

        def get(self, name):
            if name in self:
                return self[name]
            if self.parent is not None:
                return self.parent.get(name)
            raise NameError(name)

    def ev(e, env):
        match e:
            case ("lit", v):
                return v
            case ("var", n):
                return env.get(n)
            case ("+", a, b):
                return ev(a, env) + ev(b, env)
            case ("fun", params, body):
                return ("closure", params, body, env)
            case ("call", fn_expr, arg_exprs):
                tag, params, body, closed = ev(fn_expr, env)
                call_env = Env(closed)
                for p, a in zip(params, arg_exprs):
                    call_env[p] = ev(a, env)
                return ev(body, call_env)
            case _:
                raise ValueError(e)

    env = Env()
    env["makeAdder"] = ev(
        ("fun", ["n"], ("fun", ["x"], ("+", ("var", "x"), ("var", "n")))),
        env,
    )
    env["add5"] = ev(("call", ("var", "makeAdder"), [("lit", 5)]), env)
    print(env["add5"][0], ev(("call", ("var", "add5"), [("lit", 2)]), env))
ch10_functions()
closure 7

Chapter 11. Resolving and Binding

Lexical scope can be resolved ahead of time.

Note: this is a minimal resolver sketch; the book performs a full pre-pass over the AST

def ch11_resolution() -> None:
    scopes: list[set[str]] = []

    def resolve_expr(e):
        match e:
            case ("lit", _):
                return e
            case ("var", name):
                for dist, scope in enumerate(reversed(scopes)):
                    if name in scope:
                        return ("var_at", dist, name)
                return ("global", name)
            case ("+", a, b):
                return ("+", resolve_expr(a), resolve_expr(b))
            case _:
                raise ValueError(e)

    def resolve_stmt(s):
        match s:
            case ("var", name, init):
                out = ("var", name, resolve_expr(init))
                if scopes:
                    scopes[-1].add(name)
                return out
            case ("print", e):
                return ("print", resolve_expr(e))
            case ("block", body):
                scopes.append(set())
                out = ("block", [resolve_stmt(x) for x in body])
                scopes.pop()
                return out
            case _:
                raise ValueError(s)

    prog = ("block", [
        ("var", "x", ("lit", "global")),
        ("block", [
            ("var", "x", ("lit", "local")),
            ("print", ("var", "x")),
        ]),
        ("print", ("var", "x")),
    ])

    resolved = resolve_stmt(prog)
    print(resolved)
ch11_resolution()
('block', [('var', 'x', ('lit', 'global')), ('block', [('var', 'x', ('lit', 'local')), ('print', ('var_at', 0, 'x'))]), ('print', ('var_at', 0, 'x'))])

Chapter 12. Classes

Classes package state and behavior into objects.

def ch12_classes() -> None:
    def lookup_method(cls, name):
        methods = cls["methods"]
        if name in methods:
            return methods[name]
        raise KeyError(name)

    def bind(method, this):
        return lambda *args: method(this, *args)

    def new(cls, *args):
        obj = {"class": cls, "fields": {}}
        init = cls["methods"].get("init")
        if init is not None:
            bind(init, obj)(*args)
        return obj

    def getprop(obj, name):
        if name in obj["fields"]:
            return obj["fields"][name]
        return bind(lookup_method(obj["class"], name), obj)

    Point = {
        "name": "Point",
        "methods": {
            "init": lambda this, x, y: this["fields"].update({"x": x, "y": y}),
            "norm2": lambda this: this["fields"]["x"] ** 2 + this["fields"]["y"] ** 2,
        },
    }

    p = new(Point, 3, 4)
    print(getprop(p, "norm2")())
ch12_classes()
25

Chapter 12. Prototypes

Prototype delegation is object lookup by chain walk.

def ch12_prototypes() -> None:
    proto = {"to_s": lambda self: f"{self['x']},{self['y']}"}
    obj = {"__proto__": proto, "x": 1, "y": 2}
    def get(o, k): return o[k] if k in o else get(o["__proto__"], k)
    print(get(obj, "to_s")(obj))
ch12_prototypes()
1,2

Chapter 13. Inheritance

Inheritance is method lookup with reuse.

def ch13_inheritance() -> None:
    def lookup_method(cls, name):
        if name in cls["methods"]:
            return cls["methods"][name]
        if cls["super"] is not None:
            return lookup_method(cls["super"], name)
        raise KeyError(name)

    def bind(method, this):
        return lambda *args: method(this, *args)

    def new(cls):
        return {"class": cls, "fields": {}}

    def getprop(obj, name):
        return bind(lookup_method(obj["class"], name), obj)

    A = {
        "name": "A",
        "super": None,
        "methods": {
            "m": lambda this: "A",
        },
    }

    B = {
        "name": "B",
        "super": A,
        "methods": {
            "m": lambda this: "B->" + lookup_method(B["super"], "m")(this),
        },
    }

    b = new(B)
    print(getprop(b, "m")())
ch13_inheritance()
B->A

End-to-End Shared Program

A single tiny AST program is used by both execution models below. The surface language is intentionally small: +, <, if, while, print, vars, and assignment.

# Mini language AST
# Expr: ("lit", v) | ("var", name) | ("+", a, b) | ("<", a, b)
# Stmt: ("var", name, expr) | ("set", name, expr) | ("print", expr)
#       | ("if", cond, then_block, else_block) | ("while", cond, body_block)

DEMO_PROG = [
    ("var", "i", ("lit", 0)),
    ("var", "acc", ("lit", 0)),
    ("while", ("<", ("var", "i"), ("lit", 5)), [
        ("set", "acc", ("+", ("var", "acc"), ("var", "i"))),
        ("set", "i", ("+", ("var", "i"), ("lit", 1))),
    ]),
    ("if", ("<", ("var", "acc"), ("lit", 10)),
        [("print", ("var", "acc"))],
        [("print", ("+", ("var", "acc"), ("lit", 100)))],
    ),
]

End-to-End After II.A

Run the shared AST program through a tiny tree-walk interpreter. This is the semantic target for the tree-walk half of the book.

def run_treewalk(prog):
    env = {}

    def ev(e):
        match e:
            case ("lit", v): return v
            case ("var", n): return env[n]
            case ("+", a, b):
                x, y = ev(a), ev(b)
                if type(x) != type(y): raise TypeError("no mixed +")
                return x + y
            case ("<", a, b):
                x, y = ev(a), ev(b)
                return x < y
            case _: raise ValueError(e)

    def exec_block(block):
        for s in block: exec_stmt(s)

    def exec_stmt(s):
        match s:
            case ("var", n, e): env[n] = ev(e)
            case ("set", n, e): env[n] = ev(e)
            case ("print", e): print(ev(e))
            case ("if", c, t, f): exec_block(t if ev(c) else f)
            case ("while", c, b):
                while ev(c): exec_block(b)
            case _: raise ValueError(s)

    exec_block(prog)

run_treewalk(DEMO_PROG)  # expected: 110
110

III.A Bytecode Virtual Machine

Chapter 14. Chunks of Bytecode

Bytecode separates instructions from literals.

def ch14_chunks() -> None:
    const = [1, 2]
    code = [("CONST", 0), ("CONST", 1), ("ADD",), ("RET",)]
    for i, ins in enumerate(code):
        op, *a = ins
        print(i, op, const[a[0]] if op == "CONST" else "")
ch14_chunks()
0 CONST 1
1 CONST 2
2 ADD 
3 RET 

Chapter 15. A Virtual Machine

A VM gives semantics by a stack execution model.

def ch15_vm() -> None:
    const = [1, 2]
    code = [("CONST", 0), ("CONST", 1), ("ADD",), ("RET",)]
    stack, ip = [], 0
    while True:
        op, *a = code[ip]; ip += 1
        if op == "CONST": stack.append(const[a[0]])
        elif op == "ADD": stack.append(stack.pop() + stack.pop())
        elif op == "RET": print(stack.pop()); break
        else: raise ValueError(op)
ch15_vm()
3

Chapter 15. Register Sketch

Register code makes dataflow explicit.

def ch15_register_bytecode() -> None:
    const = [1, 2]; r = [0, 0]
    r[0] = const[0]; r[1] = const[1]
    r[0] = r[0] + r[1]
    print(r[0])
ch15_register_bytecode()
3

Chapter 16. Scanning on Demand

Lazy lexing produces tokens only when needed.

def ch16_lazy_scan() -> None:
    import re
    pat = re.compile(r"\d+|[()+*]")
    def scan(s: str):
        for m in pat.finditer(s): yield m.group()
    it = scan("1+(2*3)")
    print(next(it), list(it))
ch16_lazy_scan()
1 ['+', '(', '2', '*', '3', ')']

Chapter 17. Compiling Expressions

Compilation lowers ASTs into bytecode.

def ch17_compile() -> None:
    def compile_(e, const, code):
        match e:
            case ("lit", v):
                const.append(v); code.append(("CONST", len(const) - 1))
            case ("+", a, b):
                compile_(a, const, code); compile_(b, const, code); code.append(("ADD",))
            case _: raise ValueError(e)
    const, code = [], []
    compile_(("+", ("lit", 1), ("lit", 2)), const, code); code.append(("RET",))
    print(const, code)
ch17_compile()
[1, 2] [('CONST', 0), ('CONST', 1), ('ADD',), ('RET',)]

Chapter 18. Types of Values

Tagged values make operations precise.

def ch18_tagged_values() -> None:
    def add(a, b):
        ta, va = a; tb, vb = b
        if ta != tb: raise TypeError("type mismatch")
        return (ta, va + vb)
    print(add(("num", 1), ("num", 2)))
ch18_tagged_values()
('num', 3)

Chapter 19. Strings

Interning canonicalizes equal strings.

def ch19_interning() -> None:
    pool: dict[str, str] = {}
    def intern(s: str) -> str: pool.setdefault(s, s); return pool[s]
    a = intern("hi" * 1); b = intern("h" + "i")
    print(a is b, a == b, len(pool))
ch19_interning()
True True 1

Chapter 20. Hash Tables

Hash tables trade simple lookup for collision handling.

def ch20_hashtable() -> None:
    def hput(t, k, v):
        i = hash(k) % len(t)
        while t[i] and t[i][0] != k: i = (i + 1) % len(t)
        t[i] = (k, v)
    def hget(t, k):
        i = hash(k) % len(t)
        while t[i] and t[i][0] != k: i = (i + 1) % len(t)
        return None if not t[i] else t[i][1]
    t = [None] * 8; hput(t, "a", 1); hput(t, "b", 2); print(hget(t, "b"))
ch20_hashtable()
2

Chapter 21. Global Variables

Globals are shared mutable name/value storage.

def ch21_globals() -> None:
    globals_: dict[str, int] = {}
    def setg(k: str, v: int) -> None: globals_[k] = v
    def getg(k: str) -> int: return globals_[k]
    setg("x", 1); setg("x", getg("x") + 2)
    print(getg("x"))
ch21_globals()
3

Chapter 22. Local Variables

Locals are usually fast stack slots.

def ch22_locals() -> None:
    stack = [None] * 4
    def fn(a: int, b: int) -> int:
        base = 0
        stack[base], stack[base + 1] = a, b
        stack[base + 2] = stack[base] + stack[base + 1]
        return stack[base + 2]
    print(fn(1, 2))
ch22_locals()
3

Chapter 23. Jumping Back and Forth

Jumps are control flow as data with patching.

def ch23_patch_jumps() -> None:
    code = [("JMP_IF_FALSE", None), ("THEN",), ("JMP", None), ("ELSE",), ("HALT",)]
    code[0] = ("JMP_IF_FALSE", 3)
    code[2] = ("JMP", 4)
    print(code)
ch23_patch_jumps()
[('JMP_IF_FALSE', 3), ('THEN',), ('JMP', 4), ('ELSE',), ('HALT',)]

Chapter 24. Calls and Functions

Calls require frames and return addresses.

def ch24_calls_frames() -> None:
    prog = [("CALL", "f"), ("PRINT",), ("HALT",)]
    funcs = {"f": [("PUSH", 3), ("RET",)]}
    stack, call, pc, code = [], [], 0, prog
    while True:
        op, *a = code[pc]; pc += 1
        if op == "CALL": call.append((code, pc)); code, pc = funcs[a[0]], 0
        elif op == "PUSH": stack.append(a[0])
        elif op == "RET": code, pc = call.pop()
        elif op == "PRINT": print(stack.pop())
        elif op == "HALT": break
        else: raise ValueError(op)
ch24_calls_frames()
3

Chapter 25. Closures

Closures extend the lifetime of captured locals.

def ch25_closures() -> None:
    class Cell:
        def __init__(self, value):
            self.value = value

    class Env(dict):
        def __init__(self, parent=None):
            super().__init__()
            self.parent = parent

        def lookup_binding(self, name):
            if name in self:
                return self[name]
            if self.parent is not None:
                return self.parent.lookup_binding(name)
            raise NameError(name)

    def read(binding):
        return binding.value if isinstance(binding, Cell) else binding

    def ev(e, env):
        match e:
            case ("lit", v):
                return v
            case ("var", n):
                return read(env.lookup_binding(n))
            case ("set", n, rhs):
                binding = env.lookup_binding(n)
                if not isinstance(binding, Cell):
                    raise TypeError(f"{n} is not assignable")
                binding.value = ev(rhs, env)
                return binding.value
            case ("+", a, b):
                return ev(a, env) + ev(b, env)
            case ("fun", params, body):
                return ("closure", params, body, env)
            case ("call", fn, args):
                tag, params, body, closed = ev(fn, env)
                call_env = Env(closed)
                for p, a in zip(params, args):
                    call_env[p] = Cell(ev(a, env))
                return ev(body, call_env)
            case ("seq", xs):
                v = None
                for x in xs:
                    v = ev(x, env)
                return v
            case _:
                raise ValueError(e)

    def make_counter():
        activation = Env()
        activation["n"] = Cell(0)
        return ev(
            ("fun", [], ("seq", [
                ("set", "n", ("+", ("var", "n"), ("lit", 1))),
                ("var", "n"),
            ])),
            activation,
        )

    env = Env()
    env["c"] = make_counter()
    print(ev(("call", ("var", "c"), []), env), ev(("call", ("var", "c"), []), env))
ch25_closures()
1 2

Chapter 25. Loop Variable Capture

Loop captures need snapshotting when you want value capture.

def ch25_loop_var() -> None:
    class Cell:
        def __init__(self, value):
            self.value = value

    shared = Cell(0)
    late = []
    for i in range(3):
        shared.value = i
        late.append(lambda cell=shared: cell.value)

    snap = []
    for i in range(3):
        cell = Cell(i)
        snap.append(lambda cell=cell: cell.value)

    print([f() for f in late], [f() for f in snap])
ch25_loop_var()
[2, 2, 2] [0, 1, 2]

Chapter 26. Garbage Collection

Tracing GC defines reachability semantics.

def ch26_gc_tracing() -> None:
    heap, roots = {1: [2, 3], 2: [], 3: [2], 4: []}, [1]
    marked: set[int] = set()
    def mark(o: int) -> None:
        if o in marked: return
        marked.add(o)
        for r in heap.get(o, []): mark(r)
    for r in roots: mark(r)
    for o in list(heap):
        if o not in marked: del heap[o]
    print(sorted(heap))
ch26_gc_tracing()
[1, 2, 3]

Chapter 27. Classes and Instances

Instances carry fields and a pointer to behavior.

def ch27_instances() -> None:
    def new(cls):
        return {"class": cls, "fields": {}}

    Point = {
        "name": "Point",
        "super": None,
        "methods": {
            "norm2": lambda this: this["fields"]["x"] ** 2 + this["fields"]["y"] ** 2,
        },
    }

    p = new(Point)
    p["fields"]["x"] = 3
    p["fields"]["y"] = 4
    print(p["class"]["methods"]["norm2"](p))
ch27_instances()
25

Chapter 28. Methods and Initializers

Initializers and binding shape object construction.

def ch28_init_bind() -> None:
    def bind(method, this):
        return lambda *args: method(this, *args)

    def new(cls, *args):
        obj = {"class": cls, "fields": {}}
        init = cls["methods"].get("init")
        if init is not None:
            bind(init, obj)(*args)
        return obj

    def getprop(obj, name):
        if name in obj["fields"]:
            return obj["fields"][name]
        return bind(obj["class"]["methods"][name], obj)

    C = {
        "name": "C",
        "super": None,
        "methods": {
            "init": lambda this, x: this["fields"].update({"x": x}),
            "get": lambda this: this["fields"]["x"],
        },
    }

    o = new(C, 7)
    print(getprop(o, "get")())
ch28_init_bind()
7

Chapter 29. Superclasses

super starts lookup one link up the chain.

def ch29_super_lookup() -> None:
    def lookup_method(cls, name):
        if name in cls["methods"]:
            return cls["methods"][name]
        if cls["super"] is not None:
            return lookup_method(cls["super"], name)
        raise KeyError(name)

    def bind(method, this):
        return lambda *args: method(this, *args)

    def new(cls):
        return {"class": cls, "fields": {}}

    A = {
        "name": "A",
        "super": None,
        "methods": {
            "m": lambda this: "A",
        },
    }

    B = {
        "name": "B",
        "super": A,
        "methods": {
            "m": lambda this: "B->" + lookup_method(B["super"], "m")(this),
        },
    }

    b = new(B)
    print(bind(lookup_method(B, "m"), b)())
ch29_super_lookup()
B->A

Chapter 30. Optimization

Optimizations are semantics-preserving rewrites.

def ch30_optimize_fold() -> None:
    def ev(e):
        match e:
            case ("lit", v): return v
            case ("+", a, b): return ev(a) + ev(b)
            case ("*", a, b): return ev(a) * ev(b)
            case _: raise ValueError(e)

    def fold(e):
        match e:
            case ("+", a, b):
                a, b = fold(a), fold(b)
                return ("lit", ev(("+", a, b))) if a[0] == b[0] == "lit" else ("+", a, b)
            case ("*", a, b):
                a, b = fold(a), fold(b)
                return ("lit", ev(("*", a, b))) if a[0] == b[0] == "lit" else ("*", a, b)
            case _:
                return e

    expr = ("+", ("lit", 1), ("*", ("lit", 2), ("lit", 3)))
    folded = fold(expr)
    print(expr, "=>", folded, ev(expr), ev(folded))
ch30_optimize_fold()
('+', ('lit', 1), ('*', ('lit', 2), ('lit', 3))) => ('lit', 7) 7 7

End-to-End After III.A

Compile the same AST program to bytecode and run it on a tiny stack VM. This keeps the same semantic target but moves the execution model into the VM world.

# Bytecode ops: CONST k, GET slot, SET slot, ADD, LT, JIF_FALSE addr, JMP addr, PRINT, HALT

def compile_to_bytecode(prog):
    consts = []
    code = []
    slots = {}  # name -> int

    def slot(name):
        if name not in slots: slots[name] = len(slots)
        return slots[name]

    def emit(*ins): code.append(tuple(ins)); return len(code) - 1
    def patch(at, target):
        op, _ = code[at]
        code[at] = (op, target)

    def cconst(v):
        consts.append(v)
        emit("CONST", len(consts) - 1)

    def cexpr(e):
        match e:
            case ("lit", v): cconst(v)
            case ("var", n): emit("GET", slot(n))
            case ("+", a, b): cexpr(a); cexpr(b); emit("ADD")
            case ("<", a, b): cexpr(a); cexpr(b); emit("LT")
            case _: raise ValueError(e)

    def cblock(block):
        for s in block: cstmt(s)

    def cstmt(s):
        match s:
            case ("var", n, e):
                cexpr(e); emit("SET", slot(n))
            case ("set", n, e):
                cexpr(e); emit("SET", slot(n))
            case ("print", e):
                cexpr(e); emit("PRINT")
            case ("if", c, t, f):
                cexpr(c)
                jif = emit("JIF_FALSE", None)
                cblock(t)
                jmp = emit("JMP", None)
                patch(jif, len(code))
                cblock(f)
                patch(jmp, len(code))
            case ("while", c, b):
                loop_start = len(code)
                cexpr(c)
                jif = emit("JIF_FALSE", None)
                cblock(b)
                emit("JMP", loop_start)
                patch(jif, len(code))
            case _: raise ValueError(s)

    cblock(prog)
    emit("HALT")
    return consts, code, slots

def run_vm(consts, code, slots):
    stack = []
    globals_ = [None] * len(slots)
    ip = 0

    while True:
        op, *a = code[ip]; ip += 1
        if op == "CONST":
            stack.append(consts[a[0]])
        elif op == "GET":
            stack.append(globals_[a[0]])
        elif op == "SET":
            globals_[a[0]] = stack.pop()
        elif op == "ADD":
            b = stack.pop(); a0 = stack.pop()
            if type(a0) != type(b): raise TypeError("no mixed +")
            stack.append(a0 + b)
        elif op == "LT":
            b = stack.pop(); a0 = stack.pop()
            stack.append(a0 < b)
        elif op == "JIF_FALSE":
            cond = stack.pop()
            if not cond: ip = a[0]
        elif op == "JMP":
            ip = a[0]
        elif op == "PRINT":
            print(stack.pop())
        elif op == "HALT":
            return
        else:
            raise ValueError(op)

consts, code, slots = compile_to_bytecode(DEMO_PROG)
run_vm(consts, code, slots)  # expected: 110
110

Stay in the Loop

Get notified when I publish new writing. This block follows the Code Labs stream.