apfl/src/bytecode.c

#include <assert.h>

#include "apfl.h"

#include "alloc.h"
#include "bytecode.h"
#include "encode.h"
#include "format.h"
#include "gc.h"
#include "hashmap.h"
#include "resizable.h"
#include "strings.h"

#define BYTECODE_VERSION 0

/*
 * Describes what follows an instruction inside an instruction list.
 * The member names mentioned below are the union members this file reads
 * from the slot after the instruction.
 */
enum instruction_argument_type {
    INSN_ARGS_NONE,    // no argument slot follows
    INSN_ARGS_NUMBER,  // one slot, read as .number
    INSN_ARGS_COUNT,   // one slot, read as .count
    INSN_ARGS_INDEX,   // one slot, read as .index
    INSN_ARGS_STRING,  // one slot, read as .string (GC-managed string)
    INSN_ARGS_BODY,    // one slot, read as .body (nested instruction list)
    INSN_ARGS_MATCHER, // one slot, read as .matcher (matcher instruction list)
};

/*
 * Describes what follows a matcher instruction inside a matcher
 * instruction list.
 */
enum matcher_instruction_argument_type {
    MINSN_ARGS_NONE,           // no argument slot follows
    MINSN_ARGS_INDEX,          // one slot, read as .index
    MINSN_ARGS_NAME,           // one slot, read as .string
    MINSN_ARGS_NAME_INDEX_LEN, // three slots: .string, then .index, then .len
};

/*
 * Tells which kind of argument slot follows `insn` in an instruction list.
 *
 * The switch intentionally has no default label. A value that matches none
 * of the listed instructions trips the assertion below; with assertions
 * compiled out it is reported as taking no argument.
 */
static enum instruction_argument_type
argument_type_for_instruction(enum instruction insn)
{
    switch (insn) {
    /* Followed by a matcher instruction list. */
    case INSN_MATCHER_PUSH:
        return INSN_ARGS_MATCHER;

    /* Followed by a nested instruction list. */
    case INSN_FUNC_ADD_SUBFUNC:
    case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
        return INSN_ARGS_BODY;

    /* Followed by a string. */
    case INSN_STRING:
    case INSN_VAR_GET:
    case INSN_VAR_SET:
    case INSN_VAR_SET_LOCAL:
    case INSN_VAR_NEW:
    case INSN_VAR_NEW_LOCAL:
    case INSN_MOVE_TO_LOCAL_VAR:
    case INSN_FUNC_SET_NAME:
        return INSN_ARGS_STRING;

    /* Followed by an index. */
    case INSN_GET_BY_INDEX_KEEP:
    case INSN_MATCHER_SET_VAL:
        return INSN_ARGS_INDEX;

    /* Followed by a count. */
    case INSN_LIST:
    case INSN_SET_LINE:
    case INSN_FUNC:
        return INSN_ARGS_COUNT;

    /* Followed by a number. */
    case INSN_NUMBER:
        return INSN_ARGS_NUMBER;

    /* Stand-alone instructions. */
    case INSN_NIL:
    case INSN_TRUE:
    case INSN_FALSE:
    case INSN_LIST_APPEND:
    case INSN_LIST_EXPAND_INTO:
    case INSN_DICT:
    case INSN_DICT_APPEND_KVPAIR:
    case INSN_GET_MEMBER:
    case INSN_NEXT_LINE:
    case INSN_DROP:
    case INSN_DUP:
    case INSN_CALL:
    case INSN_MATCHER_MUST_MATCH:
    case INSN_BUILD_PAIR:
        return INSN_ARGS_NONE;
    }

    assert(false);
    return INSN_ARGS_NONE;
}


/*
 * Tells which argument slots follow `insn` in a matcher instruction list.
 *
 * As with the non-matcher variant, there is no default label: an unlisted
 * value trips the assertion and otherwise counts as "no arguments".
 */
static enum matcher_instruction_argument_type
matcher_argument_type_for_instruction(enum matcher_instruction insn)
{
    switch (insn) {
    /* Name, index and len. */
    case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
    case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
        return MINSN_ARGS_NAME_INDEX_LEN;

    /* Name only. */
    case MATCHER_CAPTURE_TO_VAR:
    case MATCHER_CAPTURE_TO_VAR_LOCAL:
        return MINSN_ARGS_NAME;

    /* Index; the index is used as values index. */
    case MATCHER_CHECK_CONST:
    case MATCHER_CHECK_PRED:
        return MINSN_ARGS_INDEX;

    /* Stand-alone instructions. */
    case MATCHER_IGNORE:
    case MATCHER_ENTER_LIST:
    case MATCHER_LEAVE_LIST:
    case MATCHER_CONTINUE_FROM_END:
    case MATCHER_REMAINDING:
    case MATCHER_UNPACK_PAIR:
        return MINSN_ARGS_NONE;
    }

    assert(false);
    return MINSN_ARGS_NONE;
}

/*
 * Reports whether `insn` holds one of the known matcher instruction values.
 * Used to reject unknown instruction bytes while unserializing.
 */
static bool
valid_matcher_instruction(enum matcher_instruction insn)
{
    bool known = false;

    switch (insn) {
    case MATCHER_IGNORE:
    case MATCHER_ENTER_LIST:
    case MATCHER_LEAVE_LIST:
    case MATCHER_CONTINUE_FROM_END:
    case MATCHER_REMAINDING:
    case MATCHER_CHECK_CONST:
    case MATCHER_CHECK_PRED:
    case MATCHER_CAPTURE_TO_VAR:
    case MATCHER_CAPTURE_TO_VAR_LOCAL:
    case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
    case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
    case MATCHER_UNPACK_PAIR:
        known = true;
        break;
    }

    return known;
}

/*
 * Reports whether `insn` holds one of the known instruction values.
 * Used to reject unknown instruction bytes while unserializing.
 */
static bool
valid_instruction(enum instruction insn)
{
    bool known = false;

    switch (insn) {
    case INSN_NIL:
    case INSN_TRUE:
    case INSN_FALSE:
    case INSN_LIST_APPEND:
    case INSN_LIST_EXPAND_INTO:
    case INSN_DICT:
    case INSN_DICT_APPEND_KVPAIR:
    case INSN_GET_MEMBER:
    case INSN_NEXT_LINE:
    case INSN_DROP:
    case INSN_DUP:
    case INSN_CALL:
    case INSN_MATCHER_MUST_MATCH:
    case INSN_NUMBER:
    case INSN_LIST:
    case INSN_SET_LINE:
    case INSN_FUNC:
    case INSN_GET_BY_INDEX_KEEP:
    case INSN_MATCHER_SET_VAL:
    case INSN_STRING:
    case INSN_VAR_GET:
    case INSN_VAR_SET:
    case INSN_VAR_SET_LOCAL:
    case INSN_VAR_NEW:
    case INSN_VAR_NEW_LOCAL:
    case INSN_MOVE_TO_LOCAL_VAR:
    case INSN_FUNC_SET_NAME:
    case INSN_FUNC_ADD_SUBFUNC:
    case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
    case INSN_MATCHER_PUSH:
    case INSN_BUILD_PAIR:
        known = true;
        break;
    }

    return known;
}


/*
 * Creates an empty instruction list through the GC and records the source
 * position (`line`, `filename`) it belongs to.
 *
 * Returns NULL if the GC could not provide a new list.
 */
struct instruction_list *
apfl_instructions_new(struct gc *gc, size_t line, struct apfl_string *filename)
{
    struct instruction_list *fresh = apfl_gc_new_instructions(gc);

    if (fresh != NULL) {
        *fresh = (struct instruction_list) {
            .filename = filename,
            .line = line,
            .instructions = NULL,
            .len = 0,
            .cap = 0,
        };
    }

    return fresh;
}

/*
 * Releases the slot array owned by `ilist`.
 *
 * The list structure itself is not freed here. After the call the list is
 * left empty (no slots, zero length and capacity), so a repeated deinit or
 * a late traversal does not touch the freed array.
 */
void
apfl_instructions_deinit(struct apfl_allocator allocator, struct instruction_list *ilist)
{
    FREE_LIST(allocator, ilist->instructions, ilist->cap);

    // Do not keep a dangling pointer or stale sizes around.
    ilist->instructions = NULL;
    ilist->len = 0;
    ilist->cap = 0;
}

/*
 * Advances `i` to the slot after the current instruction and copies that
 * slot into `arg`.
 *
 * If the list ends directly after the instruction (there is no slot at
 * `i + 1`), the enclosing function is left via a plain `return;`, so this
 * may only be used inside functions returning void. `i` and `ilist` are
 * evaluated more than once.
 */
#define GET_ARGUMENT(ilist, i, arg)           \
    do {                                      \
        if ((i) + 1 >= (ilist)->len) {        \
            return;                           \
        }                                     \
        (arg) = (ilist)->instructions[++(i)]; \
    } while (0)

/*
 * Reports every GC object referenced by `ilist` to the visitor `cb`:
 * the filename (if set) and all string, body and matcher arguments.
 * Number, count and index arguments are skipped.
 */
void
apfl_gc_instructions_traverse(struct instruction_list *ilist, gc_visitor cb, void *opaque)
{
    if (ilist->filename != NULL) {
        cb(opaque, GC_OBJECT_FROM(ilist->filename, GC_TYPE_STRING));
    }

    size_t pos = 0;
    while (pos < ilist->len) {
        union instruction_or_arg operand;

        switch (argument_type_for_instruction(ilist->instructions[pos].instruction)) {
        case INSN_ARGS_NONE:
            break;
        case INSN_ARGS_NUMBER:
        case INSN_ARGS_COUNT:
        case INSN_ARGS_INDEX:
            // Plain values, nothing for the GC to see; just step over the slot.
            pos++;
            break;
        case INSN_ARGS_STRING:
            GET_ARGUMENT(ilist, pos, operand);
            cb(opaque, GC_OBJECT_FROM(operand.string, GC_TYPE_STRING));
            break;
        case INSN_ARGS_BODY:
            GET_ARGUMENT(ilist, pos, operand);
            cb(opaque, GC_OBJECT_FROM(operand.body, GC_TYPE_INSTRUCTIONS));
            break;
        case INSN_ARGS_MATCHER:
            GET_ARGUMENT(ilist, pos, operand);
            cb(opaque, GC_OBJECT_FROM(operand.matcher, GC_TYPE_MATCHER_INSTRUCTIONS));
            break;
        }

        // Move on to the next instruction.
        pos++;
    }
}

/*
 * Reports every GC object referenced by `milist` to the visitor `cb`.
 * Only the name strings of capture instructions are GC objects; index and
 * len arguments are skipped.
 */
void
apfl_gc_matcher_instructions_traverse(struct matcher_instruction_list *milist, gc_visitor cb, void *opaque)
{
    size_t pos = 0;

    while (pos < milist->len) {
        union matcher_instruction_or_arg operand;

        switch (matcher_argument_type_for_instruction(milist->instructions[pos].instruction)) {
        case MINSN_ARGS_NONE:
            break;
        case MINSN_ARGS_INDEX:
            pos++;
            break;
        case MINSN_ARGS_NAME:
            GET_ARGUMENT(milist, pos, operand);
            cb(opaque, GC_OBJECT_FROM(operand.string, GC_TYPE_STRING));
            break;
        case MINSN_ARGS_NAME_INDEX_LEN:
            GET_ARGUMENT(milist, pos, operand);
            cb(opaque, GC_OBJECT_FROM(operand.string, GC_TYPE_STRING));
            // Step over the index and len slots.
            pos += 2;
            break;
        }

        // Move on to the next instruction.
        pos++;
    }
}

const char *
apfl_instruction_to_string(enum instruction insn)
{
    switch (insn) {
    case INSN_NIL:
        return "INSN_NIL";
    case INSN_TRUE:
        return "INSN_TRUE";
    case INSN_FALSE:
        return "INSN_FALSE";
    case INSN_NUMBER:
        return "INSN_NUMBER";
    case INSN_STRING:
        return "INSN_STRING";
    case INSN_LIST:
        return "INSN_LIST";
    case INSN_LIST_APPEND:
        return "INSN_LIST_APPEND";
    case INSN_LIST_EXPAND_INTO:
        return "INSN_LIST_EXPAND_INTO";
    case INSN_DICT:
        return "INSN_DICT";
    case INSN_DICT_APPEND_KVPAIR:
        return "INSN_DICT_APPEND_KVPAIR";
    case INSN_GET_MEMBER:
        return "INSN_GET_MEMBER";
    case INSN_VAR_GET:
        return "INSN_VAR_GET";
    case INSN_VAR_SET:
        return "INSN_VAR_SET";
    case INSN_VAR_SET_LOCAL:
        return "INSN_VAR_SET_LOCAL";
    case INSN_VAR_NEW:
        return "INSN_VAR_NEW";
    case INSN_VAR_NEW_LOCAL:
        return "INSN_VAR_NEW_LOCAL";
    case INSN_MOVE_TO_LOCAL_VAR:
        return "INSN_MOVE_TO_LOCAL_VAR";
    case INSN_NEXT_LINE:
        return "INSN_NEXT_LINE";
    case INSN_SET_LINE:
        return "INSN_SET_LINE";
    case INSN_GET_BY_INDEX_KEEP:
        return "INSN_GET_BY_INDEX_KEEP";
    case INSN_DROP:
        return "INSN_DROP";
    case INSN_DUP:
        return "INSN_DUP";
    case INSN_CALL:
        return "INSN_CALL";
    case INSN_FUNC:
        return "INSN_FUNC";
    case INSN_FUNC_ADD_SUBFUNC:
        return "INSN_FUNC_ADD_SUBFUNC";
    case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
        return "INSN_FUNC_ADD_SUBFUNC_ANYARGS";
    case INSN_FUNC_SET_NAME:
        return "INSN_FUNC_SET_NAME";
    case INSN_MATCHER_PUSH:
        return "INSN_MATCHER_PUSH";
    case INSN_MATCHER_SET_VAL:
        return "INSN_MATCHER_SET_VAL";
    case INSN_MATCHER_MUST_MATCH:
        return "INSN_MATCHER_MUST_MATCH";
    case INSN_BUILD_PAIR:
        return "INSN_BUILD_PAIR";
    }

    return "??";
}

const char *
apfl_matcher_instruction_to_string(enum matcher_instruction insn)
{
    switch (insn) {
    case MATCHER_IGNORE:
        return "MATCHER_IGNORE";
    case MATCHER_CAPTURE_TO_VAR:
        return "MATCHER_CAPTURE_TO_VAR";
    case MATCHER_CAPTURE_TO_VAR_LOCAL:
        return "MATCHER_CAPTURE_TO_VAR_LOCAL";
    case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
        return "MATCHER_CAPTURE_TO_VAR_WITH_PATH";
    case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
        return "MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH";
    case MATCHER_CHECK_CONST:
        return "MATCHER_CHECK_CONST";
    case MATCHER_CHECK_PRED:
        return "MATCHER_CHECK_PRED";
    case MATCHER_ENTER_LIST:
        return "MATCHER_ENTER_LIST";
    case MATCHER_LEAVE_LIST:
        return "MATCHER_LEAVE_LIST";
    case MATCHER_CONTINUE_FROM_END:
        return "MATCHER_CONTINUE_FROM_END";
    case MATCHER_REMAINDING:
        return "MATCHER_REMAINDING";
    case MATCHER_UNPACK_PAIR:
        return "MATCHER_UNPACK_PAIR";
    }

    return "??";
}

/*
 * Creates an empty matcher instruction list through the GC, with no slots
 * and both capture and value counts set to zero.
 *
 * Returns NULL if the GC could not provide a new list.
 */
struct matcher_instruction_list *
apfl_matcher_instructions_new(struct gc *gc)
{
    struct matcher_instruction_list *fresh = apfl_gc_new_matcher_instructions(gc);

    if (fresh != NULL) {
        *fresh = (struct matcher_instruction_list) {
            .value_count = 0,
            .capture_count = 0,
            .instructions = NULL,
            .len = 0,
            .cap = 0,
        };
    }

    return fresh;
}

/*
 * Releases the slot array owned by `milist`.
 *
 * The list structure itself is not freed here. After the call the list is
 * left empty (no slots, zero length and capacity), so a repeated deinit or
 * a late traversal does not touch the freed array.
 */
void
apfl_matcher_instructions_deinit(struct apfl_allocator allocator, struct matcher_instruction_list *milist)
{
    FREE_LIST(allocator, milist->instructions, milist->cap);

    // Do not keep a dangling pointer or stale sizes around.
    milist->instructions = NULL;
    milist->len = 0;
    milist->cap = 0;
}

/*
 * Advances `i` to the slot after the current instruction and copies that
 * slot into `arg`.
 *
 * If the list ends directly after the instruction (there is no slot at
 * `i + 1`), "Bytecode corrupted" is written to `w` and the enclosing
 * function returns false. `i` and `ilist` are evaluated more than once.
 */
#define GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg)                       \
    do {                                                              \
        if ((i) + 1 >= (ilist)->len) {                                \
            FMT_TRY(apfl_io_write_string(w, "Bytecode corrupted"));   \
            return false;                                             \
        }                                                             \
        (arg) = (ilist)->instructions[++(i)];                         \
    } while (0)

bool
apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matcher_instruction_list *milist)
{
    for (size_t i = 0; i < milist->len; i++) {
        union matcher_instruction_or_arg arg;

        FMT_TRY(apfl_format_put_indent(w, indent));
        FMT_TRY(apfl_io_write_string(w, apfl_matcher_instruction_to_string(milist->instructions[i].instruction)));

        switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) {
        case MINSN_ARGS_NONE:
            break;
        case MINSN_ARGS_NAME:
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_io_write_string(w, *arg.string));
            break;
        case MINSN_ARGS_NAME_INDEX_LEN:
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_io_write_string(w, *arg.string));
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, ", "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.index));
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, ", "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.len));
            break;
        case MINSN_ARGS_INDEX:
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.index));
            break;
        }

        FMT_TRY(apfl_io_write_byte(w, '\n'));
    }

    return true;
}

bool
apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_list *ilist)
{
    union instruction_or_arg arg;

    for (size_t i = 0; i < ilist->len; i++) {
        FMT_TRY(apfl_format_put_indent(w, indent));
        FMT_TRY(apfl_io_write_string(w, apfl_instruction_to_string(ilist->instructions[i].instruction)));

        switch (argument_type_for_instruction(ilist->instructions[i].instruction)) {
        case INSN_ARGS_NONE:
            break;
        case INSN_ARGS_NUMBER:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_format_put_number(w, arg.number));
            break;
        case INSN_ARGS_COUNT:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.count));
            break;
        case INSN_ARGS_INDEX:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.index));
            break;
        case INSN_ARGS_STRING:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_io_write_string(w, *arg.string));
            break;
        case INSN_ARGS_BODY:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " ilist{\n"));
            FMT_TRY(apfl_bytecode_dump(indent+1, w, arg.body));
            FMT_TRY(apfl_format_put_indent(w, indent));
            FMT_TRY(apfl_io_write_string(w, "}"));
            break;
        case INSN_ARGS_MATCHER:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " milist{\n"));
            FMT_TRY(apfl_bytecode_dump_matcher(indent+1, w, arg.matcher));
            FMT_TRY(apfl_format_put_indent(w, indent));
            FMT_TRY(apfl_io_write_string(w, "}"));
            break;
        }

        FMT_TRY(apfl_io_write_byte(w, '\n'));
    }

    return true;
}

/* State shared by all functions that write serialized bytecode. */
struct serializer {
    // Allocator handed to apfl_bytecode_serialize; also used for string_lookup.
    struct apfl_allocator allocator;
    // Destination of the serialized bytes.
    struct apfl_io_writer w;
    // Maps already written strings (keyed by `struct apfl_string *`, compared
    // by content) to the uint_least64_t index they were assigned.
    struct apfl_hashmap string_lookup;
    // Index that the next newly written string will get.
    size_t next_string_index;
};

/* State shared by all functions that read serialized bytecode. */
struct unserializer {
    // GC used to create strings and instruction lists.
    struct gc *gc;
    // Source of the serialized bytes.
    struct apfl_io_reader r;
    // Strings read so far, in the order they appeared; back references in
    // the input are indexes into this array.
    struct apfl_string **strings;
    // Number of used entries in `strings`.
    size_t strings_len;
    // Number of allocated entries in `strings`.
    size_t strings_cap;
};

/*
 * Key comparison callback for the serializer's string lookup map.
 * Both keys point to a `struct apfl_string *`; the strings are compared by
 * content. `opaque` is unused.
 */
static bool
serializer_strings_eq(void *opaque, const void *_a, const void *_b)
{
    const struct apfl_string * const *lhs = _a;
    const struct apfl_string * const *rhs = _b;

    (void)opaque;

    return apfl_string_eq(**lhs, **rhs);
}

static apfl_hash
serializer_strings_hash(void *opaque, const void *_key)
{
    (void)opaque;

    const struct apfl_string * const *key = _key;
    struct apfl_string_view sv = apfl_string_view_from(**key);
    return apfl_hash_fnv1a(sv.bytes, sv.len);
}

/* Largest value that fits into 64 bits; upper bound for encoded sizes. */
#define MAXU64 0xFFFFFFFFFFFFFFFF

/*
 * Writes the size `n` to `w` as an unsigned 64 bit value.
 *
 * Returns false if `n` does not fit into 64 bits or the write fails.
 */
static bool
serialize_size(struct apfl_io_writer w, size_t n)
{
    const uintmax_t wide = n;

    if (wide <= MAXU64) {
        return apfl_encode_u64(w, wide);
    }

    return false;
}

/*
 * Reads an unsigned 64 bit value from `r` and stores it in `*n`.
 *
 * Returns false if reading fails or the value does not fit into a size_t;
 * `*n` is only written on success.
 */
static bool
unserialize_size(struct apfl_io_reader r, size_t *n)
{
    uint_least64_t decoded;
    FMT_TRY(apfl_decode_u64(r, &decoded));

    if (decoded <= SIZE_MAX) {
        *n = (size_t)decoded;
        return true;
    }

    return false;
}

/*
 * Advances `i` to the slot after the current instruction and copies that
 * slot into `arg`.
 *
 * A list that ends directly after an instruction (no slot at `i + 1`) is
 * treated as a programming error: it trips an assertion and, with
 * assertions compiled out, makes the enclosing function return false.
 * `i` and `ilist` are evaluated more than once.
 */
#define GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg) \
    do {                                          \
        if ((i) + 1 >= (ilist)->len) {            \
            assert(false);                        \
            return false;                         \
        }                                         \
        (arg) = (ilist)->instructions[++(i)];     \
    } while (0)

/*
 * Defines a static function `name` that stores one slot at the end of a
 * list of type `ListType` and increments its length.
 *
 * The generated function never grows the list: it returns false when the
 * list is already filled up to its capacity and true otherwise.
 */
#define DEF_APPEND_INS_OR_ARG(name, ListType, InsOrArgType)     \
    static bool                                                 \
    name(                                                       \
        ListType *milist,                                       \
        InsOrArgType instruction_or_arg                         \
    ) {                                                         \
        if (milist->len >= milist->cap) {                       \
            return false;                                       \
        }                                                       \
        milist->instructions[milist->len] = instruction_or_arg; \
        milist->len++;                                          \
                                                                \
        return true;                                            \
    }

/*
 * Calls the append function `fn` with a slot of union type `InsOrArgType`
 * whose member `which` is set to `arg`.
 */
#define ABSTRACT_APPEND_INS_OR_ARG(fn, InsOrArgType, ilist, which, arg) \
    fn((ilist), (InsOrArgType) {.which = (arg)})

/* Append function for instruction lists. */
DEF_APPEND_INS_OR_ARG(
    append_instruction_or_arg,
    struct instruction_list,
    union instruction_or_arg
)
/* Append function for matcher instruction lists. */
DEF_APPEND_INS_OR_ARG(
    append_matcher_instruction_or_arg,
    struct matcher_instruction_list,
    union matcher_instruction_or_arg
)

/* Appends `arg` as union member `which` to the instruction list `ilist`. */
#define APPEND_INS_OR_ARG(ilist, which, arg) \
    ABSTRACT_APPEND_INS_OR_ARG(              \
        append_instruction_or_arg,           \
        union instruction_or_arg,            \
        ilist,                               \
        which,                               \
        arg                                  \
    )

/* Appends `arg` as union member `which` to the matcher instruction list `ilist`. */
#define APPEND_MATCHER_INS_OR_ARG(ilist, which, arg) \
    ABSTRACT_APPEND_INS_OR_ARG(                      \
        append_matcher_instruction_or_arg,           \
        union matcher_instruction_or_arg,            \
        ilist,                                       \
        which,                                       \
        arg                                          \
    )

/*
 * Writes `string` in the tagged encoding used by the bytecode format:
 *
 *   - NULL is written as the value 0.
 *   - A string whose content was written before is written as
 *     (index << 1) | 1, where index is the position assigned on first write.
 *   - Any other string is written as (len + 1) << 1 followed by its bytes
 *     and is then remembered under the next free index.
 *
 * Returns false if a value does not fit the encoding, a write fails or the
 * string can not be remembered in the lookup map.
 */
static bool
serialize_string(
    struct serializer *serializer,
    struct apfl_string *string
) {
    if (string == NULL) {
        return apfl_encode_u64(serializer->w, 0);
    }

    uint_least64_t known_index;
    if (!apfl_hashmap_get(&serializer->string_lookup, &string, &known_index)) {
        // First occurrence: write length and bytes, then remember the string.
        const uintmax_t wide_len = string->len;
        if (wide_len > (MAXU64>>1)-1) {
            return false;
        }

        FMT_TRY(apfl_encode_u64(serializer->w, (string->len + 1) << 1));
        FMT_TRY(apfl_io_write_string(serializer->w, *string));

        uint_least64_t assigned_index = serializer->next_string_index++;

        return apfl_hashmap_set(&serializer->string_lookup, &string, &assigned_index);
    }

    // Seen before: write a back reference, marked by the lowest bit.
    if (known_index > (MAXU64>>1)) {
        return false;
    }

    return apfl_encode_u64(serializer->w, (known_index << 1) | 1);
}

/*
 * Reads one string in the tagged encoding written by serialize_string and
 * stores the result in `*s`:
 *
 *   - The value 0 yields NULL.
 *   - A value with the lowest bit set is a back reference; the remaining
 *     bits are an index into the strings read so far.
 *   - Otherwise the remaining bits minus one are the length of the string,
 *     whose bytes follow. The new string is created through the GC and
 *     appended to the table of strings read so far.
 *
 * Returns false on read errors, out of range values or allocation failure.
 */
static bool
unserialize_string(
    struct unserializer *unserializer,
    struct apfl_string **s
) {
    uint_least64_t tagged;
    FMT_TRY(apfl_decode_u64(unserializer->r, &tagged));

    if (tagged == 0) {
        *s = NULL;
        return true;
    }

    const bool is_back_reference = (tagged & 1) == 1;
    uint_least64_t payload = tagged >> 1;

    if (is_back_reference) {
        if (payload > SIZE_MAX) {
            return false;
        }
        const size_t index = payload;
        if (index >= unserializer->strings_len) {
            return false;
        }
        *s = unserializer->strings[index];
        return true;
    }

    // Literal string: the payload is the length plus one.
    payload -= 1;
    if (payload > SIZE_MAX) {
        return false;
    }
    const size_t len = payload;

    unsigned char *buf = NULL;
    if (len > 0) {
        buf = ALLOC_BYTES(unserializer->gc->allocator, len);
        if (buf == NULL) {
            return false;
        }
        if (!apfl_io_read_bytes_exact_size(unserializer->r, buf, len)) {
            FREE_BYTES(unserializer->gc->allocator, buf, len);
            return false;
        }
    }

    struct apfl_string tmpstring = {
        .bytes = buf,
        .len = len,
        .cap = len,
    };

    *s = apfl_string_move_into_new_gc_string(unserializer->gc, &tmpstring);
    if (*s == NULL) {
        FREE_BYTES(unserializer->gc->allocator, buf, len);
        return false;
    }

    // Remember the string so later back references can find it.
    if (!apfl_resizable_append(
        unserializer->gc->allocator,
        sizeof(struct apfl_string *),
        (void **)&unserializer->strings,
        &unserializer->strings_len,
        &unserializer->strings_cap,
        s,
        1
    )) {
        return false;
    }

    return true;
}

static bool
serialize_milist(
    struct serializer *serializer,
    struct matcher_instruction_list *milist
) {
    union matcher_instruction_or_arg arg;

    FMT_TRY(serialize_size(serializer->w, milist->value_count));
    FMT_TRY(serialize_size(serializer->w, milist->capture_count));
    FMT_TRY(serialize_size(serializer->w, milist->len));

    for (size_t i = 0; i < milist->len; i++) {
        enum matcher_instruction insn = milist->instructions[i].instruction;
        FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn));

        switch (matcher_argument_type_for_instruction(insn)) {
        case MINSN_ARGS_NONE:
            break;
        case MINSN_ARGS_INDEX:
            GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
            FMT_TRY(serialize_size(serializer->w, arg.index));
            break;
        case MINSN_ARGS_NAME:
            GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
            FMT_TRY(serialize_string(serializer, arg.string));
            break;
        case MINSN_ARGS_NAME_INDEX_LEN:
            GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
            FMT_TRY(serialize_string(serializer, arg.string));

            GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
            FMT_TRY(serialize_size(serializer->w, arg.index));

            GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
            FMT_TRY(serialize_size(serializer->w, arg.len));
            break;
        }
    }

    return true;
}

/*
 * Reads a matcher instruction list as written by serialize_milist into
 * `milist`: value count, capture count and slot count, followed by the
 * instructions and their arguments.
 *
 * The slot array is allocated up front with exactly the announced number
 * of slots; input that would need more slots is rejected by the append
 * helpers. An announced slot count of zero allocates nothing, mirroring
 * unserialize_ilist.
 *
 * Returns false on read errors, unknown instruction bytes, allocation
 * failure or when the input does not fit the announced slot count.
 */
static bool
unserialize_milist(
    struct unserializer *unserializer,
    struct matcher_instruction_list *milist
) {
    size_t len;
    FMT_TRY(unserialize_size(unserializer->r, &milist->value_count));
    FMT_TRY(unserialize_size(unserializer->r, &milist->capture_count));
    FMT_TRY(unserialize_size(unserializer->r, &len));

    // Only allocate when there is something to store; a zero sized
    // allocation may legitimately yield NULL and must not count as failure.
    if (len > 0) {
        milist->instructions = ALLOC_LIST(
            unserializer->gc->allocator,
            union matcher_instruction_or_arg,
            len
        );
        if (milist->instructions == NULL) {
            return false;
        }
    }

    milist->cap = len;
    milist->len = 0;

    while (milist->len < len) {
        unsigned char b;
        FMT_TRY(apfl_io_read_byte(unserializer->r, &b));

        enum matcher_instruction insn = b;

        if (!valid_matcher_instruction(insn)) {
            return false;
        }

        FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, instruction, insn));

        switch (matcher_argument_type_for_instruction(insn)) {
        case MINSN_ARGS_NONE:
            break;
        case MINSN_ARGS_INDEX: {
            size_t index;
            FMT_TRY(unserialize_size(unserializer->r, &index));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, index));
            break;
        }
        case MINSN_ARGS_NAME: {
            struct apfl_string *name;
            FMT_TRY(unserialize_string(unserializer, &name));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name));
            break;
        }
        case MINSN_ARGS_NAME_INDEX_LEN: {
            struct apfl_string *name;
            FMT_TRY(unserialize_string(unserializer, &name));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name));

            size_t s;
            FMT_TRY(unserialize_size(unserializer->r, &s));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, s));

            FMT_TRY(unserialize_size(unserializer->r, &s));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, len, s));

            break;
        }
        }
    }

    return true;
}

static bool
serialize_ilist(
    struct serializer *serializer,
    struct instruction_list *ilist
) {
    union instruction_or_arg arg;

    FMT_TRY(serialize_size(serializer->w, ilist->line));
    FMT_TRY(serialize_string(serializer, ilist->filename));
    FMT_TRY(serialize_size(serializer->w, ilist->len));

    for (size_t i = 0; i < ilist->len; i++) {
        enum instruction insn = ilist->instructions[i].instruction;
        FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn));

        switch (argument_type_for_instruction(insn)) {
        case INSN_ARGS_NONE:
            break;
        case INSN_ARGS_NUMBER:
            GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
            FMT_TRY(apfl_encode_double(serializer->w, arg.number));
            break;
        case INSN_ARGS_COUNT:
            GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
            FMT_TRY(serialize_size(serializer->w, arg.count));
            break;
        case INSN_ARGS_INDEX:
            GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
            FMT_TRY(serialize_size(serializer->w, arg.index));
            break;
        case INSN_ARGS_STRING:
            GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
            FMT_TRY(serialize_string(serializer, arg.string));
            break;
        case INSN_ARGS_BODY:
            GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
            FMT_TRY(serialize_ilist(serializer, arg.body));
            break;
        case INSN_ARGS_MATCHER:
            GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
            FMT_TRY(serialize_milist(serializer, arg.matcher));
            break;
        }
    }

    return true;
}

/*
 * set_ilist callback for nested instruction lists: `opaque` points to the
 * body slot of the parent list, which receives the new list.
 * Always succeeds; the unserializer is not needed.
 */
static bool
set_ilist_nested(
    struct unserializer *unserializer,
    void *opaque,
    struct instruction_list *ilist
) {
    struct instruction_list **slot = opaque;

    (void)unserializer;

    *slot = ilist;

    return true;
}

/*
 * Reads one instruction list as written by serialize_ilist: line, filename
 * and slot count, followed by the instructions and their arguments.
 *
 * The new list is handed to `set_ilist` (together with `opaque`) right
 * after it was created, i.e. before any instruction is read into it. The
 * caller decides through that callback where the list gets stored.
 *
 * The slot array is allocated up front with exactly the announced number
 * of slots; input that would need more slots is rejected.
 *
 * Returns false on read errors, unknown instruction bytes, allocation
 * failure, a failing `set_ilist` or when the input does not fit the
 * announced slot count. The list may have been handed to `set_ilist`
 * already when a later step fails.
 */
static bool
unserialize_ilist(
    struct unserializer *unserializer,
    bool (*set_ilist)(struct unserializer *, void *, struct instruction_list *ilist),
    void *opaque
) {
    size_t line;
    FMT_TRY(unserialize_size(unserializer->r, &line));

    struct apfl_string *filename;
    FMT_TRY(unserialize_string(unserializer, &filename));

    // Only assigned and only read when filename != NULL.
    size_t tmproots;
    if (filename != NULL) {
        // Keep the filename registered as a temporary root while the list
        // that will reference it is being created.
        tmproots = apfl_gc_tmproots_begin(unserializer->gc);
        if (!apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(filename, GC_TYPE_STRING))) {
            // NOTE(review): the temporary roots are not restored on this path
            // (nor on the ilist == NULL path below); apfl_bytecode_unserialize
            // restores its own snapshot afterwards.
            return false;
        }
    }

    struct instruction_list *ilist = apfl_instructions_new(unserializer->gc, line, filename);
    if (ilist == NULL) {
        return false;
    }

    if (filename != NULL) {
        // The list references the filename now.
        apfl_gc_tmproots_restore(unserializer->gc, tmproots);
    }

    // Publish the (still empty) list before reading its contents.
    FMT_TRY(set_ilist(unserializer, opaque, ilist));

    size_t len;
    FMT_TRY(unserialize_size(unserializer->r, &len));

    // A slot count of zero leaves ilist->instructions as set by
    // apfl_instructions_new (NULL).
    if (len > 0) {
        ilist->instructions = ALLOC_LIST(unserializer->gc->allocator, union instruction_or_arg, len);
        if (ilist->instructions == NULL) {
            return false;
        }
    }
    ilist->cap = len;

    // ilist->len counts instructions and arguments alike; it grows with
    // every appended slot until the announced slot count is reached.
    while (ilist->len < len) {
        unsigned char b;
        FMT_TRY(apfl_io_read_byte(unserializer->r, &b));

        enum instruction insn = b;

        if (!valid_instruction(insn)) {
            return false;
        }

        FMT_TRY(APPEND_INS_OR_ARG(ilist, instruction, insn));

        switch (argument_type_for_instruction(insn)) {
        case INSN_ARGS_NONE:
            break;
        case INSN_ARGS_NUMBER: {
            double d;
            FMT_TRY(apfl_decode_double(unserializer->r, &d));
            FMT_TRY(APPEND_INS_OR_ARG(ilist, number, d));
            break;
        }
        case INSN_ARGS_COUNT: {
            size_t count;
            FMT_TRY(unserialize_size(unserializer->r, &count));
            FMT_TRY(APPEND_INS_OR_ARG(ilist, count, count));
            break;
        }
        case INSN_ARGS_INDEX: {
            size_t index;
            FMT_TRY(unserialize_size(unserializer->r, &index));
            FMT_TRY(APPEND_INS_OR_ARG(ilist, index, index));
            break;
        }
        case INSN_ARGS_STRING: {
            struct apfl_string *s;
            FMT_TRY(unserialize_string(unserializer, &s));
            FMT_TRY(APPEND_INS_OR_ARG(ilist, string, s));
            break;
        }
        case INSN_ARGS_BODY: {
            if (ilist->len >= ilist->cap) {
                return false;
            }
            // Reserve the argument slot first; the nested call fills it
            // through set_ilist_nested once the nested list exists.
            // NOTE(review): between ilist->len++ and that callback the slot
            // holds whatever ALLOC_LIST returned (presumably uninitialized).
            // Confirm that the GC can not traverse this list in between.
            struct instruction_list **dst = &ilist->instructions[ilist->len].body;
            ilist->len++;
            FMT_TRY(unserialize_ilist(unserializer, set_ilist_nested, dst));
            break;
        }
        case INSN_ARGS_MATCHER: {
            if (ilist->len >= ilist->cap) {
                return false;
            }
            struct matcher_instruction_list *matcher = apfl_matcher_instructions_new(unserializer->gc);
            if (matcher == NULL) {
                return false;
            }
            // Store the empty matcher in the list before reading its
            // contents, so it is referenced by this list from then on.
            ilist->instructions[ilist->len].matcher = matcher;
            ilist->len++;
            FMT_TRY(unserialize_milist(unserializer, matcher));
            break;
        }
        }
    }

    return true;
}

/*
 * Magic bytes at the start of serialized bytecode: a NUL byte, the ASCII
 * characters "apflB" and the bytecode version.
 */
static const unsigned char header[] = { '\0', 'a', 'p', 'f', 'l', 'B', BYTECODE_VERSION};
/* String view covering a whole byte array; `b` must be an array, not a pointer. */
#define BYTE_ARRAY_SV(b) ((struct apfl_string_view) { .bytes = (b), .len = sizeof(b), })
/* Number of bytes in the header. */
#define HEADER_LEN sizeof(header)

static struct apfl_string_view
header_sv(void)
{
    return BYTE_ARRAY_SV(header);
}


/*
 * Writes the complete serialized form of `ilist`: the header followed by
 * the instruction list itself. Returns false if any write fails.
 */
static bool
bytecode_serialize_inner(
    struct serializer *serializer,
    struct instruction_list *ilist
) {
    // Header first, so readers can identify format and version.
    FMT_TRY(apfl_io_write_string(serializer->w, header_sv()));

    if (!serialize_ilist(serializer, ilist)) {
        return false;
    }

    return true;
}

bool
apfl_bytecode_serialize(
    struct apfl_allocator allocator,
    struct apfl_io_writer w,
    struct instruction_list *ilist
) {
    struct serializer serializer = {
        .allocator = allocator,
        .w = w,
        .next_string_index = 0,
    };

    if (!apfl_hashmap_init(
        &serializer.string_lookup,
        allocator,
        (struct apfl_hashmap_callbacks) {
            .opaque = NULL,
            .keys_eq = serializer_strings_eq,
            .calc_hash = serializer_strings_hash,
        },
        sizeof(struct apfl_string *),
        sizeof(uint_least64_t)
    )) {
        return false;
    }

    bool out = bytecode_serialize_inner(&serializer, ilist);

    apfl_hashmap_deinit(&serializer.string_lookup);

    return out;
}

/*
 * set_ilist callback for the outermost instruction list: registers the
 * list as a temporary GC root and stores it in the pointer `opaque`
 * points to. Returns false (leaving the destination untouched) if the
 * root could not be registered.
 */
static bool
set_ilist_root(
    struct unserializer *unserializer,
    void *opaque,
    struct instruction_list *ilist
) {
    struct instruction_list **result_slot = opaque;

    if (apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(ilist, GC_TYPE_INSTRUCTIONS))) {
        *result_slot = ilist;
        return true;
    }

    return false;
}

/*
 * Reads serialized bytecode: checks the header and then reads the
 * outermost instruction list, which gets registered as a temporary GC
 * root while it is being filled.
 *
 * Returns the instruction list, or NULL if the header could not be read,
 * does not match, or the list could not be read completely.
 */
static struct instruction_list *
bytecode_unserialize_inner(struct unserializer *unserializer)
{
    unsigned char header_buf[HEADER_LEN];

    // This function returns a pointer, so failures are reported as NULL
    // explicitly instead of going through the boolean FMT_TRY helper.
    if (!apfl_io_read_bytes_exact_size(unserializer->r, header_buf, HEADER_LEN)) {
        return NULL;
    }

    if (!apfl_string_eq(header_sv(), BYTE_ARRAY_SV(header_buf))) {
        return NULL;
    }

    struct instruction_list *ilist = NULL;

    if (!unserialize_ilist(unserializer, set_ilist_root, &ilist)) {
        // A partially read list is not handed out.
        return NULL;
    }

    return ilist;
}

struct instruction_list *
apfl_bytecode_unserialize(
    struct gc *gc,
    struct apfl_io_reader r
) {
    struct unserializer unserializer = {
        .gc = gc,
        .r = r,
        .strings = NULL,
        .strings_len = 0,
        .strings_cap = 0,
    };

    size_t tmproots = apfl_gc_tmproots_begin(gc);
    struct instruction_list *out = bytecode_unserialize_inner(&unserializer);
    apfl_gc_tmproots_restore(gc, tmproots);

    FREE_LIST(gc->allocator, unserializer.strings, unserializer.strings_cap);

    return out;
}