apfl/src/bytecode.c

#include <assert.h>

#include "apfl.h"

#include "alloc.h"
#include "bytecode.h"
#include "format.h"
#include "gc.h"

/*
 * Create a new, empty instruction list via apfl_gc_new_instructions().
 *
 * The new list has no instruction storage yet (instructions is NULL, len and
 * cap are 0) and records the given line and filename.
 *
 * Returns the new list, or NULL if apfl_gc_new_instructions() returned NULL.
 */
struct instruction_list *
apfl_instructions_new(struct gc *gc, size_t line, struct apfl_string *filename)
{
    struct instruction_list *new_list = apfl_gc_new_instructions(gc);

    if (new_list != NULL) {
        // Assigning a compound literal also zeroes every member not named here.
        *new_list = (struct instruction_list) {
            .filename = filename,
            .line = line,
            .instructions = NULL,
            .len = 0,
            .cap = 0,
        };
    }

    return new_list;
}

/*
 * Release the instruction storage of ilist.
 *
 * Hands the instructions array together with its capacity to FREE_LIST,
 * using the given allocator. ilist itself is not passed to FREE_LIST here,
 * and none of its members are reset by this function.
 */
void
apfl_instructions_deinit(struct apfl_allocator allocator, struct instruction_list *ilist)
{
    FREE_LIST(allocator, ilist->instructions, ilist->cap);
}

/*
 * Fetch the argument slot that follows the instruction at index i.
 *
 * On success, i is advanced to the argument's slot and that slot is stored
 * in arg. If no slot follows the instruction (the list ends right after it),
 * the *enclosing function* returns instead of reading past the end of the
 * list. Because of that bare `return`, this macro may only be used inside
 * functions returning void.
 *
 * The bounds check has to look at i + 1 (the slot that is about to be read),
 * not at i: callers only invoke this while i < len, so checking i itself
 * would never trigger.
 *
 * Note: ilist and i are evaluated more than once; pass plain identifiers.
 */
#define GET_ARGUMENT(ilist, i, arg)               \
    do {                                          \
        if ((i) + 1 >= (ilist)->len) {            \
            return;                               \
        }                                         \
        (arg) = (ilist)->instructions[++(i)];     \
    } while (0)

/*
 * GC traversal hook for an instruction list.
 *
 * Calls cb (with opaque passed through unchanged) for every object the list
 * refers to: first the filename, if one is set, then - walking the
 * instruction stream front to back - the argument of every instruction that
 * carries a string, a nested instruction list or a matcher instruction list.
 *
 * Arguments are fetched with GET_ARGUMENT, which makes this function return
 * early when its bounds check fails.
 */
void
apfl_gc_instructions_traverse(struct instruction_list *ilist, gc_visitor cb, void *opaque)
{
    if (ilist->filename != NULL) {
        cb(opaque, GC_OBJECT_FROM(ilist->filename, GC_TYPE_STRING));
    }

    size_t i = 0;
    while (i < ilist->len) {
        union instruction_or_arg operand;

        switch (ilist->instructions[i].instruction) {
        // Argument is a string.
        case INSN_STRING:
        case INSN_VAR_GET:
        case INSN_VAR_SET:
        case INSN_VAR_SET_LOCAL:
        case INSN_VAR_NEW:
        case INSN_VAR_NEW_LOCAL:
        case INSN_MOVE_TO_LOCAL_VAR:
        case INSN_FUNC_SET_NAME:
            GET_ARGUMENT(ilist, i, operand);
            cb(opaque, GC_OBJECT_FROM(operand.string, GC_TYPE_STRING));
            break;

        // Argument is a nested instruction list.
        case INSN_FUNC_ADD_SUBFUNC:
            GET_ARGUMENT(ilist, i, operand);
            cb(opaque, GC_OBJECT_FROM(operand.body, GC_TYPE_INSTRUCTIONS));
            break;

        // Argument is a matcher instruction list.
        case INSN_MATCHER_PUSH:
            GET_ARGUMENT(ilist, i, operand);
            cb(opaque, GC_OBJECT_FROM(operand.matcher, GC_TYPE_MATCHER_INSTRUCTIONS));
            break;

        // One argument slot follows; it is stepped over without visiting it.
        case INSN_NUMBER:
        case INSN_LIST:
        case INSN_SET_LINE:
        case INSN_GET_BY_INDEX_KEEP:
        case INSN_MATCHER_SET_VAL:
        case INSN_FUNC:
            i++;
            break;

        // No argument slot follows.
        case INSN_NIL:
        case INSN_TRUE:
        case INSN_FALSE:
        case INSN_LIST_APPEND:
        case INSN_LIST_EXPAND_INTO:
        case INSN_DICT:
        case INSN_DICT_APPEND_KVPAIR:
        case INSN_GET_MEMBER:
        case INSN_NEXT_LINE:
        case INSN_DROP:
        case INSN_DUP:
        case INSN_CALL:
        case INSN_MATCHER_MUST_MATCH:
            break;
        }

        // Step from the last consumed slot to the next instruction.
        i++;
    }
}

/*
 * GC traversal hook for a matcher instruction list.
 *
 * Walks the matcher instruction stream front to back and calls cb (with
 * opaque passed through unchanged) for the string argument of every capture
 * instruction. All other argument slots are stepped over without visiting.
 *
 * Arguments are fetched with GET_ARGUMENT, which makes this function return
 * early when its bounds check fails.
 */
void
apfl_gc_matcher_instructions_traverse(struct matcher_instruction_list *milist, gc_visitor cb, void *opaque)
{
    size_t i = 0;

    while (i < milist->len) {
        union matcher_instruction_or_arg operand;

        switch (milist->instructions[i].instruction) {
        // Arguments: name, followed by an index slot and a len slot.
        case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
        case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
            GET_ARGUMENT(milist, i, operand);
            cb(opaque, GC_OBJECT_FROM(operand.string, GC_TYPE_STRING));
            i += 2; // step over the index and len slots
            break;

        // Argument: name.
        case MATCHER_CAPTURE_TO_VAR:
        case MATCHER_CAPTURE_TO_VAR_LOCAL:
            GET_ARGUMENT(milist, i, operand);
            cb(opaque, GC_OBJECT_FROM(operand.string, GC_TYPE_STRING));
            break;

        // Argument: an index (into the values), stepped over.
        case MATCHER_CHECK_CONST:
        case MATCHER_CHECK_PRED:
            i++;
            break;

        // No argument slot follows.
        case MATCHER_IGNORE:
        case MATCHER_ENTER_LIST:
        case MATCHER_LEAVE_LIST:
        case MATCHER_CONTINUE_FROM_END:
        case MATCHER_REMAINDING:
            break;
        }

        // Step from the last consumed slot to the next instruction.
        i++;
    }
}

/*
 * Return the name of an instruction as a static string.
 *
 * The result is the enumerator's identifier spelled out (for example
 * "INSN_NIL"). Values that match no enumerator handled below yield "??".
 */
const char *
apfl_instruction_to_string(enum instruction insn)
{
    // Expands to `case X: return "X"`, so a name can't drift from its enumerator.
    #define INSN_NAME_CASE(name) case name: return #name

    switch (insn) {
    INSN_NAME_CASE(INSN_NIL);
    INSN_NAME_CASE(INSN_TRUE);
    INSN_NAME_CASE(INSN_FALSE);
    INSN_NAME_CASE(INSN_NUMBER);
    INSN_NAME_CASE(INSN_STRING);
    INSN_NAME_CASE(INSN_LIST);
    INSN_NAME_CASE(INSN_LIST_APPEND);
    INSN_NAME_CASE(INSN_LIST_EXPAND_INTO);
    INSN_NAME_CASE(INSN_DICT);
    INSN_NAME_CASE(INSN_DICT_APPEND_KVPAIR);
    INSN_NAME_CASE(INSN_GET_MEMBER);
    INSN_NAME_CASE(INSN_VAR_GET);
    INSN_NAME_CASE(INSN_VAR_SET);
    INSN_NAME_CASE(INSN_VAR_SET_LOCAL);
    INSN_NAME_CASE(INSN_VAR_NEW);
    INSN_NAME_CASE(INSN_VAR_NEW_LOCAL);
    INSN_NAME_CASE(INSN_MOVE_TO_LOCAL_VAR);
    INSN_NAME_CASE(INSN_NEXT_LINE);
    INSN_NAME_CASE(INSN_SET_LINE);
    INSN_NAME_CASE(INSN_GET_BY_INDEX_KEEP);
    INSN_NAME_CASE(INSN_DROP);
    INSN_NAME_CASE(INSN_DUP);
    INSN_NAME_CASE(INSN_CALL);
    INSN_NAME_CASE(INSN_FUNC);
    INSN_NAME_CASE(INSN_FUNC_ADD_SUBFUNC);
    INSN_NAME_CASE(INSN_FUNC_SET_NAME);
    INSN_NAME_CASE(INSN_MATCHER_PUSH);
    INSN_NAME_CASE(INSN_MATCHER_SET_VAL);
    INSN_NAME_CASE(INSN_MATCHER_MUST_MATCH);
    }

    #undef INSN_NAME_CASE

    // Reached only for a value that is none of the enumerators above.
    return "??";
}

const char *
apfl_matcher_instruction_to_string(enum matcher_instruction insn)
{
    switch (insn) {
    case MATCHER_IGNORE:
        return "MATCHER_IGNORE";
    case MATCHER_CAPTURE_TO_VAR:
        return "MATCHER_CAPTURE_TO_VAR";
    case MATCHER_CAPTURE_TO_VAR_LOCAL:
        return "MATCHER_CAPTURE_TO_VAR_LOCAL";
    case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
        return "MATCHER_CAPTURE_TO_VAR_WITH_PATH";
    case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
        return "MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH";
    case MATCHER_CHECK_CONST:
        return "MATCHER_CHECK_CONST";
    case MATCHER_CHECK_PRED:
        return "MATCHER_CHECK_PRED";
    case MATCHER_ENTER_LIST:
        return "MATCHER_ENTER_LIST";
    case MATCHER_LEAVE_LIST:
        return "MATCHER_LEAVE_LIST";
    case MATCHER_CONTINUE_FROM_END:
        return "MATCHER_CONTINUE_FROM_END";
    case MATCHER_REMAINDING:
        return "MATCHER_REMAINDING";
    }

    return "??";
}

/*
 * Create a new, empty matcher instruction list via
 * apfl_gc_new_matcher_instructions().
 *
 * The new list has no instruction storage yet (instructions is NULL, len and
 * cap are 0) and both its capture and value counters start at 0.
 *
 * Returns the new list, or NULL if apfl_gc_new_matcher_instructions()
 * returned NULL.
 */
struct matcher_instruction_list *
apfl_matcher_instructions_new(struct gc *gc)
{
    struct matcher_instruction_list *new_list = apfl_gc_new_matcher_instructions(gc);

    if (new_list != NULL) {
        // Assigning a compound literal also zeroes every member not named here.
        *new_list = (struct matcher_instruction_list) {
            .capture_count = 0,
            .value_count = 0,
            .instructions = NULL,
            .len = 0,
            .cap = 0,
        };
    }

    return new_list;
}

/*
 * Release the instruction storage of milist.
 *
 * Hands the instructions array together with its capacity to FREE_LIST,
 * using the given allocator. milist itself is not passed to FREE_LIST here,
 * and none of its members are reset by this function.
 */
void
apfl_matcher_instructions_deinit(struct apfl_allocator allocator, struct matcher_instruction_list *milist)
{
    FREE_LIST(allocator, milist->instructions, milist->cap);
}

/*
 * Fetch the argument slot that follows the instruction at index i while
 * dumping bytecode.
 *
 * On success, i is advanced to the argument's slot and that slot is stored
 * in arg. If no slot follows the instruction (the list ends right after it),
 * "Bytecode corrupted" is written to w and the *enclosing function* returns
 * false, so this macro may only be used inside functions returning bool.
 *
 * The bounds check has to look at i + 1 (the slot that is about to be read),
 * not at i: callers only invoke this while i < len, so checking i itself
 * would never trigger and a truncated list would be read past its end.
 *
 * Note: ilist and i are evaluated more than once; pass plain identifiers.
 */
#define GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg)                       \
    do {                                                              \
        if ((i) + 1 >= (ilist)->len) {                                \
            FMT_TRY(apfl_io_write_string(w, "Bytecode corrupted"));   \
            return false;                                             \
        }                                                             \
        (arg) = (ilist)->instructions[++(i)];                         \
    } while (0)

bool
apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matcher_instruction_list *milist)
{
    for (size_t i = 0; i < milist->len; i++) {
        union matcher_instruction_or_arg arg;

        FMT_TRY(apfl_format_put_indent(w, indent));
        FMT_TRY(apfl_io_write_string(w, apfl_matcher_instruction_to_string(milist->instructions[i].instruction)));

        switch (milist->instructions[i].instruction) {
        case MATCHER_IGNORE:
        case MATCHER_ENTER_LIST:
        case MATCHER_LEAVE_LIST:
        case MATCHER_CONTINUE_FROM_END:
        case MATCHER_REMAINDING:
            break;
        case MATCHER_CAPTURE_TO_VAR:
        case MATCHER_CAPTURE_TO_VAR_LOCAL:
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_io_write_string(w, *arg.string));
            break;
        case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with string, index and len
        case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with string, index and len
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_io_write_string(w, *arg.string));
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, ", "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.index));
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, ", "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.len));
            break;
        case MATCHER_CHECK_CONST:
        case MATCHER_CHECK_PRED:
            GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.index));
            break;
        }

        FMT_TRY(apfl_io_write_byte(w, '\n'));
    }

    return true;
}

bool
apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_list *ilist)
{
    union instruction_or_arg arg;

    for (size_t i = 0; i < ilist->len; i++) {
        FMT_TRY(apfl_format_put_indent(w, indent));
        FMT_TRY(apfl_io_write_string(w, apfl_instruction_to_string(ilist->instructions[i].instruction)));

        switch (ilist->instructions[i].instruction) {
        case INSN_NIL:
        case INSN_TRUE:
        case INSN_FALSE:
        case INSN_LIST_APPEND:
        case INSN_LIST_EXPAND_INTO:
        case INSN_DICT:
        case INSN_DICT_APPEND_KVPAIR:
        case INSN_GET_MEMBER:
        case INSN_NEXT_LINE:
        case INSN_DROP:
        case INSN_DUP:
        case INSN_CALL:
        case INSN_MATCHER_MUST_MATCH:
            break;
        case INSN_NUMBER:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_format_put_number(w, arg.number));
            break;
        case INSN_LIST:
        case INSN_SET_LINE:
        case INSN_FUNC:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.count));
            break;
        case INSN_GET_BY_INDEX_KEEP:
        case INSN_MATCHER_SET_VAL:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_format_put_int(w, (int)arg.index));
            break;
        case INSN_STRING:
        case INSN_VAR_GET:
        case INSN_VAR_SET:
        case INSN_VAR_SET_LOCAL:
        case INSN_VAR_NEW:
        case INSN_VAR_NEW_LOCAL:
        case INSN_MOVE_TO_LOCAL_VAR:
        case INSN_FUNC_SET_NAME:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " "));
            FMT_TRY(apfl_io_write_string(w, *arg.string));
            break;
        case INSN_FUNC_ADD_SUBFUNC:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " ilist{\n"));
            FMT_TRY(apfl_bytecode_dump(indent+1, w, arg.body));
            FMT_TRY(apfl_format_put_indent(w, indent));
            FMT_TRY(apfl_io_write_string(w, "}"));
            break;
        case INSN_MATCHER_PUSH:
            GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
            FMT_TRY(apfl_io_write_string(w, " milist{\n"));
            FMT_TRY(apfl_bytecode_dump_matcher(indent+1, w, arg.matcher));
            FMT_TRY(apfl_format_put_indent(w, indent));
            FMT_TRY(apfl_io_write_string(w, "}"));
            break;
        }

        FMT_TRY(apfl_io_write_byte(w, '\n'));
    }

    return true;
}