#include #include "apfl.h" #include "alloc.h" #include "bytecode.h" #include "encode.h" #include "format.h" #include "gc.h" #include "hashmap.h" #include "resizable.h" #include "strings.h" #define BYTECODE_VERSION 0 enum instruction_argument_type { INSN_ARGS_NONE, INSN_ARGS_NUMBER, INSN_ARGS_COUNT, INSN_ARGS_INDEX, INSN_ARGS_STRING, INSN_ARGS_BODY, INSN_ARGS_MATCHER, }; enum matcher_instruction_argument_type { MINSN_ARGS_NONE, MINSN_ARGS_INDEX, MINSN_ARGS_NAME, MINSN_ARGS_NAME_INDEX_LEN, }; static enum instruction_argument_type argument_type_for_instruction(enum instruction insn) { switch (insn) { case INSN_NIL: case INSN_TRUE: case INSN_FALSE: case INSN_LIST_APPEND: case INSN_LIST_EXPAND_INTO: case INSN_DICT: case INSN_DICT_APPEND_KVPAIR: case INSN_GET_MEMBER: case INSN_NEXT_LINE: case INSN_DROP: case INSN_DUP: case INSN_CALL: case INSN_MATCHER_MUST_MATCH: case INSN_BUILD_PAIR: return INSN_ARGS_NONE; case INSN_NUMBER: return INSN_ARGS_NUMBER; case INSN_LIST: case INSN_SET_LINE: case INSN_FUNC: return INSN_ARGS_COUNT; case INSN_GET_BY_INDEX_KEEP: case INSN_MATCHER_SET_VAL: return INSN_ARGS_INDEX; case INSN_STRING: case INSN_VAR_GET: case INSN_VAR_SET: case INSN_VAR_SET_LOCAL: case INSN_VAR_NEW: case INSN_VAR_NEW_LOCAL: case INSN_MOVE_TO_LOCAL_VAR: case INSN_FUNC_SET_NAME: return INSN_ARGS_STRING; case INSN_FUNC_ADD_SUBFUNC: case INSN_FUNC_ADD_SUBFUNC_ANYARGS: return INSN_ARGS_BODY; case INSN_MATCHER_PUSH: return INSN_ARGS_MATCHER; } assert(false); return INSN_ARGS_NONE; } static enum matcher_instruction_argument_type matcher_argument_type_for_instruction(enum matcher_instruction insn) { switch (insn) { case MATCHER_IGNORE: case MATCHER_ENTER_LIST: case MATCHER_LEAVE_LIST: case MATCHER_CONTINUE_FROM_END: case MATCHER_REMAINDING: case MATCHER_UNPACK_PAIR: return MINSN_ARGS_NONE; case MATCHER_CHECK_CONST: // with index as values index case MATCHER_CHECK_PRED: // with index as values index return MINSN_ARGS_INDEX; case MATCHER_CAPTURE_TO_VAR: // with name case MATCHER_CAPTURE_TO_VAR_LOCAL: // with name return MINSN_ARGS_NAME; case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with name, index and len case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with name, index and len return MINSN_ARGS_NAME_INDEX_LEN; } assert(false); return MINSN_ARGS_NONE; } static bool valid_matcher_instruction(enum matcher_instruction insn) { switch (insn) { case MATCHER_IGNORE: case MATCHER_ENTER_LIST: case MATCHER_LEAVE_LIST: case MATCHER_CONTINUE_FROM_END: case MATCHER_REMAINDING: case MATCHER_CHECK_CONST: case MATCHER_CHECK_PRED: case MATCHER_CAPTURE_TO_VAR: case MATCHER_CAPTURE_TO_VAR_LOCAL: case MATCHER_CAPTURE_TO_VAR_WITH_PATH: case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: case MATCHER_UNPACK_PAIR: return true; } return false; } static bool valid_instruction(enum instruction insn) { switch (insn) { case INSN_NIL: case INSN_TRUE: case INSN_FALSE: case INSN_LIST_APPEND: case INSN_LIST_EXPAND_INTO: case INSN_DICT: case INSN_DICT_APPEND_KVPAIR: case INSN_GET_MEMBER: case INSN_NEXT_LINE: case INSN_DROP: case INSN_DUP: case INSN_CALL: case INSN_MATCHER_MUST_MATCH: case INSN_NUMBER: case INSN_LIST: case INSN_SET_LINE: case INSN_FUNC: case INSN_GET_BY_INDEX_KEEP: case INSN_MATCHER_SET_VAL: case INSN_STRING: case INSN_VAR_GET: case INSN_VAR_SET: case INSN_VAR_SET_LOCAL: case INSN_VAR_NEW: case INSN_VAR_NEW_LOCAL: case INSN_MOVE_TO_LOCAL_VAR: case INSN_FUNC_SET_NAME: case INSN_FUNC_ADD_SUBFUNC: case INSN_FUNC_ADD_SUBFUNC_ANYARGS: case INSN_MATCHER_PUSH: case INSN_BUILD_PAIR: return true; } return false; } struct instruction_list * apfl_instructions_new(struct gc *gc, size_t line, struct apfl_string *filename) { struct instruction_list *ilist = apfl_gc_new_instructions(gc); if (ilist == NULL) { return NULL; } *ilist = (struct instruction_list) { .instructions = NULL, .len = 0, .cap = 0, .line = line, .filename = filename, }; return ilist; } void apfl_instructions_deinit(struct apfl_allocator allocator, struct instruction_list *ilist) { FREE_LIST(allocator, ilist->instructions, ilist->cap); } #define GET_ARGUMENT(ilist, i, arg) \ do { \ if (i >= ilist->len) { \ return; \ } \ arg = ilist->instructions[++i]; \ } while (0) void apfl_gc_instructions_traverse(struct instruction_list *ilist, gc_visitor cb, void *opaque) { union instruction_or_arg arg; if (ilist->filename != NULL) { cb(opaque, GC_OBJECT_FROM(ilist->filename, GC_TYPE_STRING)); } for (size_t i = 0; i < ilist->len; i++) { switch (argument_type_for_instruction(ilist->instructions[i].instruction)) { case INSN_ARGS_NONE: break; case INSN_ARGS_NUMBER: case INSN_ARGS_COUNT: case INSN_ARGS_INDEX: i++; break; case INSN_ARGS_STRING: GET_ARGUMENT(ilist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING)); break; case INSN_ARGS_BODY: GET_ARGUMENT(ilist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.body, GC_TYPE_INSTRUCTIONS)); break; case INSN_ARGS_MATCHER: GET_ARGUMENT(ilist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.matcher, GC_TYPE_MATCHER_INSTRUCTIONS)); break; } } } void apfl_gc_matcher_instructions_traverse(struct matcher_instruction_list *milist, gc_visitor cb, void *opaque) { union matcher_instruction_or_arg arg; for (size_t i = 0; i < milist->len; i++) { switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) { case MINSN_ARGS_NONE: break; case MINSN_ARGS_INDEX: i++; break; case MINSN_ARGS_NAME: GET_ARGUMENT(milist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING)); break; case MINSN_ARGS_NAME_INDEX_LEN: GET_ARGUMENT(milist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING)); i++; i++; break; } } } const char * apfl_instruction_to_string(enum instruction insn) { switch (insn) { case INSN_NIL: return "INSN_NIL"; case INSN_TRUE: return "INSN_TRUE"; case INSN_FALSE: return "INSN_FALSE"; case INSN_NUMBER: return "INSN_NUMBER"; case INSN_STRING: return "INSN_STRING"; case INSN_LIST: return "INSN_LIST"; case INSN_LIST_APPEND: return "INSN_LIST_APPEND"; case INSN_LIST_EXPAND_INTO: return "INSN_LIST_EXPAND_INTO"; case INSN_DICT: return "INSN_DICT"; case INSN_DICT_APPEND_KVPAIR: return "INSN_DICT_APPEND_KVPAIR"; case INSN_GET_MEMBER: return "INSN_GET_MEMBER"; case INSN_VAR_GET: return "INSN_VAR_GET"; case INSN_VAR_SET: return "INSN_VAR_SET"; case INSN_VAR_SET_LOCAL: return "INSN_VAR_SET_LOCAL"; case INSN_VAR_NEW: return "INSN_VAR_NEW"; case INSN_VAR_NEW_LOCAL: return "INSN_VAR_NEW_LOCAL"; case INSN_MOVE_TO_LOCAL_VAR: return "INSN_MOVE_TO_LOCAL_VAR"; case INSN_NEXT_LINE: return "INSN_NEXT_LINE"; case INSN_SET_LINE: return "INSN_SET_LINE"; case INSN_GET_BY_INDEX_KEEP: return "INSN_GET_BY_INDEX_KEEP"; case INSN_DROP: return "INSN_DROP"; case INSN_DUP: return "INSN_DUP"; case INSN_CALL: return "INSN_CALL"; case INSN_FUNC: return "INSN_FUNC"; case INSN_FUNC_ADD_SUBFUNC: return "INSN_FUNC_ADD_SUBFUNC"; case INSN_FUNC_ADD_SUBFUNC_ANYARGS: return "INSN_FUNC_ADD_SUBFUNC_ANYARGS"; case INSN_FUNC_SET_NAME: return "INSN_FUNC_SET_NAME"; case INSN_MATCHER_PUSH: return "INSN_MATCHER_PUSH"; case INSN_MATCHER_SET_VAL: return "INSN_MATCHER_SET_VAL"; case INSN_MATCHER_MUST_MATCH: return "INSN_MATCHER_MUST_MATCH"; case INSN_BUILD_PAIR: return "INSN_BUILD_PAIR"; } return "??"; } const char * apfl_matcher_instruction_to_string(enum matcher_instruction insn) { switch (insn) { case MATCHER_IGNORE: return "MATCHER_IGNORE"; case MATCHER_CAPTURE_TO_VAR: return "MATCHER_CAPTURE_TO_VAR"; case MATCHER_CAPTURE_TO_VAR_LOCAL: return "MATCHER_CAPTURE_TO_VAR_LOCAL"; case MATCHER_CAPTURE_TO_VAR_WITH_PATH: return "MATCHER_CAPTURE_TO_VAR_WITH_PATH"; case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: return "MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH"; case MATCHER_CHECK_CONST: return "MATCHER_CHECK_CONST"; case MATCHER_CHECK_PRED: return "MATCHER_CHECK_PRED"; case MATCHER_ENTER_LIST: return "MATCHER_ENTER_LIST"; case MATCHER_LEAVE_LIST: return "MATCHER_LEAVE_LIST"; case MATCHER_CONTINUE_FROM_END: return "MATCHER_CONTINUE_FROM_END"; case MATCHER_REMAINDING: return "MATCHER_REMAINDING"; case MATCHER_UNPACK_PAIR: return "MATCHER_UNPACK_PAIR"; } return "??"; } struct matcher_instruction_list * apfl_matcher_instructions_new(struct gc *gc) { struct matcher_instruction_list *milist = apfl_gc_new_matcher_instructions(gc); if (milist == NULL) { return NULL; } *milist = (struct matcher_instruction_list) { .instructions = NULL, .len = 0, .cap = 0, .capture_count = 0, .value_count = 0, }; return milist; } void apfl_matcher_instructions_deinit(struct apfl_allocator allocator, struct matcher_instruction_list *milist) { FREE_LIST(allocator, milist->instructions, milist->cap); } #define GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg) \ do { \ if (i >= ilist->len) { \ FMT_TRY(apfl_io_write_string(w, "Bytecode corrupted")); \ return false; \ } \ arg = ilist->instructions[++i]; \ } while (0) bool apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matcher_instruction_list *milist) { for (size_t i = 0; i < milist->len; i++) { union matcher_instruction_or_arg arg; FMT_TRY(apfl_format_put_indent(w, indent)); FMT_TRY(apfl_io_write_string(w, apfl_matcher_instruction_to_string(milist->instructions[i].instruction))); switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) { case MINSN_ARGS_NONE: break; case MINSN_ARGS_NAME: GET_ARGUMENT_FOR_DUMP(w, milist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_io_write_string(w, *arg.string)); break; case MINSN_ARGS_NAME_INDEX_LEN: GET_ARGUMENT_FOR_DUMP(w, milist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_io_write_string(w, *arg.string)); GET_ARGUMENT_FOR_DUMP(w, milist, i, arg); FMT_TRY(apfl_io_write_string(w, ", ")); FMT_TRY(apfl_format_put_int(w, (int)arg.index)); GET_ARGUMENT_FOR_DUMP(w, milist, i, arg); FMT_TRY(apfl_io_write_string(w, ", ")); FMT_TRY(apfl_format_put_int(w, (int)arg.len)); break; case MINSN_ARGS_INDEX: GET_ARGUMENT_FOR_DUMP(w, milist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_format_put_int(w, (int)arg.index)); break; } FMT_TRY(apfl_io_write_byte(w, '\n')); } return true; } bool apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_list *ilist) { union instruction_or_arg arg; for (size_t i = 0; i < ilist->len; i++) { FMT_TRY(apfl_format_put_indent(w, indent)); FMT_TRY(apfl_io_write_string(w, apfl_instruction_to_string(ilist->instructions[i].instruction))); switch (argument_type_for_instruction(ilist->instructions[i].instruction)) { case INSN_ARGS_NONE: break; case INSN_ARGS_NUMBER: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_format_put_number(w, arg.number)); break; case INSN_ARGS_COUNT: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_format_put_int(w, (int)arg.count)); break; case INSN_ARGS_INDEX: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_format_put_int(w, (int)arg.index)); break; case INSN_ARGS_STRING: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_io_write_string(w, *arg.string)); break; case INSN_ARGS_BODY: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ilist{\n")); FMT_TRY(apfl_bytecode_dump(indent+1, w, arg.body)); FMT_TRY(apfl_format_put_indent(w, indent)); FMT_TRY(apfl_io_write_string(w, "}")); break; case INSN_ARGS_MATCHER: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " milist{\n")); FMT_TRY(apfl_bytecode_dump_matcher(indent+1, w, arg.matcher)); FMT_TRY(apfl_format_put_indent(w, indent)); FMT_TRY(apfl_io_write_string(w, "}")); break; } FMT_TRY(apfl_io_write_byte(w, '\n')); } return true; } struct serializer { struct apfl_allocator allocator; struct apfl_io_writer w; struct apfl_hashmap string_lookup; size_t next_string_index; }; struct unserializer { struct gc *gc; struct apfl_io_reader r; struct apfl_string **strings; size_t strings_len; size_t strings_cap; }; static bool serializer_strings_eq(void *opaque, const void *_a, const void *_b) { (void)opaque; const struct apfl_string * const *a = _a; const struct apfl_string * const *b = _b; return apfl_string_eq(**a, **b); } static apfl_hash serializer_strings_hash(void *opaque, const void *_key) { (void)opaque; const struct apfl_string * const *key = _key; struct apfl_string_view sv = apfl_string_view_from(**key); return apfl_hash_fnv1a(sv.bytes, sv.len); } #define MAXU64 0xFFFFFFFFFFFFFFFF static bool serialize_size(struct apfl_io_writer w, size_t n) { uintmax_t _n = n; if (_n > MAXU64) { return false; } return apfl_encode_u64(w, _n); } static bool unserialize_size(struct apfl_io_reader r, size_t *n) { uint_least64_t _n; FMT_TRY(apfl_decode_u64(r, &_n)); if (_n > SIZE_MAX) { return false; } *n = (size_t)_n; return true; } #define GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg) \ do { \ if (i >= ilist->len) { \ assert(false); \ return false; \ } \ arg = ilist->instructions[++i]; \ } while (0) #define DEF_APPEND_INS_OR_ARG(name, ListType, InsOrArgType) \ static bool \ name( \ ListType *milist, \ InsOrArgType instruction_or_arg \ ) { \ if (milist->len >= milist->cap) { \ return false; \ } \ milist->instructions[milist->len] = instruction_or_arg; \ milist->len++; \ \ return true; \ } #define ABSTRACT_APPEND_INS_OR_ARG(fn, InsOrArgType, ilist, which, arg) \ fn((ilist), (InsOrArgType) {.which = (arg)}) DEF_APPEND_INS_OR_ARG( append_instruction_or_arg, struct instruction_list, union instruction_or_arg ) DEF_APPEND_INS_OR_ARG( append_matcher_instruction_or_arg, struct matcher_instruction_list, union matcher_instruction_or_arg ) #define APPEND_INS_OR_ARG(ilist, which, arg) \ ABSTRACT_APPEND_INS_OR_ARG( \ append_instruction_or_arg, \ union instruction_or_arg, \ ilist, \ which, \ arg \ ) #define APPEND_MATCHER_INS_OR_ARG(ilist, which, arg) \ ABSTRACT_APPEND_INS_OR_ARG( \ append_matcher_instruction_or_arg, \ union matcher_instruction_or_arg, \ ilist, \ which, \ arg \ ) static bool serialize_string( struct serializer *serializer, struct apfl_string *string ) { if (string == NULL) { return apfl_encode_u64(serializer->w, 0); } uint_least64_t index; if (apfl_hashmap_get(&serializer->string_lookup, &string, &index)) { if (index > (MAXU64>>1)) { return false; } index <<= 1; index |= 1; return apfl_encode_u64(serializer->w, index); } uintmax_t _len = string->len; if (_len > (MAXU64>>1)-1) { return false; } FMT_TRY(apfl_encode_u64(serializer->w, (string->len + 1) << 1)); FMT_TRY(apfl_io_write_string(serializer->w, *string)); index = serializer->next_string_index; serializer->next_string_index++; return apfl_hashmap_set(&serializer->string_lookup, &string, &index); } static bool unserialize_string( struct unserializer *unserializer, struct apfl_string **s ) { uint_least64_t index_or_len; FMT_TRY(apfl_decode_u64(unserializer->r, &index_or_len)); if (index_or_len == 0) { *s = NULL; return true; } bool is_index = (index_or_len & 1) == 1; index_or_len >>= 1; if (is_index) { if (index_or_len > SIZE_MAX) { return false; } size_t index = index_or_len; if (index >= unserializer->strings_len) { return false; } *s = unserializer->strings[index]; return true; } else { index_or_len -= 1; if (index_or_len > SIZE_MAX) { return false; } size_t len = index_or_len; unsigned char *buf = NULL; if (len > 0) { buf = ALLOC_BYTES(unserializer->gc->allocator, len); if (buf == NULL) { return false; } if (!apfl_io_read_bytes_exact_size(unserializer->r, buf, len)) { FREE_BYTES(unserializer->gc->allocator, buf, len); return false; } } struct apfl_string tmpstring = { .bytes = buf, .len = len, .cap = len, }; if ((*s = apfl_string_move_into_new_gc_string(unserializer->gc, &tmpstring)) == NULL) { FREE_BYTES(unserializer->gc->allocator, buf, len); return false; } if (!apfl_resizable_append( unserializer->gc->allocator, sizeof(struct apfl_string *), (void **)&unserializer->strings, &unserializer->strings_len, &unserializer->strings_cap, &(*s), 1 )) { return false; } return true; } } static bool serialize_milist( struct serializer *serializer, struct matcher_instruction_list *milist ) { union matcher_instruction_or_arg arg; FMT_TRY(serialize_size(serializer->w, milist->value_count)); FMT_TRY(serialize_size(serializer->w, milist->capture_count)); FMT_TRY(serialize_size(serializer->w, milist->len)); for (size_t i = 0; i < milist->len; i++) { enum matcher_instruction insn = milist->instructions[i].instruction; FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn)); switch (matcher_argument_type_for_instruction(insn)) { case MINSN_ARGS_NONE: break; case MINSN_ARGS_INDEX: GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); FMT_TRY(serialize_size(serializer->w, arg.index)); break; case MINSN_ARGS_NAME: GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); FMT_TRY(serialize_string(serializer, arg.string)); break; case MINSN_ARGS_NAME_INDEX_LEN: GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); FMT_TRY(serialize_string(serializer, arg.string)); GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); FMT_TRY(serialize_size(serializer->w, arg.index)); GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); FMT_TRY(serialize_size(serializer->w, arg.len)); break; } } return true; } static bool unserialize_milist( struct unserializer *unserializer, struct matcher_instruction_list *milist ) { size_t len; FMT_TRY(unserialize_size(unserializer->r, &milist->value_count)); FMT_TRY(unserialize_size(unserializer->r, &milist->capture_count)); FMT_TRY(unserialize_size(unserializer->r, &len)); if ((milist->instructions = ALLOC_LIST( unserializer->gc->allocator, union matcher_instruction_or_arg, len )) == NULL) { return false; } milist->cap = len; milist->len = 0; while (milist->len < len) { unsigned char b; FMT_TRY(apfl_io_read_byte(unserializer->r, &b)); enum matcher_instruction insn = b; if (!valid_matcher_instruction(insn)) { return false; } FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, instruction, insn)); switch (matcher_argument_type_for_instruction(insn)) { case MINSN_ARGS_NONE: break; case MINSN_ARGS_INDEX: { size_t index; FMT_TRY(unserialize_size(unserializer->r, &index)); FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, index)); break; } case MINSN_ARGS_NAME: { struct apfl_string *name; FMT_TRY(unserialize_string(unserializer, &name)); FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name)); break; } case MINSN_ARGS_NAME_INDEX_LEN: { struct apfl_string *name; FMT_TRY(unserialize_string(unserializer, &name)); FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name)); size_t s; FMT_TRY(unserialize_size(unserializer->r, &s)); FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, s)); FMT_TRY(unserialize_size(unserializer->r, &s)); FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, len, s)); break; } } } return true; } static bool serialize_ilist( struct serializer *serializer, struct instruction_list *ilist ) { union instruction_or_arg arg; FMT_TRY(serialize_size(serializer->w, ilist->line)); FMT_TRY(serialize_string(serializer, ilist->filename)); FMT_TRY(serialize_size(serializer->w, ilist->len)); for (size_t i = 0; i < ilist->len; i++) { enum instruction insn = ilist->instructions[i].instruction; FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn)); switch (argument_type_for_instruction(insn)) { case INSN_ARGS_NONE: break; case INSN_ARGS_NUMBER: GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); FMT_TRY(apfl_encode_double(serializer->w, arg.number)); break; case INSN_ARGS_COUNT: GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); FMT_TRY(serialize_size(serializer->w, arg.count)); break; case INSN_ARGS_INDEX: GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); FMT_TRY(serialize_size(serializer->w, arg.index)); break; case INSN_ARGS_STRING: GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); FMT_TRY(serialize_string(serializer, arg.string)); break; case INSN_ARGS_BODY: GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); FMT_TRY(serialize_ilist(serializer, arg.body)); break; case INSN_ARGS_MATCHER: GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); FMT_TRY(serialize_milist(serializer, arg.matcher)); break; } } return true; } static bool set_ilist_nested( struct unserializer *unserializer, void *opaque, struct instruction_list *ilist ) { (void)unserializer; struct instruction_list **dst = opaque; *dst = ilist; return true; } static bool unserialize_ilist( struct unserializer *unserializer, bool (*set_ilist)(struct unserializer *, void *, struct instruction_list *ilist), void *opaque ) { size_t line; FMT_TRY(unserialize_size(unserializer->r, &line)); struct apfl_string *filename; FMT_TRY(unserialize_string(unserializer, &filename)); size_t tmproots; if (filename != NULL) { tmproots = apfl_gc_tmproots_begin(unserializer->gc); if (!apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(filename, GC_TYPE_STRING))) { return false; } } struct instruction_list *ilist = apfl_instructions_new(unserializer->gc, line, filename); if (ilist == NULL) { return false; } if (filename != NULL) { apfl_gc_tmproots_restore(unserializer->gc, tmproots); } FMT_TRY(set_ilist(unserializer, opaque, ilist)); size_t len; FMT_TRY(unserialize_size(unserializer->r, &len)); if (len > 0) { ilist->instructions = ALLOC_LIST(unserializer->gc->allocator, union instruction_or_arg, len); if (ilist->instructions == NULL) { return false; } } ilist->cap = len; while (ilist->len < len) { unsigned char b; FMT_TRY(apfl_io_read_byte(unserializer->r, &b)); enum instruction insn = b; if (!valid_instruction(insn)) { return false; } FMT_TRY(APPEND_INS_OR_ARG(ilist, instruction, insn)); switch (argument_type_for_instruction(insn)) { case INSN_ARGS_NONE: break; case INSN_ARGS_NUMBER: { double d; FMT_TRY(apfl_decode_double(unserializer->r, &d)); FMT_TRY(APPEND_INS_OR_ARG(ilist, number, d)); break; } case INSN_ARGS_COUNT: { size_t count; FMT_TRY(unserialize_size(unserializer->r, &count)); FMT_TRY(APPEND_INS_OR_ARG(ilist, count, count)); break; } case INSN_ARGS_INDEX: { size_t index; FMT_TRY(unserialize_size(unserializer->r, &index)); FMT_TRY(APPEND_INS_OR_ARG(ilist, index, index)); break; } case INSN_ARGS_STRING: { struct apfl_string *s; FMT_TRY(unserialize_string(unserializer, &s)); FMT_TRY(APPEND_INS_OR_ARG(ilist, string, s)); break; } case INSN_ARGS_BODY: { if (ilist->len >= ilist->cap) { return false; } struct instruction_list **dst = &ilist->instructions[ilist->len].body; ilist->len++; FMT_TRY(unserialize_ilist(unserializer, set_ilist_nested, dst)); break; } case INSN_ARGS_MATCHER: { if (ilist->len >= ilist->cap) { return false; } struct matcher_instruction_list *matcher = apfl_matcher_instructions_new(unserializer->gc); if (matcher == NULL) { return false; } ilist->instructions[ilist->len].matcher = matcher; ilist->len++; FMT_TRY(unserialize_milist(unserializer, matcher)); break; } } } return true; } static const unsigned char header[] = { '\0', 'a', 'p', 'f', 'l', 'B', BYTECODE_VERSION}; #define BYTE_ARRAY_SV(b) ((struct apfl_string_view) { .bytes = (b), .len = sizeof(b), }) #define HEADER_LEN sizeof(header) static struct apfl_string_view header_sv(void) { return BYTE_ARRAY_SV(header); } static bool bytecode_serialize_inner( struct serializer *serializer, struct instruction_list *ilist ) { FMT_TRY(apfl_io_write_string(serializer->w, header_sv())); FMT_TRY(serialize_ilist(serializer, ilist)); return true; } bool apfl_bytecode_serialize( struct apfl_allocator allocator, struct apfl_io_writer w, struct instruction_list *ilist ) { struct serializer serializer = { .allocator = allocator, .w = w, .next_string_index = 0, }; if (!apfl_hashmap_init( &serializer.string_lookup, allocator, (struct apfl_hashmap_callbacks) { .opaque = NULL, .keys_eq = serializer_strings_eq, .calc_hash = serializer_strings_hash, }, sizeof(struct apfl_string *), sizeof(uint_least64_t) )) { return false; } bool out = bytecode_serialize_inner(&serializer, ilist); apfl_hashmap_deinit(&serializer.string_lookup); return out; } static bool set_ilist_root( struct unserializer *unserializer, void *opaque, struct instruction_list *ilist ) { struct instruction_list **dest = opaque; if (!apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(ilist, GC_TYPE_INSTRUCTIONS))) { return false; } *dest = ilist; return true; } static struct instruction_list * bytecode_unserialize_inner(struct unserializer *unserializer) { unsigned char header_buf[HEADER_LEN]; FMT_TRY(apfl_io_read_bytes_exact_size(unserializer->r, header_buf, HEADER_LEN)); if (!apfl_string_eq(header_sv(), BYTE_ARRAY_SV(header_buf))) { return NULL; } struct instruction_list *ilist = NULL; if (!unserialize_ilist(unserializer, set_ilist_root, &ilist)) { return NULL; } return ilist; } struct instruction_list * apfl_bytecode_unserialize( struct gc *gc, struct apfl_io_reader r ) { struct unserializer unserializer = { .gc = gc, .r = r, .strings = NULL, .strings_len = 0, .strings_cap = 0, }; size_t tmproots = apfl_gc_tmproots_begin(gc); struct instruction_list *out = bytecode_unserialize_inner(&unserializer); apfl_gc_tmproots_restore(gc, tmproots); FREE_LIST(gc->allocator, unserializer.strings, unserializer.strings_cap); return out; }