From 2666c0f148b95a3da8b1f8f9d87387dbf74cc8a8 Mon Sep 17 00:00:00 2001 From: Laria Carolin Chabowski Date: Sun, 5 Mar 2023 17:02:42 +0100 Subject: [PATCH] Implement bytecode (de)serialization --- src/CMakeLists.txt | 5 + src/apfl.h | 51 +- src/bytecode.c | 882 +++++++++++++++++++++++++++++++---- src/bytecode.h | 11 + src/context.c | 42 ++ src/context.h | 2 + src/encode.c | 125 +++++ src/encode.h | 23 + src/encode_test.c | 200 ++++++++ src/functional-test-runner.c | 5 +- src/globals.c | 45 +- src/io.c | 91 ++++ src/parser_test.c | 8 +- src/source_readers.c | 54 +-- src/tokenizer_test.c | 8 +- 15 files changed, 1390 insertions(+), 162 deletions(-) create mode 100644 src/encode.c create mode 100644 src/encode.h create mode 100644 src/encode_test.c create mode 100644 src/io.c diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0ad8b68..fb66ed9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,6 +8,7 @@ add_library(apfl alloc.c bytecode.c compile.c + encode.c context.c error.c eval.c @@ -15,6 +16,7 @@ add_library(apfl format.c gc.c hashmap.c + io.c globals.c messages.c parser.c @@ -28,6 +30,8 @@ add_library(apfl value.c ) +target_link_libraries(apfl PUBLIC m) + add_executable(apfl-bin main.c) target_link_libraries(apfl-bin PUBLIC apfl) @@ -54,6 +58,7 @@ unittest(resizable_test "resizable.h") unittest(hashmap_test "hashmap.h") unittest(strings_test "") unittest(alloc_test "") +unittest(encode_test "encode.h") function(functionaltest name) add_test(NAME "functionaltest_${name}" COMMAND functional-test-runner ${CMAKE_SOURCE_DIR}/src/functional-tests/${name}.at) diff --git a/src/apfl.h b/src/apfl.h index 8eee50a..1f30c4b 100644 --- a/src/apfl.h +++ b/src/apfl.h @@ -141,6 +141,39 @@ bool apfl_format_put_indent(struct apfl_io_writer, unsigned); bool apfl_format_put_number(struct apfl_io_writer, apfl_number); bool apfl_format_put_poiner(struct apfl_io_writer, void *); +struct apfl_io_reader { + /* buf points to a buffer to fill that has a size of *len. 
+ * The callback must set len to the number of read bytes and return true + * on success and false on failure. + * + * Setting len to 0 indicates an end of file. + */ + bool (*read)(void *, unsigned char *buf, size_t *len); + void *opaque; +}; + +struct apfl_io_reader apfl_io_file_reader(FILE *f); + +/* apfl_io_string_reader_* implements an apfl_io_reader that reads data from a + * string view. + */ +struct apfl_io_string_reader_data { + struct apfl_string_view sv; + size_t off; +}; + +struct apfl_io_string_reader_data apfl_io_string_reader_create(struct apfl_string_view); + +/* Creates a reader for apfl_io_string_reader_data. + * The pointed to apfl_io_string_reader_data and the underlying string view + * must be alive while the reader is in use. + */ +struct apfl_io_reader apfl_io_string_reader(struct apfl_io_string_reader_data *); + +bool apfl_io_read_bytes(struct apfl_io_reader, unsigned char *buf, size_t *len); +bool apfl_io_read_bytes_exact_size(struct apfl_io_reader, unsigned char *buf, size_t len); +bool apfl_io_read_byte(struct apfl_io_reader, unsigned char *byte); + // Tokens enum apfl_token_type { @@ -543,23 +576,7 @@ struct apfl_token apfl_tokenizer_get_token(apfl_tokenizer_ptr); */ struct apfl_error apfl_tokenizer_get_error(apfl_tokenizer_ptr); -/* apfl_string_source_reader_* implements an apfl_source_reader that reads - * source code from a string view. - */ -struct apfl_string_source_reader_data { - struct apfl_string_view sv; - size_t off; -}; - -struct apfl_string_source_reader_data apfl_string_source_reader_create(struct apfl_string_view); - -/* Creates a source reader for apfl_string_source_reader_data. - * The pointed to apfl_string_source_reader_data and the underlying string view - * must be alive while the reader is in use. 
- */ -struct apfl_source_reader apfl_string_source_reader(struct apfl_string_source_reader_data *); - -struct apfl_source_reader apfl_stdio_source_reader(FILE *f); +struct apfl_source_reader apfl_io_reader_as_source_reader(struct apfl_io_reader *); struct apfl_parser_token_source { enum apfl_parse_result (*next)(void *, bool need); diff --git a/src/bytecode.c b/src/bytecode.c index 123d92a..8755132 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -4,8 +4,165 @@ #include "alloc.h" #include "bytecode.h" +#include "encode.h" #include "format.h" #include "gc.h" +#include "hashmap.h" +#include "resizable.h" +#include "strings.h" + +#define BYTECODE_VERSION 0 + +enum instruction_argument_type { + INSN_ARGS_NONE, + INSN_ARGS_NUMBER, + INSN_ARGS_COUNT, + INSN_ARGS_INDEX, + INSN_ARGS_STRING, + INSN_ARGS_BODY, + INSN_ARGS_MATCHER, +}; + +enum matcher_instruction_argument_type { + MINSN_ARGS_NONE, + MINSN_ARGS_INDEX, + MINSN_ARGS_NAME, + MINSN_ARGS_NAME_INDEX_LEN, +}; + +static enum instruction_argument_type +argument_type_for_instruction(enum instruction insn) +{ + switch (insn) { + case INSN_NIL: + case INSN_TRUE: + case INSN_FALSE: + case INSN_LIST_APPEND: + case INSN_LIST_EXPAND_INTO: + case INSN_DICT: + case INSN_DICT_APPEND_KVPAIR: + case INSN_GET_MEMBER: + case INSN_NEXT_LINE: + case INSN_DROP: + case INSN_DUP: + case INSN_CALL: + case INSN_MATCHER_MUST_MATCH: + return INSN_ARGS_NONE; + case INSN_NUMBER: + return INSN_ARGS_NUMBER; + case INSN_LIST: + case INSN_SET_LINE: + case INSN_FUNC: + return INSN_ARGS_COUNT; + case INSN_GET_BY_INDEX_KEEP: + case INSN_MATCHER_SET_VAL: + return INSN_ARGS_INDEX; + case INSN_STRING: + case INSN_VAR_GET: + case INSN_VAR_SET: + case INSN_VAR_SET_LOCAL: + case INSN_VAR_NEW: + case INSN_VAR_NEW_LOCAL: + case INSN_MOVE_TO_LOCAL_VAR: + case INSN_FUNC_SET_NAME: + return INSN_ARGS_STRING; + case INSN_FUNC_ADD_SUBFUNC: + case INSN_FUNC_ADD_SUBFUNC_ANYARGS: + return INSN_ARGS_BODY; + case INSN_MATCHER_PUSH: + return INSN_ARGS_MATCHER; + } + 
+ assert(false); + return INSN_ARGS_NONE; +} + + +static enum matcher_instruction_argument_type +matcher_argument_type_for_instruction(enum matcher_instruction insn) +{ + switch (insn) { + case MATCHER_IGNORE: + case MATCHER_ENTER_LIST: + case MATCHER_LEAVE_LIST: + case MATCHER_CONTINUE_FROM_END: + case MATCHER_REMAINDING: + return MINSN_ARGS_NONE; + case MATCHER_CHECK_CONST: // with index as values index + case MATCHER_CHECK_PRED: // with index as values index + return MINSN_ARGS_INDEX; + case MATCHER_CAPTURE_TO_VAR: // with name + case MATCHER_CAPTURE_TO_VAR_LOCAL: // with name + return MINSN_ARGS_NAME; + case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with name, index and len + case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with name, index and len + return MINSN_ARGS_NAME_INDEX_LEN; + } + + assert(false); + return MINSN_ARGS_NONE; +} + +static bool +valid_matcher_instruction(enum matcher_instruction insn) +{ + switch (insn) { + case MATCHER_IGNORE: + case MATCHER_ENTER_LIST: + case MATCHER_LEAVE_LIST: + case MATCHER_CONTINUE_FROM_END: + case MATCHER_REMAINDING: + case MATCHER_CHECK_CONST: + case MATCHER_CHECK_PRED: + case MATCHER_CAPTURE_TO_VAR: + case MATCHER_CAPTURE_TO_VAR_LOCAL: + case MATCHER_CAPTURE_TO_VAR_WITH_PATH: + case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: + return true; + } + return false; +} + +static bool +valid_instruction(enum instruction insn) +{ + switch (insn) { + case INSN_NIL: + case INSN_TRUE: + case INSN_FALSE: + case INSN_LIST_APPEND: + case INSN_LIST_EXPAND_INTO: + case INSN_DICT: + case INSN_DICT_APPEND_KVPAIR: + case INSN_GET_MEMBER: + case INSN_NEXT_LINE: + case INSN_DROP: + case INSN_DUP: + case INSN_CALL: + case INSN_MATCHER_MUST_MATCH: + case INSN_NUMBER: + case INSN_LIST: + case INSN_SET_LINE: + case INSN_FUNC: + case INSN_GET_BY_INDEX_KEEP: + case INSN_MATCHER_SET_VAL: + case INSN_STRING: + case INSN_VAR_GET: + case INSN_VAR_SET: + case INSN_VAR_SET_LOCAL: + case INSN_VAR_NEW: + case INSN_VAR_NEW_LOCAL: + case 
INSN_MOVE_TO_LOCAL_VAR: + case INSN_FUNC_SET_NAME: + case INSN_FUNC_ADD_SUBFUNC: + case INSN_FUNC_ADD_SUBFUNC_ANYARGS: + case INSN_MATCHER_PUSH: + return true; + } + + return false; +} + struct instruction_list * apfl_instructions_new(struct gc *gc, size_t line, struct apfl_string *filename) @@ -48,46 +205,23 @@ apfl_gc_instructions_traverse(struct instruction_list *ilist, gc_visitor cb, voi } for (size_t i = 0; i < ilist->len; i++) { - switch (ilist->instructions[i].instruction) { - case INSN_NIL: - case INSN_TRUE: - case INSN_FALSE: - case INSN_LIST_APPEND: - case INSN_LIST_EXPAND_INTO: - case INSN_DICT: - case INSN_DICT_APPEND_KVPAIR: - case INSN_GET_MEMBER: - case INSN_NEXT_LINE: - case INSN_DROP: - case INSN_DUP: - case INSN_CALL: - case INSN_MATCHER_MUST_MATCH: + switch (argument_type_for_instruction(ilist->instructions[i].instruction)) { + case INSN_ARGS_NONE: break; - case INSN_NUMBER: - case INSN_LIST: - case INSN_SET_LINE: - case INSN_GET_BY_INDEX_KEEP: - case INSN_MATCHER_SET_VAL: - case INSN_FUNC: + case INSN_ARGS_NUMBER: + case INSN_ARGS_COUNT: + case INSN_ARGS_INDEX: i++; break; - case INSN_STRING: - case INSN_VAR_GET: - case INSN_VAR_SET: - case INSN_VAR_SET_LOCAL: - case INSN_VAR_NEW: - case INSN_VAR_NEW_LOCAL: - case INSN_MOVE_TO_LOCAL_VAR: - case INSN_FUNC_SET_NAME: + case INSN_ARGS_STRING: GET_ARGUMENT(ilist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING)); break; - case INSN_FUNC_ADD_SUBFUNC: - case INSN_FUNC_ADD_SUBFUNC_ANYARGS: + case INSN_ARGS_BODY: GET_ARGUMENT(ilist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.body, GC_TYPE_INSTRUCTIONS)); break; - case INSN_MATCHER_PUSH: + case INSN_ARGS_MATCHER: GET_ARGUMENT(ilist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.matcher, GC_TYPE_MATCHER_INSTRUCTIONS)); break; @@ -101,24 +235,17 @@ apfl_gc_matcher_instructions_traverse(struct matcher_instruction_list *milist, g union matcher_instruction_or_arg arg; for (size_t i = 0; i < milist->len; i++) { - switch (milist->instructions[i].instruction) 
{ - case MATCHER_IGNORE: - case MATCHER_ENTER_LIST: - case MATCHER_LEAVE_LIST: - case MATCHER_CONTINUE_FROM_END: - case MATCHER_REMAINDING: + switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) { + case MINSN_ARGS_NONE: break; - case MATCHER_CHECK_CONST: // with index as values index - case MATCHER_CHECK_PRED: // with index as values index + case MINSN_ARGS_INDEX: i++; break; - case MATCHER_CAPTURE_TO_VAR: // with name - case MATCHER_CAPTURE_TO_VAR_LOCAL: // with name + case MINSN_ARGS_NAME: GET_ARGUMENT(milist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING)); break; - case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with name, index and len - case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with name, index and len + case MINSN_ARGS_NAME_INDEX_LEN: GET_ARGUMENT(milist, i, arg); cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING)); i++; @@ -269,21 +396,15 @@ apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matc FMT_TRY(apfl_format_put_indent(w, indent)); FMT_TRY(apfl_io_write_string(w, apfl_matcher_instruction_to_string(milist->instructions[i].instruction))); - switch (milist->instructions[i].instruction) { - case MATCHER_IGNORE: - case MATCHER_ENTER_LIST: - case MATCHER_LEAVE_LIST: - case MATCHER_CONTINUE_FROM_END: - case MATCHER_REMAINDING: + switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) { + case MINSN_ARGS_NONE: break; - case MATCHER_CAPTURE_TO_VAR: - case MATCHER_CAPTURE_TO_VAR_LOCAL: + case MINSN_ARGS_NAME: GET_ARGUMENT_FOR_DUMP(w, milist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_io_write_string(w, *arg.string)); break; - case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with string, index and len - case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with string, index and len + case MINSN_ARGS_NAME_INDEX_LEN: GET_ARGUMENT_FOR_DUMP(w, milist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_io_write_string(w, *arg.string)); @@ -294,8 
+415,7 @@ apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matc FMT_TRY(apfl_io_write_string(w, ", ")); FMT_TRY(apfl_format_put_int(w, (int)arg.len)); break; - case MATCHER_CHECK_CONST: - case MATCHER_CHECK_PRED: + case MINSN_ARGS_INDEX: GET_ARGUMENT_FOR_DUMP(w, milist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_format_put_int(w, (int)arg.index)); @@ -317,60 +437,37 @@ apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_ FMT_TRY(apfl_format_put_indent(w, indent)); FMT_TRY(apfl_io_write_string(w, apfl_instruction_to_string(ilist->instructions[i].instruction))); - switch (ilist->instructions[i].instruction) { - case INSN_NIL: - case INSN_TRUE: - case INSN_FALSE: - case INSN_LIST_APPEND: - case INSN_LIST_EXPAND_INTO: - case INSN_DICT: - case INSN_DICT_APPEND_KVPAIR: - case INSN_GET_MEMBER: - case INSN_NEXT_LINE: - case INSN_DROP: - case INSN_DUP: - case INSN_CALL: - case INSN_MATCHER_MUST_MATCH: + switch (argument_type_for_instruction(ilist->instructions[i].instruction)) { + case INSN_ARGS_NONE: break; - case INSN_NUMBER: + case INSN_ARGS_NUMBER: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_format_put_number(w, arg.number)); break; - case INSN_LIST: - case INSN_SET_LINE: - case INSN_FUNC: + case INSN_ARGS_COUNT: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_format_put_int(w, (int)arg.count)); break; - case INSN_GET_BY_INDEX_KEEP: - case INSN_MATCHER_SET_VAL: + case INSN_ARGS_INDEX: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ")); FMT_TRY(apfl_format_put_int(w, (int)arg.index)); break; - case INSN_STRING: - case INSN_VAR_GET: - case INSN_VAR_SET: - case INSN_VAR_SET_LOCAL: - case INSN_VAR_NEW: - case INSN_VAR_NEW_LOCAL: - case INSN_MOVE_TO_LOCAL_VAR: - case INSN_FUNC_SET_NAME: + case INSN_ARGS_STRING: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, 
" ")); FMT_TRY(apfl_io_write_string(w, *arg.string)); break; - case INSN_FUNC_ADD_SUBFUNC: - case INSN_FUNC_ADD_SUBFUNC_ANYARGS: + case INSN_ARGS_BODY: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " ilist{\n")); FMT_TRY(apfl_bytecode_dump(indent+1, w, arg.body)); FMT_TRY(apfl_format_put_indent(w, indent)); FMT_TRY(apfl_io_write_string(w, "}")); break; - case INSN_MATCHER_PUSH: + case INSN_ARGS_MATCHER: GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg); FMT_TRY(apfl_io_write_string(w, " milist{\n")); FMT_TRY(apfl_bytecode_dump_matcher(indent+1, w, arg.matcher)); @@ -384,3 +481,616 @@ apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_ return true; } + +struct serializer { + struct apfl_allocator allocator; + struct apfl_io_writer w; + struct apfl_hashmap string_lookup; + size_t next_string_index; +}; + +struct unserializer { + struct gc *gc; + struct apfl_io_reader r; + struct apfl_string **strings; + size_t strings_len; + size_t strings_cap; +}; + +static bool +serializer_strings_eq(void *opaque, const void *_a, const void *_b) +{ + (void)opaque; + + const struct apfl_string * const *a = _a; + const struct apfl_string * const *b = _b; + + return apfl_string_eq(**a, **b); +} + +static apfl_hash +serializer_strings_hash(void *opaque, const void *_key) +{ + (void)opaque; + + const struct apfl_string * const *key = _key; + struct apfl_string_view sv = apfl_string_view_from(**key); + return apfl_hash_fnv1a(sv.bytes, sv.len); +} + +#define MAXU64 0xFFFFFFFFFFFFFFFF + +static bool +serialize_size(struct apfl_io_writer w, size_t n) +{ + uintmax_t _n = n; + if (_n > MAXU64) { + return false; + } + + return apfl_encode_u64(w, _n); +} + +static bool +unserialize_size(struct apfl_io_reader r, size_t *n) +{ + uint_least64_t _n; + FMT_TRY(apfl_decode_u64(r, &_n)); + if (_n > SIZE_MAX) { + return false; + } + *n = (size_t)_n; + return true; +} + +#define GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg) \ + do { \ + if (i >= ilist->len) { \ 
+ assert(false); \ + return false; \ + } \ + arg = ilist->instructions[++i]; \ + } while (0) + +#define DEF_APPEND_INS_OR_ARG(name, ListType, InsOrArgType) \ + static bool \ + name( \ + ListType *milist, \ + InsOrArgType instruction_or_arg \ + ) { \ + if (milist->len >= milist->cap) { \ + return false; \ + } \ + milist->instructions[milist->len] = instruction_or_arg; \ + milist->len++; \ + \ + return true; \ + } + +#define ABSTRACT_APPEND_INS_OR_ARG(fn, InsOrArgType, ilist, which, arg) \ + fn((ilist), (InsOrArgType) {.which = (arg)}) + +DEF_APPEND_INS_OR_ARG( + append_instruction_or_arg, + struct instruction_list, + union instruction_or_arg +) +DEF_APPEND_INS_OR_ARG( + append_matcher_instruction_or_arg, + struct matcher_instruction_list, + union matcher_instruction_or_arg +) + +#define APPEND_INS_OR_ARG(ilist, which, arg) \ + ABSTRACT_APPEND_INS_OR_ARG( \ + append_instruction_or_arg, \ + union instruction_or_arg, \ + ilist, \ + which, \ + arg \ + ) + +#define APPEND_MATCHER_INS_OR_ARG(ilist, which, arg) \ + ABSTRACT_APPEND_INS_OR_ARG( \ + append_matcher_instruction_or_arg, \ + union matcher_instruction_or_arg, \ + ilist, \ + which, \ + arg \ + ) + +static bool +serialize_string( + struct serializer *serializer, + struct apfl_string *string +) { + if (string == NULL) { + return apfl_encode_u64(serializer->w, 0); + } + + uint_least64_t index; + if (apfl_hashmap_get(&serializer->string_lookup, &string, &index)) { + if (index > (MAXU64>>1)) { + return false; + } + index <<= 1; + index |= 1; + + return apfl_encode_u64(serializer->w, index); + } + + uintmax_t _len = string->len; + if (_len > (MAXU64>>1)-1) { + return false; + } + + FMT_TRY(apfl_encode_u64(serializer->w, (string->len + 1) << 1)); + FMT_TRY(apfl_io_write_string(serializer->w, *string)); + + index = serializer->next_string_index; + serializer->next_string_index++; + + return apfl_hashmap_set(&serializer->string_lookup, &string, &index); +} + +static bool +unserialize_string( + struct unserializer 
*unserializer, + struct apfl_string **s +) { + uint_least64_t index_or_len; + FMT_TRY(apfl_decode_u64(unserializer->r, &index_or_len)); + + if (index_or_len == 0) { + *s = NULL; + return true; + } + + bool is_index = (index_or_len & 1) == 1; + index_or_len >>= 1; + if (is_index) { + if (index_or_len > SIZE_MAX) { + return false; + } + size_t index = index_or_len; + if (index >= unserializer->strings_len) { + return false; + } + *s = unserializer->strings[index]; + return true; + } else { + index_or_len -= 1; + if (index_or_len > SIZE_MAX) { + return false; + } + size_t len = index_or_len; + unsigned char *buf = NULL; + if (len > 0) { + buf = ALLOC_BYTES(unserializer->gc->allocator, len); + if (buf == NULL) { + return false; + } + if (!apfl_io_read_bytes_exact_size(unserializer->r, buf, len)) { + FREE_BYTES(unserializer->gc->allocator, buf, len); + return false; + } + } + + struct apfl_string tmpstring = { + .bytes = buf, + .len = len, + .cap = len, + }; + + if ((*s = apfl_string_move_into_new_gc_string(unserializer->gc, &tmpstring)) == NULL) { + FREE_BYTES(unserializer->gc->allocator, buf, len); + return false; + } + + if (!apfl_resizable_append( + unserializer->gc->allocator, + sizeof(struct apfl_string *), + (void **)&unserializer->strings, + &unserializer->strings_len, + &unserializer->strings_cap, + &(*s), + 1 + )) { + return false; + } + + return true; + } +} + +static bool +serialize_milist( + struct serializer *serializer, + struct matcher_instruction_list *milist +) { + union matcher_instruction_or_arg arg; + + FMT_TRY(serialize_size(serializer->w, milist->value_count)); + FMT_TRY(serialize_size(serializer->w, milist->capture_count)); + FMT_TRY(serialize_size(serializer->w, milist->len)); + + for (size_t i = 0; i < milist->len; i++) { + enum matcher_instruction insn = milist->instructions[i].instruction; + FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn)); + + switch (matcher_argument_type_for_instruction(insn)) { + case MINSN_ARGS_NONE: + break; + 
case MINSN_ARGS_INDEX: + GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); + FMT_TRY(serialize_size(serializer->w, arg.index)); + break; + case MINSN_ARGS_NAME: + GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); + FMT_TRY(serialize_string(serializer, arg.string)); + break; + case MINSN_ARGS_NAME_INDEX_LEN: + GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); + FMT_TRY(serialize_string(serializer, arg.string)); + + GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); + FMT_TRY(serialize_size(serializer->w, arg.index)); + + GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg); + FMT_TRY(serialize_size(serializer->w, arg.len)); + break; + } + } + + return true; +} + +static bool +unserialize_milist( + struct unserializer *unserializer, + struct matcher_instruction_list *milist +) { + size_t len; + FMT_TRY(unserialize_size(unserializer->r, &milist->value_count)); + FMT_TRY(unserialize_size(unserializer->r, &milist->capture_count)); + FMT_TRY(unserialize_size(unserializer->r, &len)); + + if ((milist->instructions = ALLOC_LIST( + unserializer->gc->allocator, + union matcher_instruction_or_arg, + len + )) == NULL) { + return false; + } + + milist->cap = len; + milist->len = 0; + + + while (milist->len < len) { + unsigned char b; + FMT_TRY(apfl_io_read_byte(unserializer->r, &b)); + + enum matcher_instruction insn = b; + + if (!valid_matcher_instruction(insn)) { + return false; + } + + FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, instruction, insn)); + + switch (matcher_argument_type_for_instruction(insn)) { + case MINSN_ARGS_NONE: + break; + case MINSN_ARGS_INDEX: { + size_t index; + FMT_TRY(unserialize_size(unserializer->r, &index)); + FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, index)); + break; + } + case MINSN_ARGS_NAME: { + struct apfl_string *name; + FMT_TRY(unserialize_string(unserializer, &name)); + FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name)); + break; + } + case MINSN_ARGS_NAME_INDEX_LEN: { + struct apfl_string *name; + FMT_TRY(unserialize_string(unserializer, &name)); + 
FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name)); + + size_t s; + FMT_TRY(unserialize_size(unserializer->r, &s)); + FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, s)); + + FMT_TRY(unserialize_size(unserializer->r, &s)); + FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, len, s)); + + break; + } + } + } + + return true; +} + +static bool +serialize_ilist( + struct serializer *serializer, + struct instruction_list *ilist +) { + union instruction_or_arg arg; + + FMT_TRY(serialize_size(serializer->w, ilist->line)); + FMT_TRY(serialize_string(serializer, ilist->filename)); + FMT_TRY(serialize_size(serializer->w, ilist->len)); + + for (size_t i = 0; i < ilist->len; i++) { + enum instruction insn = ilist->instructions[i].instruction; + FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn)); + + switch (argument_type_for_instruction(insn)) { + case INSN_ARGS_NONE: + break; + case INSN_ARGS_NUMBER: + GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); + FMT_TRY(apfl_encode_double(serializer->w, arg.number)); + break; + case INSN_ARGS_COUNT: + GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); + FMT_TRY(serialize_size(serializer->w, arg.count)); + break; + case INSN_ARGS_INDEX: + GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); + FMT_TRY(serialize_size(serializer->w, arg.index)); + break; + case INSN_ARGS_STRING: + GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); + FMT_TRY(serialize_string(serializer, arg.string)); + break; + case INSN_ARGS_BODY: + GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); + FMT_TRY(serialize_ilist(serializer, arg.body)); + break; + case INSN_ARGS_MATCHER: + GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg); + FMT_TRY(serialize_milist(serializer, arg.matcher)); + break; + } + } + + return true; +} + +static bool +set_ilist_nested( + struct unserializer *unserializer, + void *opaque, + struct instruction_list *ilist +) { + (void)unserializer; + + struct instruction_list **dst = opaque; + *dst = ilist; + return true; +} + +static bool +unserialize_ilist( + struct unserializer *unserializer, + 
bool (*set_ilist)(struct unserializer *, void *, struct instruction_list *ilist), + void *opaque +) { + size_t line; + FMT_TRY(unserialize_size(unserializer->r, &line)); + + struct apfl_string *filename; + FMT_TRY(unserialize_string(unserializer, &filename)); + + size_t tmproots; + if (filename != NULL) { + tmproots = apfl_gc_tmproots_begin(unserializer->gc); + if (!apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(filename, GC_TYPE_STRING))) { + return false; + } + } + + struct instruction_list *ilist = apfl_instructions_new(unserializer->gc, line, filename); + if (ilist == NULL) { + return false; + } + + if (filename != NULL) { + apfl_gc_tmproots_restore(unserializer->gc, tmproots); + } + + FMT_TRY(set_ilist(unserializer, opaque, ilist)); + + size_t len; + FMT_TRY(unserialize_size(unserializer->r, &len)); + + if (len > 0) { + ilist->instructions = ALLOC_LIST(unserializer->gc->allocator, union instruction_or_arg, len); + if (ilist->instructions == NULL) { + return false; + } + } + ilist->cap = len; + + while (ilist->len < len) { + unsigned char b; + FMT_TRY(apfl_io_read_byte(unserializer->r, &b)); + + enum instruction insn = b; + + if (!valid_instruction(insn)) { + return false; + } + + FMT_TRY(APPEND_INS_OR_ARG(ilist, instruction, insn)); + + switch (argument_type_for_instruction(insn)) { + case INSN_ARGS_NONE: + break; + case INSN_ARGS_NUMBER: { + double d; + FMT_TRY(apfl_decode_double(unserializer->r, &d)); + FMT_TRY(APPEND_INS_OR_ARG(ilist, number, d)); + break; + } + case INSN_ARGS_COUNT: { + size_t count; + FMT_TRY(unserialize_size(unserializer->r, &count)); + FMT_TRY(APPEND_INS_OR_ARG(ilist, count, count)); + break; + } + case INSN_ARGS_INDEX: { + size_t index; + FMT_TRY(unserialize_size(unserializer->r, &index)); + FMT_TRY(APPEND_INS_OR_ARG(ilist, index, index)); + break; + } + case INSN_ARGS_STRING: { + struct apfl_string *s; + FMT_TRY(unserialize_string(unserializer, &s)); + FMT_TRY(APPEND_INS_OR_ARG(ilist, string, s)); + break; + } + case 
INSN_ARGS_BODY: { + if (ilist->len >= ilist->cap) { + return false; + } + struct instruction_list **dst = &ilist->instructions[ilist->len].body; + ilist->len++; + FMT_TRY(unserialize_ilist(unserializer, set_ilist_nested, dst)); + break; + } + case INSN_ARGS_MATCHER: { + if (ilist->len >= ilist->cap) { + return false; + } + struct matcher_instruction_list *matcher = apfl_matcher_instructions_new(unserializer->gc); + if (matcher == NULL) { + return false; + } + ilist->instructions[ilist->len].matcher = matcher; + ilist->len++; + FMT_TRY(unserialize_milist(unserializer, matcher)); + break; + } + } + } + + return true; +} + +static const unsigned char header[] = { '\0', 'a', 'p', 'f', 'l', 'B', BYTECODE_VERSION}; +#define BYTE_ARRAY_SV(b) ((struct apfl_string_view) { .bytes = (b), .len = sizeof(b), }) +#define HEADER_LEN sizeof(header) + +static struct apfl_string_view +header_sv(void) +{ + return BYTE_ARRAY_SV(header); +} + + +static bool +bytecode_serialize_inner( + struct serializer *serializer, + struct instruction_list *ilist +) { + FMT_TRY(apfl_io_write_string(serializer->w, header_sv())); + FMT_TRY(serialize_ilist(serializer, ilist)); + + return true; +} + +bool +apfl_bytecode_serialize( + struct apfl_allocator allocator, + struct apfl_io_writer w, + struct instruction_list *ilist +) { + struct serializer serializer = { + .allocator = allocator, + .w = w, + .next_string_index = 0, + }; + + if (!apfl_hashmap_init( + &serializer.string_lookup, + allocator, + (struct apfl_hashmap_callbacks) { + .opaque = NULL, + .keys_eq = serializer_strings_eq, + .calc_hash = serializer_strings_hash, + }, + sizeof(struct apfl_string *), + sizeof(uint_least64_t) + )) { + return false; + } + + bool out = bytecode_serialize_inner(&serializer, ilist); + + apfl_hashmap_deinit(&serializer.string_lookup); + + return out; +} + +static bool +set_ilist_root( + struct unserializer *unserializer, + void *opaque, + struct instruction_list *ilist +) { + struct instruction_list **dest = opaque; 
+ if (!apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(ilist, GC_TYPE_INSTRUCTIONS))) { + return false; + } + + *dest = ilist; + return true; +} + +static struct instruction_list * +bytecode_unserialize_inner(struct unserializer *unserializer) +{ + unsigned char header_buf[HEADER_LEN]; + FMT_TRY(apfl_io_read_bytes_exact_size(unserializer->r, header_buf, HEADER_LEN)); + + if (!apfl_string_eq(header_sv(), BYTE_ARRAY_SV(header_buf))) { + return NULL; + } + + struct instruction_list *ilist = NULL; + + if (!unserialize_ilist(unserializer, set_ilist_root, &ilist)) { + return NULL; + } + + return ilist; +} + +struct instruction_list * +apfl_bytecode_unserialize( + struct gc *gc, + struct apfl_io_reader r +) { + struct unserializer unserializer = { + .gc = gc, + .r = r, + .strings = NULL, + .strings_len = 0, + .strings_cap = 0, + }; + + size_t tmproots = apfl_gc_tmproots_begin(gc); + struct instruction_list *out = bytecode_unserialize_inner(&unserializer); + apfl_gc_tmproots_restore(gc, tmproots); + + FREE_LIST(gc->allocator, unserializer.strings, unserializer.strings_cap); + + return out; +} diff --git a/src/bytecode.h b/src/bytecode.h index 3694aa7..f29dd50 100644 --- a/src/bytecode.h +++ b/src/bytecode.h @@ -105,6 +105,17 @@ void apfl_matcher_instructions_deinit(struct apfl_allocator, struct matcher_inst bool apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matcher_instruction_list *milist); bool apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_list *ilist); +bool apfl_bytecode_serialize( + struct apfl_allocator, + struct apfl_io_writer, + struct instruction_list * +); + +struct instruction_list *apfl_bytecode_unserialize( + struct gc *gc, + struct apfl_io_reader r +); + #ifdef __cplusplus } #endif diff --git a/src/context.c b/src/context.c index 0c68a80..5b7f831 100644 --- a/src/context.c +++ b/src/context.c @@ -2167,3 +2167,45 @@ apfl_load(apfl_ctx ctx, struct apfl_source_reader reader, apfl_stackidx name) 
} } } + +static void +load_bytecode_inner(apfl_ctx ctx, struct apfl_io_reader r) +{ + struct instruction_list *ilist = apfl_bytecode_unserialize(&ctx->gc, r); + if (ilist == NULL) { + apfl_raise_const_error(ctx, "Failed to load bytecode"); + } + + if (!apfl_gc_tmproot_add(&ctx->gc, GC_OBJECT_FROM(ilist, GC_TYPE_INSTRUCTIONS))) { + apfl_raise_alloc_error(ctx); + } + + + struct apfl_value *func_value = apfl_stack_push_placeholder(ctx); + if (func_value == NULL) { + apfl_raise_alloc_error(ctx); + } + + if ((func_value->func = apfl_func_new( + &ctx->gc, + 1, + NULL, + ilist->line, + ilist->filename + )) == NULL) { + apfl_drop(ctx, -1); + apfl_raise_alloc_error(ctx); + } + + func_value->type = VALUE_FUNC; + + assert(apfl_func_add_subfunc(func_value->func, ilist, NULL) /* should not fail, func was initialized with cap of 1 */); +} + +void +apfl_load_bytecode(apfl_ctx ctx, struct apfl_io_reader r) +{ + size_t tmproots = apfl_gc_tmproots_begin(&ctx->gc); + load_bytecode_inner(ctx, r); + apfl_gc_tmproots_restore(&ctx->gc, tmproots); +} diff --git a/src/context.h b/src/context.h index fe258e8..94f9b83 100644 --- a/src/context.h +++ b/src/context.h @@ -216,6 +216,8 @@ void apfl_gc_matcher_traverse(struct matcher *, gc_visitor, void *); void apfl_iterative_runner_visit_gc_objects(apfl_iterative_runner, gc_visitor, void *); +void apfl_load_bytecode(apfl_ctx, struct apfl_io_reader); + #ifdef __cplusplus } #endif diff --git a/src/encode.c b/src/encode.c new file mode 100644 index 0000000..75bddb0 --- /dev/null +++ b/src/encode.c @@ -0,0 +1,125 @@ +#include +#include + +#include + +#include "apfl.h" +#include "format.h" + +bool +apfl_encode_u64(struct apfl_io_writer w, uint_least64_t n) +{ + unsigned char buf[8] = { + n & 0xFF, + (n & 0xFF00) >> 8, + (n & 0xFF0000) >> 16, + (n & 0xFF000000) >> 24, + (n & 0xFF00000000) >> 32, + (n & 0xFF0000000000) >> 40, + (n & 0xFF000000000000) >> 48, + (n & 0xFF00000000000000) >> 56, + }; + return apfl_io_write_string_view(w, (struct 
apfl_string_view) { + .bytes = buf, + .len = 8, + }); +} + +bool +apfl_decode_u64(struct apfl_io_reader r, uint_least64_t *n) +{ + unsigned char buf[8]; + FMT_TRY(apfl_io_read_bytes_exact_size(r, buf, 8)); + *n = ((uint_least64_t)buf[0]) + | ((uint_least64_t)buf[1] << 8) + | ((uint_least64_t)buf[2] << 16) + | ((uint_least64_t)buf[3] << 24) + | ((uint_least64_t)buf[4] << 32) + | ((uint_least64_t)buf[5] << 40) + | ((uint_least64_t)buf[6] << 48) + | ((uint_least64_t)buf[7] << 56); + return true; +} + +static uint_least64_t +double_repr_from_exp_and_frac(uint_least64_t exp, uint_least64_t frac) +{ + return ((exp & 0x7FF) << 52) | (frac & 0xFFFFFFFFFFFFF); +} + + +static uint_least64_t +prepare_unsigned_double_encode(double d) +{ + if (d == 0) { + return 0; + } else if (isinf(d)) { + return double_repr_from_exp_and_frac(0x7FF, 0x0); + } else if (isnan(d)) { + return double_repr_from_exp_and_frac(0x7FF, 0x8000000000000); + } else { + int _exp; + double _frac = frexp(d, &_exp); + if (_exp < -1021) { /* subnormal: exponent field 0, no implicit leading bit, frac = d * 2^1074 */ + return double_repr_from_exp_and_frac(0, (uint_least64_t)ldexp(d, 1074)); + } + _frac *= 9007199254740992; // 2^53 + uint_least64_t frac = (uint_least64_t)(_frac) & 0xFFFFFFFFFFFFF; + return double_repr_from_exp_and_frac((uint_least64_t)(_exp + 1022), frac); + } +} + +static double +double_from_exp_and_frac(uint_least64_t exp, uint_least64_t frac) +{ + if (exp == 0x7FF) { + if (frac == 0) { + return INFINITY; + } else { + return NAN; + } + } else if (exp == 0) { + if (frac == 0) { + return 0; + } else { + return ldexp((double)frac, -1074); /* subnormal: frac * 2^-1074 */ + } + } else { + int iexp = (int)exp - 1022; + double dfrac = frac | 0x10000000000000; + dfrac /= 9007199254740992; + return ldexp(dfrac, iexp); /* not pow(2, iexp): 2^1024 overflows to infinity although dfrac * 2^1024 is finite */ + } +} + +bool +apfl_encode_double(struct apfl_io_writer w, double d) +{ + bool negative = copysign(1, d) < 0; + d = fabs(d); + + uint_least64_t out = prepare_unsigned_double_encode(d); + if (negative) { + out |= 0x8000000000000000; + } + + return apfl_encode_u64(w, out); +} + +bool +apfl_decode_double(struct apfl_io_reader r, double *d) +{ + uint_least64_t n; + if 
(!apfl_decode_u64(r, &n)) {
+        return false;
+    }
+
+    double sign = (n & ((uint_least64_t)1<<63)) != 0 ? -1 : 1; /* bit 63 is the sign */
+    n &= 0x7FFFFFFFFFFFFFFF; /* clear the sign bit */
+
+    uint_least64_t exp = n >> 52; /* 11 bit exponent field */
+    uint_least64_t frac = n & 0xFFFFFFFFFFFFF; /* low 52 bits */
+
+    *d = copysign(double_from_exp_and_frac(exp, frac), sign);
+    return true;
+}
diff --git a/src/encode.h b/src/encode.h
new file mode 100644
index 0000000..7624309
--- /dev/null
+++ b/src/encode.h
@@ -0,0 +1,23 @@
+#ifndef APFL_ENCODE_H /* guard named after this header so it cannot clash with another header's guard */
+#define APFL_ENCODE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "apfl.h"
+
+bool apfl_encode_u64(struct apfl_io_writer, uint_least64_t); /* writes 8 bytes, least significant byte first */
+bool apfl_decode_u64(struct apfl_io_reader, uint_least64_t *); /* reads 8 bytes, least significant byte first */
+
+bool apfl_encode_double(struct apfl_io_writer, double); /* 8 bytes: sign bit, 11 bit exponent, 52 bit fraction */
+bool apfl_decode_double(struct apfl_io_reader, double *); /* inverse of apfl_encode_double */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/encode_test.c b/src/encode_test.c
new file mode 100644
index 0000000..ce7c188
--- /dev/null
+++ b/src/encode_test.c
@@ -0,0 +1,200 @@
+#include <inttypes.h>
+#include <math.h>
+
+#include "test.h"
+
+#include "apfl.h"
+
+#include "encode.h"
+
+#define HEXDUMP_FMT "%02X%02X%02X%02X%02X%02X%02X%02X" /* eight bytes as sixteen hex digits */
+#define HEXDUMP_ARGS(b) \
+    (unsigned int)((b)[0]), \
+    (unsigned int)((b)[1]), \
+    (unsigned int)((b)[2]), \
+    (unsigned int)((b)[3]), \
+    (unsigned int)((b)[4]), \
+    (unsigned int)((b)[5]), \
+    (unsigned int)((b)[6]), \
+    (unsigned int)((b)[7])
+
+static void
+test_u64(testctx t, void (*fn)(testctx, uint_least64_t, unsigned char[8])) /* runs fn over value / expected-bytes pairs */
+{
+    fn(t, 0, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00});
+    fn(t, 1, (unsigned char[8]) {0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00});
+    fn(t, 42, (unsigned char[8]) {0x2A,0x00,0x00,0x00,0x00,0x00,0x00,0x00});
+    fn(t, 666, (unsigned char[8]) {0x9A,0x02,0x00,0x00,0x00,0x00,0x00,0x00});
+    fn(t, 0x1122334455667788, (unsigned char[8]) {0x88,0x77,0x66,0x55,0x44,0x33,0x22,0x11});
+    fn(t, 0xFFFFFFFFFFFFFFFF, (unsigned char[8]) {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF});
+}
+
+static void
u64_encode_callback(testctx t, uint_least64_t n, unsigned char bytes[8])
+{
+    struct apfl_allocator allocator = test_allocator(t);
+
+    // Encode into an in-memory string builder so the produced bytes can be inspected.
+    struct apfl_string_builder sb = apfl_string_builder_init(allocator);
+    struct apfl_io_writer w = apfl_io_string_writer(&sb);
+
+    if (!apfl_encode_u64(w, n)) {
+        // NOTE(review): sb is not released on this path; confirm whether the builder needs a deinit.
+        test_failf(t, "Could not encode u64 0x%" PRIxLEAST64, n);
+        return;
+    }
+
+    // have must be released with apfl_string_deinit; that happens once, at the end, on every path.
+    struct apfl_string have = apfl_string_builder_move_string(&sb);
+    struct apfl_string_view want = { .bytes = bytes, .len = 8, };
+
+    // Check the length first so the hex dump below never reads past a short result.
+    if (have.len != 8) {
+        test_failf(t, "Encoded length is incorrect. Got %d", (int)have.len);
+    } else if (!apfl_string_eq(want, have)) {
+        test_failf(
+            t,
+            "Encoding for 0x%" PRIxLEAST64 " is wrong. have " HEXDUMP_FMT ", want " HEXDUMP_FMT,
+            n,
+            HEXDUMP_ARGS(have.bytes),
+            HEXDUMP_ARGS(bytes)
+        );
+    }
+
+    // Failure branches fall through to here instead of returning early, so the string is not leaked.
+    apfl_string_deinit(allocator, &have);
+}
+
+static void u64_decode_callback(testctx t, uint_least64_t want, unsigned char bytes[8])
+{
+    struct apfl_string_view input = { .bytes = bytes, .len = 8, };
+    struct apfl_io_string_reader_data reader_data = apfl_io_string_reader_create(input);
+    struct apfl_io_reader r = apfl_io_string_reader(&reader_data); // reads from the 8 expected bytes
+
+    uint_least64_t have;
+
+    if (!apfl_decode_u64(r, &have)) {
+        test_failf(t, "Could not decode u64 0x%" PRIxLEAST64, want);
+        return;
+    }
+
+    if (want != have) {
+        test_failf(
+            t,
+            "Decoding failed, have 0x%" PRIxLEAST64 ", want 0x%" PRIxLEAST64 ".",
+            have,
+            want
+        );
+        return;
+    }
+}
+
+TEST(encode_u64, t) {
+    test_u64(t, u64_encode_callback);
+}
+
+TEST(decode_u64, t) {
+    test_u64(t, u64_decode_callback);
+}
+
+static void
+test_double(testctx t, void (*fn)(testctx, double, unsigned char[8])) /* runs fn over value / expected-bytes pairs */
+{
+    fn(t, 0, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00});
+    fn(t, 1, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x3F});
+    fn(t, 42, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0x45,0x40});
+    fn(t, -2.75, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0x06,0xC0});
+
fn(t, 1337.42, (unsigned char[8]) {0x48,0xE1,0x7A,0x14,0xAE,0xE5,0x94,0x40});
+    fn(t, 1E+50, (unsigned char[8]) {0x9A,0x64,0x7E,0xC5,0x0E,0x1B,0x51,0x4A});
+    fn(t, 1E-12, (unsigned char[8]) {0x11,0xEA,0x2D,0x81,0x99,0x97,0x71,0x3D});
+    fn(t, NAN, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0xF8,0x7F});
+    fn(t, -NAN, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0xF8,0xFF}); // same NaN pattern with the sign bit set
+    fn(t, INFINITY, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F});
+    fn(t, -INFINITY, (unsigned char[8]) {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF});
+}
+
+static void
+double_encode_callback(testctx t, double d, unsigned char bytes[8])
+{
+    struct apfl_allocator allocator = test_allocator(t);
+
+    struct apfl_string_builder sb = apfl_string_builder_init(allocator);
+    struct apfl_io_writer w = apfl_io_string_writer(&sb);
+
+    if (!apfl_encode_double(w, d)) {
+        test_failf(t, "Could not encode double %a", d);
+        return;
+    }
+
+    struct apfl_string have = apfl_string_builder_move_string(&sb);
+
+    if (have.len != 8) {
+        test_failf(t, "Encoded length is incorrect. Got %d", (int)have.len);
+        apfl_string_deinit(allocator, &have); // release the moved string before the early return
+        return;
+    }
+
+    struct apfl_string_view want = { .bytes = bytes, .len = 8, };
+
+    if (!apfl_string_eq(want, have)) {
+        test_failf(
+            t,
+            "Encoding for %a is wrong. 
have " HEXDUMP_FMT ", want " HEXDUMP_FMT,
+            d,
+            HEXDUMP_ARGS(have.bytes),
+            HEXDUMP_ARGS(bytes)
+        );
+    }
+
+    // Reached on success and after a mismatch alike, so have is released on both paths.
+    apfl_string_deinit(allocator, &have);
+}
+
+static bool
+cmpdouble(double a, double b)
+{
+    if (isnan(a)) {
+        return isnan(b) && signbit(a) == signbit(b); // NaN never compares equal with ==, so compare NaN-ness and sign
+    } else {
+        return a == b;
+    }
+}
+
+static void
+double_decode_callback(testctx t, double want, unsigned char bytes[8])
+{
+    struct apfl_string_view input = { .bytes = bytes, .len = 8, };
+    struct apfl_io_string_reader_data reader_data = apfl_io_string_reader_create(input);
+    struct apfl_io_reader r = apfl_io_string_reader(&reader_data);
+
+    double have;
+
+    if (!apfl_decode_double(r, &have)) {
+        test_failf(t, "Could not decode double %a", want);
+        return;
+    }
+
+    if (!cmpdouble(want, have)) {
+        test_failf(
+            t,
+            "Decoding failed, have %a, want %a.",
+            have,
+            want
+        );
+        return;
+    }
+}
+
+TEST(encode_double, t) {
+    test_double(t, double_encode_callback);
+}
+
+TEST(decode_double, t) {
+    test_double(t, double_decode_callback);
+}
+
+TESTS_BEGIN
+    ADDTEST(encode_u64),
+    ADDTEST(decode_u64),
+    ADDTEST(encode_double),
+    ADDTEST(decode_double),
+TESTS_END
diff --git a/src/functional-test-runner.c b/src/functional-test-runner.c
index c2e3783..93c2b68 100644
--- a/src/functional-test-runner.c
+++ b/src/functional-test-runner.c
@@ -163,8 +163,9 @@ runtest(const char *filename)
         .output_writer = apfl_io_string_writer(&output),
     });
 
-    struct apfl_string_source_reader_data src_data = apfl_string_source_reader_create(parts.script);
-    apfl_iterative_runner runner = apfl_iterative_runner_new(ctx, apfl_string_source_reader(&src_data));
+    struct apfl_io_string_reader_data src_data = apfl_io_string_reader_create(parts.script);
+    struct apfl_io_reader r = apfl_io_string_reader(&src_data);
+    apfl_iterative_runner runner = apfl_iterative_runner_new(ctx, apfl_io_reader_as_source_reader(&r));
     assert(runner != NULL);
 
     while (apfl_iterative_runner_next(runner)) {
diff --git a/src/globals.c b/src/globals.c
index
13ef4e8..3d2c2ef 100644
--- a/src/globals.c
+++ b/src/globals.c
@@ -529,7 +529,8 @@ loadfile(apfl_ctx ctx)
     }
 
     apfl_drop(ctx, -2); // drop cstring
-    apfl_load(ctx, apfl_stdio_source_reader(*fh), -2);
+    struct apfl_io_reader r = apfl_io_file_reader(*fh); /* the source reader below stores &r, so r must stay in scope for the load */
+    apfl_load(ctx, apfl_io_reader_as_source_reader(&r), -2);
     closefile(fh);
     apfl_drop(ctx, -2);
 }
@@ -541,11 +542,47 @@ loadstring(apfl_ctx ctx)
     apfl_tostring(ctx, -1);
     apfl_push_const_string(ctx, "(loadstring)");
 
-    struct apfl_string_source_reader_data reader_data = apfl_string_source_reader_create(apfl_get_string(ctx, -2));
-    apfl_load(ctx, apfl_string_source_reader(&reader_data), -1);
+    struct apfl_io_string_reader_data reader_data = apfl_io_string_reader_create(apfl_get_string(ctx, -2));
+    struct apfl_io_reader r = apfl_io_string_reader(&reader_data); /* reads from reader_data, which views the string at -2 */
+    apfl_load(ctx, apfl_io_reader_as_source_reader(&r), -1);
     apfl_drop(ctx, -2);
 }
 
+static void
+serialize_bytecode(apfl_ctx ctx) /* global "-serialize-bytecode": writes a function's bytecode to a file handle */
+{
+    apfl_get_list_member_by_index(ctx, 0, 0); /* list member 0: the function to serialize */
+    struct apfl_value value = apfl_stack_must_get(ctx, -1);
+    if (value.type == VALUE_CFUNC) {
+        apfl_raise_const_error(ctx, "-serialize-bytecode needs a apfl function, got a native function instead");
+    } else if (value.type != VALUE_FUNC) {
+        apfl_raise_errorfmt(ctx, "-serialize-bytecode needs a apfl function, got value of type {value:type} instead", value);
+    }
+
+    apfl_get_list_member_by_index(ctx, 0, 1); /* list member 1: the file object to write to */
+    FILE **fh = apfl_get_native_object(ctx, &file_object, -1);
+
+    struct apfl_io_writer w = apfl_io_file_writer(*fh);
+
+    /* NOTE(review): only subfunctions[0] is serialized and its existence is not checked here; confirm a VALUE_FUNC always has one. */
+    if (!apfl_bytecode_serialize(ctx->gc.allocator, w, value.func->subfunctions[0].body)) {
+        apfl_raise_const_error(ctx, "Could not serialize function");
+    }
+    apfl_push_nil(ctx); /* pushes nil; presumably the call's result, TODO confirm */
+}
+
+static void
+unserialize_bytecode(apfl_ctx ctx) /* global "-unserialize-bytecode": loads bytecode from a file handle */
+{
+    apfl_get_list_member_by_index(ctx, 0, 0); /* list member 0: the file object to read from */
+    apfl_drop(ctx, -2); /* presumably drops the argument list below the file object, TODO confirm */
+    FILE **fh = apfl_get_native_object(ctx, &file_object, -1);
+
+    struct apfl_io_reader r = apfl_io_file_reader(*fh);
+
+    apfl_load_bytecode(ctx, r);
+}
+
 static const struct
global_def globals[] = {
     {"if", impl_if},
     {"==", impl_eq},
@@ -575,6 +612,8 @@ static const struct global_def globals[] = {
     {"fclose", impl_fclose},
     {"loadfile", loadfile},
     {"loadstring", loadstring},
+    {"-serialize-bytecode", serialize_bytecode},
+    {"-unserialize-bytecode", unserialize_bytecode},
     {NULL, NULL},
 };
 
diff --git a/src/io.c b/src/io.c
new file mode 100644
index 0000000..cb4b2cc
--- /dev/null
+++ b/src/io.c
@@ -0,0 +1,91 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "apfl.h"
+
+/* apfl_io_reader callback for a FILE *: reads up to *len bytes into buf and stores the number read in *len. */
+static bool
+file_read(void *opaque, unsigned char *buf, size_t *len)
+{
+    FILE *f = opaque;
+    size_t maxlen = *len;
+    *len = fread(buf, 1, maxlen, f);
+    if (*len == 0) {
+        return !ferror(f); /* zero bytes is fine at EOF or for an empty request; only a stream error is a failure */
+    }
+    return true;
+}
+
+/* Creates a reader that pulls bytes from the stdio stream f via file_read. */
+struct apfl_io_reader
+apfl_io_file_reader(FILE *f)
+{
+    return (struct apfl_io_reader) {
+        .read = file_read,
+        .opaque = f,
+    };
+}
+
+static bool
+string_reader_callback(void *opaque, unsigned char *buf, size_t *len)
+{
+    struct apfl_io_string_reader_data *ctx = opaque;
+
+    size_t maxlen = *len;
+    size_t remain_len = ctx->sv.len - ctx->off; /* bytes of the view not handed out yet */
+    *len = maxlen < remain_len ?
maxlen : remain_len;
+    memcpy(buf, ctx->sv.bytes + ctx->off, *len); /* NOTE(review): called even when *len is 0; confirm sv.bytes is never NULL for an empty view */
+    ctx->off += *len;
+    assert(ctx->off <= ctx->sv.len);
+
+    return true; /* never fails; an exhausted view is reported as *len == 0 */
+}
+
+struct apfl_io_string_reader_data
+apfl_io_string_reader_create(struct apfl_string_view sv) /* reader state positioned at the start of sv */
+{
+    return (struct apfl_io_string_reader_data) {
+        .sv = sv,
+        .off = 0,
+    };
+}
+
+struct apfl_io_reader apfl_io_string_reader(struct apfl_io_string_reader_data *data) /* stores the data pointer, it is not copied */
+{
+    return (struct apfl_io_reader) {
+        .read = string_reader_callback,
+        .opaque = data,
+    };
+}
+
+bool
+apfl_io_read_bytes(struct apfl_io_reader r, unsigned char *buf, size_t *len) /* one call to the reader's callback; may read fewer than *len bytes */
+{
+    return r.read(r.opaque, buf, len);
+}
+
+bool
+apfl_io_read_bytes_exact_size(struct apfl_io_reader r, unsigned char *buf, size_t want) /* keeps reading until want bytes have arrived */
+{
+    while (want > 0) {
+        size_t have = want;
+        if (!apfl_io_read_bytes(r, buf, &have)) {
+            return false;
+        }
+        if (have == 0) { /* end of input before all requested bytes arrived */
+            return false;
+        }
+        want -= have;
+        buf += have;
+    }
+
+    return true;
+}
+
+bool
+apfl_io_read_byte(struct apfl_io_reader r, unsigned char *byte) /* reads exactly one byte */
+{
+    return apfl_io_read_bytes_exact_size(r, byte, 1);
+}
diff --git a/src/parser_test.c b/src/parser_test.c
index b630e73..71ed6ee 100644
--- a/src/parser_test.c
+++ b/src/parser_test.c
@@ -9,7 +9,8 @@
 struct parser_test {
     testctx t;
     struct apfl_allocator allocator;
-    struct apfl_string_source_reader_data string_source_reader;
+    struct apfl_io_string_reader_data string_reader; /* backing state for reader */
+    struct apfl_io_reader reader; /* stored in the struct because the source reader keeps a pointer to it */
     apfl_tokenizer_ptr tokenizer;
     apfl_parser_ptr parser;
 };
@@ -23,11 +24,12 @@ new_parser_test(testctx t, const char *source)
 
     pt->t = t;
     pt->allocator = allocator;
-    pt->string_source_reader = apfl_string_source_reader_create(apfl_string_view_from(source));
+    pt->string_reader = apfl_io_string_reader_create(apfl_string_view_from(source));
+    pt->reader = apfl_io_string_reader(&pt->string_reader);
 
     if ((pt->tokenizer = apfl_tokenizer_new(
         allocator,
-        apfl_string_source_reader(&pt->string_source_reader)
+        apfl_io_reader_as_source_reader(&pt->reader)
     )) == NULL) {
test_fatalf(t, "Failed initializing the tokenizer");
     }
diff --git a/src/source_readers.c b/src/source_readers.c
index 9515bc8..052cfd7 100644
--- a/src/source_readers.c
+++ b/src/source_readers.c
@@ -4,60 +4,18 @@
 #include "apfl.h"
 
 static bool
-string_reader_callback(void *opaque, unsigned char *buf, size_t *len, bool need)
+io_reader_callback(void *opaque, unsigned char *buf, size_t *len, bool need) /* adapts an apfl_io_reader to the source reader callback signature */
 {
     (void)need;
-
-    struct apfl_string_source_reader_data *ctx = opaque;
-
-    size_t maxlen = *len;
-    size_t remain_len = ctx->sv.len - ctx->off;
-    *len = maxlen < remain_len ? maxlen : remain_len;
-    memcpy(buf, ctx->sv.bytes + ctx->off, *len);
-    ctx->off += *len;
-    assert(ctx->off <= ctx->sv.len);
-
-    return true;
-}
-
-struct apfl_string_source_reader_data
-apfl_string_source_reader_create(struct apfl_string_view sv)
-{
-    return (struct apfl_string_source_reader_data) {
-        .sv = sv,
-        .off = 0,
-    };
-}
-
-struct apfl_source_reader apfl_string_source_reader(struct apfl_string_source_reader_data *data)
-{
-    return (struct apfl_source_reader) {
-        .callback = string_reader_callback,
-        .opaque = data,
-    };
-}
-
-static bool
-stdio_reader_callback(void *opaque, unsigned char *buf, size_t *len, bool need)
-{
-    (void)need;
-
-    FILE *f = opaque;
-
-    size_t maxlen = *len;
-    *len = fread(buf, 1, maxlen, f);
-    if (*len == 0) {
-        return feof(f);
-    }
-
-    return true;
+    struct apfl_io_reader *r = opaque; /* the pointer stored by apfl_io_reader_as_source_reader */
+    return apfl_io_read_bytes(*r, buf, len);
 }
 
 struct apfl_source_reader
-apfl_stdio_source_reader(FILE *f)
+apfl_io_reader_as_source_reader(struct apfl_io_reader *r) /* keeps r by pointer, so *r must outlive the returned source reader */
 {
     return (struct apfl_source_reader) {
-        .callback = stdio_reader_callback,
-        .opaque = f,
+        .callback = io_reader_callback,
+        .opaque = r,
     };
 }
diff --git a/src/tokenizer_test.c b/src/tokenizer_test.c
index 9b62aa1..279e107 100644
--- a/src/tokenizer_test.c
+++ b/src/tokenizer_test.c
@@ -9,7 +9,8 @@ struct tokenizer_test {
     testctx t;
     struct apfl_allocator allocator;
     apfl_tokenizer_ptr tokenizer;
-    struct
apfl_string_source_reader_data string_source_reader;
+    struct apfl_io_string_reader_data string_reader; /* backing state for reader */
+    struct apfl_io_reader reader; /* stored in the struct because the tokenizer's source reader is given &tt->reader */
 };
 
 static struct tokenizer_test *
@@ -21,12 +22,13 @@ new_tokenizer_test_sv(testctx t, struct apfl_string_view text)
     *tt = (struct tokenizer_test) {
         .t = t,
         .allocator = allocator,
-        .string_source_reader = apfl_string_source_reader_create(text),
     };
+    tt->string_reader = apfl_io_string_reader_create(text); /* set after the initializer because reader needs its address */
+    tt->reader = apfl_io_string_reader(&tt->string_reader);
 
     if ((tt->tokenizer = apfl_tokenizer_new(
         allocator,
-        apfl_string_source_reader(&tt->string_source_reader)
+        apfl_io_reader_as_source_reader(&tt->reader)
     )) == NULL) {
         test_fatalf(t, "Failed to initialize the tokenizer");
     }