Implement bytecode (de)serialization

This commit is contained in:
Laria 2023-03-05 17:02:42 +01:00
parent 86e148554d
commit 2666c0f148
15 changed files with 1390 additions and 162 deletions

View file

@ -8,6 +8,7 @@ add_library(apfl
alloc.c
bytecode.c
compile.c
encode.c
context.c
error.c
eval.c
@ -15,6 +16,7 @@ add_library(apfl
format.c
gc.c
hashmap.c
io.c
globals.c
messages.c
parser.c
@ -28,6 +30,8 @@ add_library(apfl
value.c
)
target_link_libraries(apfl PUBLIC m)
add_executable(apfl-bin main.c)
target_link_libraries(apfl-bin PUBLIC apfl)
@ -54,6 +58,7 @@ unittest(resizable_test "resizable.h")
unittest(hashmap_test "hashmap.h")
unittest(strings_test "")
unittest(alloc_test "")
unittest(encode_test "encode.h")
function(functionaltest name)
add_test(NAME "functionaltest_${name}" COMMAND functional-test-runner ${CMAKE_SOURCE_DIR}/src/functional-tests/${name}.at)

View file

@ -141,6 +141,39 @@ bool apfl_format_put_indent(struct apfl_io_writer, unsigned);
bool apfl_format_put_number(struct apfl_io_writer, apfl_number);
bool apfl_format_put_poiner(struct apfl_io_writer, void *);
/* A byte source: a read callback together with an opaque pointer. */
struct apfl_io_reader {
/* buf points to a buffer to fill that has a size of *len.
* The callback must set *len to the number of bytes read and return true
* on success and false on failure.
*
* Setting *len to 0 indicates an end of file.
*/
bool (*read)(void *, unsigned char *buf, size_t *len);
/* Presumably handed to read as its first argument -- TODO confirm in io.c. */
void *opaque;
};
struct apfl_io_reader apfl_io_file_reader(FILE *f);
/* apfl_io_string_reader_* implements an apfl_io_reader that reads data from a
* string view.
*/
struct apfl_io_string_reader_data {
struct apfl_string_view sv;
size_t off;
};
struct apfl_io_string_reader_data apfl_io_string_reader_create(struct apfl_string_view);
/* Creates a reader for apfl_io_string_reader_data.
* The pointed-to apfl_io_string_reader_data and the underlying string view
* must stay alive while the reader is in use.
*/
struct apfl_io_reader apfl_io_string_reader(struct apfl_io_string_reader_data *);
bool apfl_io_read_bytes(struct apfl_io_reader, unsigned char *buf, size_t *len);
bool apfl_io_read_bytes_exact_size(struct apfl_io_reader, unsigned char *buf, size_t len);
bool apfl_io_read_byte(struct apfl_io_reader, unsigned char *byte);
// Tokens
enum apfl_token_type {
@ -543,23 +576,7 @@ struct apfl_token apfl_tokenizer_get_token(apfl_tokenizer_ptr);
*/
struct apfl_error apfl_tokenizer_get_error(apfl_tokenizer_ptr);
/* apfl_string_source_reader_* implements an apfl_source_reader that reads
* source code from a string view.
*/
struct apfl_string_source_reader_data {
struct apfl_string_view sv;
size_t off;
};
struct apfl_string_source_reader_data apfl_string_source_reader_create(struct apfl_string_view);
/* Creates a source reader for apfl_string_source_reader_data.
* The pointed to apfl_string_source_reader_data and the underlying string view
* must be alive while the reader is in use.
*/
struct apfl_source_reader apfl_string_source_reader(struct apfl_string_source_reader_data *);
struct apfl_source_reader apfl_stdio_source_reader(FILE *f);
struct apfl_source_reader apfl_io_reader_as_source_reader(struct apfl_io_reader *);
struct apfl_parser_token_source {
enum apfl_parse_result (*next)(void *, bool need);

View file

@ -4,8 +4,165 @@
#include "alloc.h"
#include "bytecode.h"
#include "encode.h"
#include "format.h"
#include "gc.h"
#include "hashmap.h"
#include "resizable.h"
#include "strings.h"
#define BYTECODE_VERSION 0
/* Kind of operand slot (if any) that follows an instruction inside an
 * instruction_list. Used to drive traversal, dumping and (de)serialization
 * from one shared classification. */
enum instruction_argument_type {
INSN_ARGS_NONE, /* no operand slot follows */
INSN_ARGS_NUMBER, /* one slot, accessed as .number */
INSN_ARGS_COUNT, /* one slot, accessed as .count */
INSN_ARGS_INDEX, /* one slot, accessed as .index */
INSN_ARGS_STRING, /* one slot, accessed as .string */
INSN_ARGS_BODY, /* one slot, accessed as .body (nested instruction_list) */
INSN_ARGS_MATCHER, /* one slot, accessed as .matcher (matcher_instruction_list) */
};
/* Kind of operand slots that follow a matcher instruction inside a
 * matcher_instruction_list. */
enum matcher_instruction_argument_type {
MINSN_ARGS_NONE, /* no operand slot follows */
MINSN_ARGS_INDEX, /* one slot, accessed as .index */
MINSN_ARGS_NAME, /* one slot, accessed as .string */
MINSN_ARGS_NAME_INDEX_LEN, /* three slots: .string, then .index, then .len */
};
static enum instruction_argument_type
argument_type_for_instruction(enum instruction insn)
{
switch (insn) {
case INSN_NIL:
case INSN_TRUE:
case INSN_FALSE:
case INSN_LIST_APPEND:
case INSN_LIST_EXPAND_INTO:
case INSN_DICT:
case INSN_DICT_APPEND_KVPAIR:
case INSN_GET_MEMBER:
case INSN_NEXT_LINE:
case INSN_DROP:
case INSN_DUP:
case INSN_CALL:
case INSN_MATCHER_MUST_MATCH:
return INSN_ARGS_NONE;
case INSN_NUMBER:
return INSN_ARGS_NUMBER;
case INSN_LIST:
case INSN_SET_LINE:
case INSN_FUNC:
return INSN_ARGS_COUNT;
case INSN_GET_BY_INDEX_KEEP:
case INSN_MATCHER_SET_VAL:
return INSN_ARGS_INDEX;
case INSN_STRING:
case INSN_VAR_GET:
case INSN_VAR_SET:
case INSN_VAR_SET_LOCAL:
case INSN_VAR_NEW:
case INSN_VAR_NEW_LOCAL:
case INSN_MOVE_TO_LOCAL_VAR:
case INSN_FUNC_SET_NAME:
return INSN_ARGS_STRING;
case INSN_FUNC_ADD_SUBFUNC:
case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
return INSN_ARGS_BODY;
case INSN_MATCHER_PUSH:
return INSN_ARGS_MATCHER;
}
assert(false);
return INSN_ARGS_NONE;
}
static enum matcher_instruction_argument_type
matcher_argument_type_for_instruction(enum matcher_instruction insn)
{
switch (insn) {
case MATCHER_IGNORE:
case MATCHER_ENTER_LIST:
case MATCHER_LEAVE_LIST:
case MATCHER_CONTINUE_FROM_END:
case MATCHER_REMAINDING:
return MINSN_ARGS_NONE;
case MATCHER_CHECK_CONST: // with index as values index
case MATCHER_CHECK_PRED: // with index as values index
return MINSN_ARGS_INDEX;
case MATCHER_CAPTURE_TO_VAR: // with name
case MATCHER_CAPTURE_TO_VAR_LOCAL: // with name
return MINSN_ARGS_NAME;
case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with name, index and len
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with name, index and len
return MINSN_ARGS_NAME_INDEX_LEN;
}
assert(false);
return MINSN_ARGS_NONE;
}
/*
 * Reports whether insn is one of the known matcher instructions.
 * Used on bytes read from serialized input before they are trusted as enum
 * values. The switch has no default label so new enum members get flagged by
 * the compiler.
 */
static bool
valid_matcher_instruction(enum matcher_instruction insn)
{
    bool known = false;

    switch (insn) {
    case MATCHER_IGNORE:
    case MATCHER_ENTER_LIST:
    case MATCHER_LEAVE_LIST:
    case MATCHER_CONTINUE_FROM_END:
    case MATCHER_REMAINDING:
    case MATCHER_CHECK_CONST:
    case MATCHER_CHECK_PRED:
    case MATCHER_CAPTURE_TO_VAR:
    case MATCHER_CAPTURE_TO_VAR_LOCAL:
    case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
    case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
        known = true;
        break;
    }

    return known;
}
/*
 * Reports whether insn is one of the known instructions.
 * Used on bytes read from serialized input before they are trusted as enum
 * values. The switch has no default label so new enum members get flagged by
 * the compiler.
 */
static bool
valid_instruction(enum instruction insn)
{
    bool known = false;

    switch (insn) {
    /* without operand */
    case INSN_NIL:
    case INSN_TRUE:
    case INSN_FALSE:
    case INSN_LIST_APPEND:
    case INSN_LIST_EXPAND_INTO:
    case INSN_DICT:
    case INSN_DICT_APPEND_KVPAIR:
    case INSN_GET_MEMBER:
    case INSN_NEXT_LINE:
    case INSN_DROP:
    case INSN_DUP:
    case INSN_CALL:
    case INSN_MATCHER_MUST_MATCH:
    /* with operand */
    case INSN_NUMBER:
    case INSN_LIST:
    case INSN_SET_LINE:
    case INSN_FUNC:
    case INSN_GET_BY_INDEX_KEEP:
    case INSN_MATCHER_SET_VAL:
    case INSN_STRING:
    case INSN_VAR_GET:
    case INSN_VAR_SET:
    case INSN_VAR_SET_LOCAL:
    case INSN_VAR_NEW:
    case INSN_VAR_NEW_LOCAL:
    case INSN_MOVE_TO_LOCAL_VAR:
    case INSN_FUNC_SET_NAME:
    case INSN_FUNC_ADD_SUBFUNC:
    case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
    case INSN_MATCHER_PUSH:
        known = true;
        break;
    }

    return known;
}
struct instruction_list *
apfl_instructions_new(struct gc *gc, size_t line, struct apfl_string *filename)
@ -48,46 +205,23 @@ apfl_gc_instructions_traverse(struct instruction_list *ilist, gc_visitor cb, voi
}
for (size_t i = 0; i < ilist->len; i++) {
switch (ilist->instructions[i].instruction) {
case INSN_NIL:
case INSN_TRUE:
case INSN_FALSE:
case INSN_LIST_APPEND:
case INSN_LIST_EXPAND_INTO:
case INSN_DICT:
case INSN_DICT_APPEND_KVPAIR:
case INSN_GET_MEMBER:
case INSN_NEXT_LINE:
case INSN_DROP:
case INSN_DUP:
case INSN_CALL:
case INSN_MATCHER_MUST_MATCH:
switch (argument_type_for_instruction(ilist->instructions[i].instruction)) {
case INSN_ARGS_NONE:
break;
case INSN_NUMBER:
case INSN_LIST:
case INSN_SET_LINE:
case INSN_GET_BY_INDEX_KEEP:
case INSN_MATCHER_SET_VAL:
case INSN_FUNC:
case INSN_ARGS_NUMBER:
case INSN_ARGS_COUNT:
case INSN_ARGS_INDEX:
i++;
break;
case INSN_STRING:
case INSN_VAR_GET:
case INSN_VAR_SET:
case INSN_VAR_SET_LOCAL:
case INSN_VAR_NEW:
case INSN_VAR_NEW_LOCAL:
case INSN_MOVE_TO_LOCAL_VAR:
case INSN_FUNC_SET_NAME:
case INSN_ARGS_STRING:
GET_ARGUMENT(ilist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
break;
case INSN_FUNC_ADD_SUBFUNC:
case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
case INSN_ARGS_BODY:
GET_ARGUMENT(ilist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.body, GC_TYPE_INSTRUCTIONS));
break;
case INSN_MATCHER_PUSH:
case INSN_ARGS_MATCHER:
GET_ARGUMENT(ilist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.matcher, GC_TYPE_MATCHER_INSTRUCTIONS));
break;
@ -101,24 +235,17 @@ apfl_gc_matcher_instructions_traverse(struct matcher_instruction_list *milist, g
union matcher_instruction_or_arg arg;
for (size_t i = 0; i < milist->len; i++) {
switch (milist->instructions[i].instruction) {
case MATCHER_IGNORE:
case MATCHER_ENTER_LIST:
case MATCHER_LEAVE_LIST:
case MATCHER_CONTINUE_FROM_END:
case MATCHER_REMAINDING:
switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) {
case MINSN_ARGS_NONE:
break;
case MATCHER_CHECK_CONST: // with index as values index
case MATCHER_CHECK_PRED: // with index as values index
case MINSN_ARGS_INDEX:
i++;
break;
case MATCHER_CAPTURE_TO_VAR: // with name
case MATCHER_CAPTURE_TO_VAR_LOCAL: // with name
case MINSN_ARGS_NAME:
GET_ARGUMENT(milist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
break;
case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with name, index and len
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with name, index and len
case MINSN_ARGS_NAME_INDEX_LEN:
GET_ARGUMENT(milist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
i++;
@ -269,21 +396,15 @@ apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matc
FMT_TRY(apfl_format_put_indent(w, indent));
FMT_TRY(apfl_io_write_string(w, apfl_matcher_instruction_to_string(milist->instructions[i].instruction)));
switch (milist->instructions[i].instruction) {
case MATCHER_IGNORE:
case MATCHER_ENTER_LIST:
case MATCHER_LEAVE_LIST:
case MATCHER_CONTINUE_FROM_END:
case MATCHER_REMAINDING:
switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) {
case MINSN_ARGS_NONE:
break;
case MATCHER_CAPTURE_TO_VAR:
case MATCHER_CAPTURE_TO_VAR_LOCAL:
case MINSN_ARGS_NAME:
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
FMT_TRY(apfl_io_write_string(w, " "));
FMT_TRY(apfl_io_write_string(w, *arg.string));
break;
case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with string, index and len
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with string, index and len
case MINSN_ARGS_NAME_INDEX_LEN:
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
FMT_TRY(apfl_io_write_string(w, " "));
FMT_TRY(apfl_io_write_string(w, *arg.string));
@ -294,8 +415,7 @@ apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matc
FMT_TRY(apfl_io_write_string(w, ", "));
FMT_TRY(apfl_format_put_int(w, (int)arg.len));
break;
case MATCHER_CHECK_CONST:
case MATCHER_CHECK_PRED:
case MINSN_ARGS_INDEX:
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
FMT_TRY(apfl_io_write_string(w, " "));
FMT_TRY(apfl_format_put_int(w, (int)arg.index));
@ -317,60 +437,37 @@ apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_
FMT_TRY(apfl_format_put_indent(w, indent));
FMT_TRY(apfl_io_write_string(w, apfl_instruction_to_string(ilist->instructions[i].instruction)));
switch (ilist->instructions[i].instruction) {
case INSN_NIL:
case INSN_TRUE:
case INSN_FALSE:
case INSN_LIST_APPEND:
case INSN_LIST_EXPAND_INTO:
case INSN_DICT:
case INSN_DICT_APPEND_KVPAIR:
case INSN_GET_MEMBER:
case INSN_NEXT_LINE:
case INSN_DROP:
case INSN_DUP:
case INSN_CALL:
case INSN_MATCHER_MUST_MATCH:
switch (argument_type_for_instruction(ilist->instructions[i].instruction)) {
case INSN_ARGS_NONE:
break;
case INSN_NUMBER:
case INSN_ARGS_NUMBER:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_io_write_string(w, " "));
FMT_TRY(apfl_format_put_number(w, arg.number));
break;
case INSN_LIST:
case INSN_SET_LINE:
case INSN_FUNC:
case INSN_ARGS_COUNT:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_io_write_string(w, " "));
FMT_TRY(apfl_format_put_int(w, (int)arg.count));
break;
case INSN_GET_BY_INDEX_KEEP:
case INSN_MATCHER_SET_VAL:
case INSN_ARGS_INDEX:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_io_write_string(w, " "));
FMT_TRY(apfl_format_put_int(w, (int)arg.index));
break;
case INSN_STRING:
case INSN_VAR_GET:
case INSN_VAR_SET:
case INSN_VAR_SET_LOCAL:
case INSN_VAR_NEW:
case INSN_VAR_NEW_LOCAL:
case INSN_MOVE_TO_LOCAL_VAR:
case INSN_FUNC_SET_NAME:
case INSN_ARGS_STRING:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_io_write_string(w, " "));
FMT_TRY(apfl_io_write_string(w, *arg.string));
break;
case INSN_FUNC_ADD_SUBFUNC:
case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
case INSN_ARGS_BODY:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_io_write_string(w, " ilist{\n"));
FMT_TRY(apfl_bytecode_dump(indent+1, w, arg.body));
FMT_TRY(apfl_format_put_indent(w, indent));
FMT_TRY(apfl_io_write_string(w, "}"));
break;
case INSN_MATCHER_PUSH:
case INSN_ARGS_MATCHER:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_io_write_string(w, " milist{\n"));
FMT_TRY(apfl_bytecode_dump_matcher(indent+1, w, arg.matcher));
@ -384,3 +481,616 @@ apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_
return true;
}
/* State shared by the serialize_* functions while one bytecode blob is written. */
struct serializer {
/* Allocator the string_lookup map is initialized with. */
struct apfl_allocator allocator;
/* Destination of all encoded output. */
struct apfl_io_writer w;
/* Maps a struct apfl_string * (compared by string content) to the
 * uint_least64_t index under which that string was first written. */
struct apfl_hashmap string_lookup;
/* Index that the next newly written string receives. */
size_t next_string_index;
};
/* State shared by the unserialize_* functions while one bytecode blob is read. */
struct unserializer {
/* GC that owns every object created during decoding. */
struct gc *gc;
/* Source of the encoded input. */
struct apfl_io_reader r;
/* Strings decoded so far, in the order they appeared in the input;
 * back-references in the input index into this table. */
struct apfl_string **strings;
size_t strings_len;
size_t strings_cap;
};
/*
 * Key equality callback for serializer.string_lookup.
 * Keys are stored as `struct apfl_string *`, so both arguments point at such
 * a pointer; the strings themselves are compared by content.
 */
static bool
serializer_strings_eq(void *opaque, const void *_a, const void *_b)
{
    (void)opaque;

    const struct apfl_string *lhs = *(const struct apfl_string * const *)_a;
    const struct apfl_string *rhs = *(const struct apfl_string * const *)_b;

    return apfl_string_eq(*lhs, *rhs);
}
static apfl_hash
serializer_strings_hash(void *opaque, const void *_key)
{
(void)opaque;
const struct apfl_string * const *key = _key;
struct apfl_string_view sv = apfl_string_view_from(**key);
return apfl_hash_fnv1a(sv.bytes, sv.len);
}
#define MAXU64 0xFFFFFFFFFFFFFFFF
/*
 * Writes a size_t as an unsigned 64 bit value.
 * Returns false if n does not fit into 64 bits or if writing fails.
 */
static bool
serialize_size(struct apfl_io_writer w, size_t n)
{
    /* Compare in the widest unsigned type so the check is meaningful even if
     * size_t were wider than 64 bits. */
    uintmax_t wide = n;

    return wide <= MAXU64 && apfl_encode_u64(w, wide);
}
/*
 * Reads an unsigned 64 bit value and stores it in *n.
 * Returns false if reading fails or the value does not fit into size_t;
 * *n is only written on success.
 */
static bool
unserialize_size(struct apfl_io_reader r, size_t *n)
{
    uint_least64_t raw;
    FMT_TRY(apfl_decode_u64(r, &raw));

    if (raw <= SIZE_MAX) {
        *n = (size_t)raw;
        return true;
    }

    return false;
}
/*
 * Advances i to the operand slot that follows the current instruction and
 * copies that slot into arg.
 *
 * Must be used inside a function returning bool: if there is no slot after
 * position i the list is malformed, so the macro asserts and returns false.
 * The bounds check requires i + 1 < len (written so that it cannot wrap);
 * checking only i < len would let ++i index one element past the list.
 */
#define GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg) \
    do { \
        if ((ilist)->len < 2 || (i) > (ilist)->len - 2) { \
            assert(false); \
            return false; \
        } \
        (arg) = (ilist)->instructions[++(i)]; \
    } while (0)
/* Defines a static function `name` that stores one instruction-or-argument
 * slot at the end of a list. The generated function never grows the list: it
 * returns false when len has reached cap, otherwise writes the slot, bumps
 * len and returns true. */
#define DEF_APPEND_INS_OR_ARG(name, ListType, InsOrArgType) \
static bool \
name( \
ListType *milist, \
InsOrArgType instruction_or_arg \
) { \
if (milist->len >= milist->cap) { \
return false; \
} \
milist->instructions[milist->len] = instruction_or_arg; \
milist->len++; \
\
return true; \
}
/* Calls fn with a slot built as a compound literal whose union member
 * `which` is set to arg. */
#define ABSTRACT_APPEND_INS_OR_ARG(fn, InsOrArgType, ilist, which, arg) \
fn((ilist), (InsOrArgType) {.which = (arg)})
/* Append function for struct instruction_list. */
DEF_APPEND_INS_OR_ARG(
append_instruction_or_arg,
struct instruction_list,
union instruction_or_arg
)
/* Append function for struct matcher_instruction_list. */
DEF_APPEND_INS_OR_ARG(
append_matcher_instruction_or_arg,
struct matcher_instruction_list,
union matcher_instruction_or_arg
)
/* Appends `arg` as union member `which` to an instruction_list;
 * evaluates to false if the list has no capacity left. */
#define APPEND_INS_OR_ARG(ilist, which, arg) \
ABSTRACT_APPEND_INS_OR_ARG( \
append_instruction_or_arg, \
union instruction_or_arg, \
ilist, \
which, \
arg \
)
/* Appends `arg` as union member `which` to a matcher_instruction_list;
 * evaluates to false if the list has no capacity left. */
#define APPEND_MATCHER_INS_OR_ARG(ilist, which, arg) \
ABSTRACT_APPEND_INS_OR_ARG( \
append_matcher_instruction_or_arg, \
union matcher_instruction_or_arg, \
ilist, \
which, \
arg \
)
/*
 * Writes a (possibly NULL) string, deduplicating repeated strings.
 *
 * Encoding, as one u64 tag optionally followed by raw bytes:
 *   - 0                   : NULL string
 *   - odd,  (index<<1)|1  : reference to the index-th string written earlier
 *   - even, (len+1)<<1    : a new string; len raw bytes follow
 *
 * A newly written string is remembered in string_lookup under the next free
 * index so later occurrences with the same content become references.
 *
 * Returns false if a value does not fit the encoding, on write failure, or
 * if the lookup table cannot be updated.
 */
static bool
serialize_string(
    struct serializer *serializer,
    struct apfl_string *string
) {
    if (string == NULL) {
        return apfl_encode_u64(serializer->w, 0);
    }

    uint_least64_t index;
    if (apfl_hashmap_get(&serializer->string_lookup, &string, &index)) {
        /* One bit is needed for the reference marker. */
        if (index > (MAXU64>>1)) {
            return false;
        }
        return apfl_encode_u64(serializer->w, (index << 1) | 1);
    }

    uintmax_t len = string->len;
    if (len > (MAXU64>>1)-1) {
        return false;
    }

    /* Build the tag in a 64 bit type: computing (len + 1) << 1 in size_t
     * could wrap when size_t is narrower than 64 bits. */
    uint_least64_t tag = ((uint_least64_t)len + 1) << 1;
    FMT_TRY(apfl_encode_u64(serializer->w, tag));
    FMT_TRY(apfl_io_write_string(serializer->w, *string));

    index = serializer->next_string_index;
    serializer->next_string_index++;

    return apfl_hashmap_set(&serializer->string_lookup, &string, &index);
}
/*
 * Reads one string as written by serialize_string and stores it in *s.
 *
 * A tag of 0 yields NULL. An odd tag is a reference into the table of
 * strings decoded so far. An even tag announces a new string: its bytes are
 * read, moved into a new GC string and the result is appended to the table.
 *
 * Returns false on read failure, out-of-range values, an unknown reference
 * or allocation failure.
 */
static bool
unserialize_string(
    struct unserializer *unserializer,
    struct apfl_string **s
) {
    uint_least64_t tag;
    FMT_TRY(apfl_decode_u64(unserializer->r, &tag));

    if (tag == 0) {
        *s = NULL;
        return true;
    }

    bool is_reference = (tag & 1) == 1;
    uint_least64_t payload = tag >> 1;

    if (is_reference) {
        if (payload > SIZE_MAX) {
            return false;
        }

        size_t index = payload;
        if (index >= unserializer->strings_len) {
            return false;
        }

        *s = unserializer->strings[index];
        return true;
    }

    /* New string: the payload is len + 1. */
    payload -= 1;
    if (payload > SIZE_MAX) {
        return false;
    }
    size_t len = payload;

    unsigned char *buf = NULL;
    if (len > 0) {
        buf = ALLOC_BYTES(unserializer->gc->allocator, len);
        if (buf == NULL) {
            return false;
        }

        if (!apfl_io_read_bytes_exact_size(unserializer->r, buf, len)) {
            FREE_BYTES(unserializer->gc->allocator, buf, len);
            return false;
        }
    }

    struct apfl_string tmpstring = {
        .bytes = buf,
        .len = len,
        .cap = len,
    };

    *s = apfl_string_move_into_new_gc_string(unserializer->gc, &tmpstring);
    if (*s == NULL) {
        /* The buffer was not taken over, so it is released here. */
        FREE_BYTES(unserializer->gc->allocator, buf, len);
        return false;
    }

    /* Remember the string so later references can find it. */
    return apfl_resizable_append(
        unserializer->gc->allocator,
        sizeof(struct apfl_string *),
        (void **)&unserializer->strings,
        &unserializer->strings_len,
        &unserializer->strings_cap,
        s,
        1
    );
}
static bool
serialize_milist(
struct serializer *serializer,
struct matcher_instruction_list *milist
) {
union matcher_instruction_or_arg arg;
FMT_TRY(serialize_size(serializer->w, milist->value_count));
FMT_TRY(serialize_size(serializer->w, milist->capture_count));
FMT_TRY(serialize_size(serializer->w, milist->len));
for (size_t i = 0; i < milist->len; i++) {
enum matcher_instruction insn = milist->instructions[i].instruction;
FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn));
switch (matcher_argument_type_for_instruction(insn)) {
case MINSN_ARGS_NONE:
break;
case MINSN_ARGS_INDEX:
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
FMT_TRY(serialize_size(serializer->w, arg.index));
break;
case MINSN_ARGS_NAME:
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
FMT_TRY(serialize_string(serializer, arg.string));
break;
case MINSN_ARGS_NAME_INDEX_LEN:
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
FMT_TRY(serialize_string(serializer, arg.string));
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
FMT_TRY(serialize_size(serializer->w, arg.index));
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
FMT_TRY(serialize_size(serializer->w, arg.len));
break;
}
}
return true;
}
/*
 * Fills milist from the input: value_count, capture_count, the number of
 * slots, then that many slots (each instruction byte followed by its
 * operands).
 *
 * The slot storage is allocated once with exactly the announced size; input
 * whose instructions would need more slots than announced is rejected because
 * the append helpers never grow the list.
 *
 * Returns false on read failure, unknown instruction bytes, capacity overrun
 * or allocation failure.
 */
static bool
unserialize_milist(
    struct unserializer *unserializer,
    struct matcher_instruction_list *milist
) {
    size_t len;

    FMT_TRY(unserialize_size(unserializer->r, &milist->value_count));
    FMT_TRY(unserialize_size(unserializer->r, &milist->capture_count));
    FMT_TRY(unserialize_size(unserializer->r, &len));

    /* Only allocate for non-empty lists, like unserialize_ilist does: a
     * zero-sized allocation may legitimately yield NULL and must not be
     * mistaken for an out-of-memory condition. */
    if (len > 0) {
        milist->instructions = ALLOC_LIST(
            unserializer->gc->allocator,
            union matcher_instruction_or_arg,
            len
        );
        if (milist->instructions == NULL) {
            return false;
        }
    }

    milist->cap = len;
    milist->len = 0;

    while (milist->len < len) {
        unsigned char b;
        FMT_TRY(apfl_io_read_byte(unserializer->r, &b));

        /* The byte comes from untrusted input; check it before use. */
        enum matcher_instruction insn = b;
        if (!valid_matcher_instruction(insn)) {
            return false;
        }

        FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, instruction, insn));

        switch (matcher_argument_type_for_instruction(insn)) {
        case MINSN_ARGS_NONE:
            break;
        case MINSN_ARGS_INDEX: {
            size_t index;
            FMT_TRY(unserialize_size(unserializer->r, &index));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, index));
            break;
        }
        case MINSN_ARGS_NAME: {
            struct apfl_string *name;
            FMT_TRY(unserialize_string(unserializer, &name));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name));
            break;
        }
        case MINSN_ARGS_NAME_INDEX_LEN: {
            struct apfl_string *name;
            FMT_TRY(unserialize_string(unserializer, &name));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name));

            size_t s;
            FMT_TRY(unserialize_size(unserializer->r, &s));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, s));

            FMT_TRY(unserialize_size(unserializer->r, &s));
            FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, len, s));
            break;
        }
        }
    }

    return true;
}
static bool
serialize_ilist(
struct serializer *serializer,
struct instruction_list *ilist
) {
union instruction_or_arg arg;
FMT_TRY(serialize_size(serializer->w, ilist->line));
FMT_TRY(serialize_string(serializer, ilist->filename));
FMT_TRY(serialize_size(serializer->w, ilist->len));
for (size_t i = 0; i < ilist->len; i++) {
enum instruction insn = ilist->instructions[i].instruction;
FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn));
switch (argument_type_for_instruction(insn)) {
case INSN_ARGS_NONE:
break;
case INSN_ARGS_NUMBER:
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
FMT_TRY(apfl_encode_double(serializer->w, arg.number));
break;
case INSN_ARGS_COUNT:
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
FMT_TRY(serialize_size(serializer->w, arg.count));
break;
case INSN_ARGS_INDEX:
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
FMT_TRY(serialize_size(serializer->w, arg.index));
break;
case INSN_ARGS_STRING:
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
FMT_TRY(serialize_string(serializer, arg.string));
break;
case INSN_ARGS_BODY:
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
FMT_TRY(serialize_ilist(serializer, arg.body));
break;
case INSN_ARGS_MATCHER:
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
FMT_TRY(serialize_milist(serializer, arg.matcher));
break;
}
}
return true;
}
/*
 * set_ilist callback for nested lists: opaque is the operand slot
 * (struct instruction_list **) of the parent list, which receives ilist.
 * Always succeeds.
 */
static bool
set_ilist_nested(
    struct unserializer *unserializer,
    void *opaque,
    struct instruction_list *ilist
) {
    (void)unserializer;

    *(struct instruction_list **)opaque = ilist;
    return true;
}
/*
 * Reads one instruction list (and, recursively, its nested bodies and
 * matchers) from the input.
 *
 * The new list is handed to set_ilist(unserializer, opaque, ilist) as soon as
 * it exists and before any of its slots are read, so the callback decides
 * where the list gets attached.
 *
 * Returns false on read failure, unknown instruction bytes, input that needs
 * more slots than it announced, or allocation failure.
 */
static bool
unserialize_ilist(
struct unserializer *unserializer,
bool (*set_ilist)(struct unserializer *, void *, struct instruction_list *ilist),
void *opaque
) {
size_t line;
FMT_TRY(unserialize_size(unserializer->r, &line));
struct apfl_string *filename;
FMT_TRY(unserialize_string(unserializer, &filename));
/* tmproots is only assigned and only read when filename != NULL. */
size_t tmproots;
if (filename != NULL) {
/* Keep filename registered as a temporary root while the list object is
 * created. */
tmproots = apfl_gc_tmproots_begin(unserializer->gc);
if (!apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(filename, GC_TYPE_STRING))) {
return false;
}
}
struct instruction_list *ilist = apfl_instructions_new(unserializer->gc, line, filename);
if (ilist == NULL) {
return false;
}
if (filename != NULL) {
apfl_gc_tmproots_restore(unserializer->gc, tmproots);
}
FMT_TRY(set_ilist(unserializer, opaque, ilist));
/* len is the number of slots (instructions and operands together). */
size_t len;
FMT_TRY(unserialize_size(unserializer->r, &len));
if (len > 0) {
ilist->instructions = ALLOC_LIST(unserializer->gc->allocator, union instruction_or_arg, len);
if (ilist->instructions == NULL) {
return false;
}
}
ilist->cap = len;
/* NOTE(review): relies on apfl_instructions_new leaving ilist->len at 0 --
 * confirm. */
while (ilist->len < len) {
unsigned char b;
FMT_TRY(apfl_io_read_byte(unserializer->r, &b));
/* The byte comes from the input; check it before treating it as an
 * instruction. */
enum instruction insn = b;
if (!valid_instruction(insn)) {
return false;
}
FMT_TRY(APPEND_INS_OR_ARG(ilist, instruction, insn));
switch (argument_type_for_instruction(insn)) {
case INSN_ARGS_NONE:
break;
case INSN_ARGS_NUMBER: {
double d;
FMT_TRY(apfl_decode_double(unserializer->r, &d));
FMT_TRY(APPEND_INS_OR_ARG(ilist, number, d));
break;
}
case INSN_ARGS_COUNT: {
size_t count;
FMT_TRY(unserialize_size(unserializer->r, &count));
FMT_TRY(APPEND_INS_OR_ARG(ilist, count, count));
break;
}
case INSN_ARGS_INDEX: {
size_t index;
FMT_TRY(unserialize_size(unserializer->r, &index));
FMT_TRY(APPEND_INS_OR_ARG(ilist, index, index));
break;
}
case INSN_ARGS_STRING: {
struct apfl_string *s;
FMT_TRY(unserialize_string(unserializer, &s));
FMT_TRY(APPEND_INS_OR_ARG(ilist, string, s));
break;
}
case INSN_ARGS_BODY: {
if (ilist->len >= ilist->cap) {
return false;
}
/* The operand slot is reserved first; the nested call stores the new list
 * into it through set_ilist_nested.
 * NOTE(review): the slot is counted in len before it holds a value. If a
 * collection can run inside the nested call before set_ilist_nested stores
 * the pointer, a traversal of this list would read an unset slot -- confirm
 * against the GC and allocator behaviour. */
struct instruction_list **dst = &ilist->instructions[ilist->len].body;
ilist->len++;
FMT_TRY(unserialize_ilist(unserializer, set_ilist_nested, dst));
break;
}
case INSN_ARGS_MATCHER: {
if (ilist->len >= ilist->cap) {
return false;
}
struct matcher_instruction_list *matcher = apfl_matcher_instructions_new(unserializer->gc);
if (matcher == NULL) {
return false;
}
/* Attach the matcher to this list before filling it. */
ilist->instructions[ilist->len].matcher = matcher;
ilist->len++;
FMT_TRY(unserialize_milist(unserializer, matcher));
break;
}
}
}
return true;
}
/* Signature at the start of serialized bytecode: a NUL byte, the characters
 * "apflB" and the format version. */
static const unsigned char header[] = { '\0', 'a', 'p', 'f', 'l', 'B', BYTECODE_VERSION};
/* Wraps a byte array in a string view. The length comes from sizeof, so b
 * must be an actual array, not a pointer. */
#define BYTE_ARRAY_SV(b) ((struct apfl_string_view) { .bytes = (b), .len = sizeof(b), })
#define HEADER_LEN sizeof(header)
/* Returns the signature as a string view. */
static struct apfl_string_view
header_sv(void)
{
return BYTE_ARRAY_SV(header);
}
/*
 * Writes the signature followed by the encoded root instruction list.
 * Returns false as soon as either step fails.
 */
static bool
bytecode_serialize_inner(
    struct serializer *serializer,
    struct instruction_list *ilist
) {
    FMT_TRY(apfl_io_write_string(serializer->w, header_sv()));

    return serialize_ilist(serializer, ilist);
}
bool
apfl_bytecode_serialize(
struct apfl_allocator allocator,
struct apfl_io_writer w,
struct instruction_list *ilist
) {
struct serializer serializer = {
.allocator = allocator,
.w = w,
.next_string_index = 0,
};
if (!apfl_hashmap_init(
&serializer.string_lookup,
allocator,
(struct apfl_hashmap_callbacks) {
.opaque = NULL,
.keys_eq = serializer_strings_eq,
.calc_hash = serializer_strings_hash,
},
sizeof(struct apfl_string *),
sizeof(uint_least64_t)
)) {
return false;
}
bool out = bytecode_serialize_inner(&serializer, ilist);
apfl_hashmap_deinit(&serializer.string_lookup);
return out;
}
/*
 * set_ilist callback for the outermost list: registers ilist as a temporary
 * GC root and, if that worked, stores it in the struct instruction_list *
 * that opaque points to. Returns false if the root could not be added.
 */
static bool
set_ilist_root(
    struct unserializer *unserializer,
    void *opaque,
    struct instruction_list *ilist
) {
    bool rooted = apfl_gc_tmproot_add(
        unserializer->gc,
        GC_OBJECT_FROM(ilist, GC_TYPE_INSTRUCTIONS)
    );

    if (rooted) {
        *(struct instruction_list **)opaque = ilist;
    }

    return rooted;
}
/*
 * Checks the signature and decodes the root instruction list.
 * Returns the list, or NULL if the input is too short, the signature does
 * not match or decoding fails.
 */
static struct instruction_list *
bytecode_unserialize_inner(struct unserializer *unserializer)
{
    unsigned char header_buf[HEADER_LEN];

    /* Spelled out instead of FMT_TRY: this function returns a pointer, so the
     * failure value has to be NULL rather than a boolean. */
    if (!apfl_io_read_bytes_exact_size(unserializer->r, header_buf, HEADER_LEN)) {
        return NULL;
    }

    if (!apfl_string_eq(header_sv(), BYTE_ARRAY_SV(header_buf))) {
        return NULL;
    }

    struct instruction_list *ilist = NULL;
    if (!unserialize_ilist(unserializer, set_ilist_root, &ilist)) {
        return NULL;
    }

    return ilist;
}
struct instruction_list *
apfl_bytecode_unserialize(
struct gc *gc,
struct apfl_io_reader r
) {
struct unserializer unserializer = {
.gc = gc,
.r = r,
.strings = NULL,
.strings_len = 0,
.strings_cap = 0,
};
size_t tmproots = apfl_gc_tmproots_begin(gc);
struct instruction_list *out = bytecode_unserialize_inner(&unserializer);
apfl_gc_tmproots_restore(gc, tmproots);
FREE_LIST(gc->allocator, unserializer.strings, unserializer.strings_cap);
return out;
}

View file

@ -105,6 +105,17 @@ void apfl_matcher_instructions_deinit(struct apfl_allocator, struct matcher_inst
bool apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matcher_instruction_list *milist);
bool apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_list *ilist);
/* Writes a signature followed by the encoded instruction list (including
 * nested lists) to the writer. The allocator is used for temporary
 * bookkeeping only. Returns false on failure. */
bool apfl_bytecode_serialize(
struct apfl_allocator,
struct apfl_io_writer,
struct instruction_list *
);
/* Reads bytecode written by apfl_bytecode_serialize from r and rebuilds the
 * instruction lists as objects of gc. Returns NULL on failure. */
struct instruction_list *apfl_bytecode_unserialize(
struct gc *gc,
struct apfl_io_reader r
);
#ifdef __cplusplus
}
#endif

View file

@ -2167,3 +2167,45 @@ apfl_load(apfl_ctx ctx, struct apfl_source_reader reader, apfl_stackidx name)
}
}
}
/*
 * Decodes bytecode from r and pushes a function value wrapping it onto the
 * stack of ctx.
 *
 * Errors are reported through apfl_raise_*; the code after each raise relies
 * on those calls not returning.
 */
static void
load_bytecode_inner(apfl_ctx ctx, struct apfl_io_reader r)
{
    struct instruction_list *ilist = apfl_bytecode_unserialize(&ctx->gc, r);
    if (ilist == NULL) {
        apfl_raise_const_error(ctx, "Failed to load bytecode");
    }

    /* Keep the list registered as a temporary root while the function value
     * is created. */
    if (!apfl_gc_tmproot_add(&ctx->gc, GC_OBJECT_FROM(ilist, GC_TYPE_INSTRUCTIONS))) {
        apfl_raise_alloc_error(ctx);
    }

    struct apfl_value *func_value = apfl_stack_push_placeholder(ctx);
    if (func_value == NULL) {
        apfl_raise_alloc_error(ctx);
    }

    if ((func_value->func = apfl_func_new(
        &ctx->gc,
        1,
        NULL,
        ilist->line,
        ilist->filename
    )) == NULL) {
        apfl_drop(ctx, -1);
        apfl_raise_alloc_error(ctx);
    }
    func_value->type = VALUE_FUNC;

    /* The call must not live inside assert(): with NDEBUG defined the whole
     * expression would be removed and the body would never be attached. */
    bool added = apfl_func_add_subfunc(func_value->func, ilist, NULL);
    assert(added /* should not fail, func was initialized with cap of 1 */);
    (void)added;
}
/*
 * Loads serialized bytecode from r and leaves the resulting function value
 * on the stack of ctx. Temporary GC roots created while loading are dropped
 * again afterwards.
 */
void
apfl_load_bytecode(apfl_ctx ctx, struct apfl_io_reader r)
{
    struct gc *gc = &ctx->gc;
    size_t saved_roots = apfl_gc_tmproots_begin(gc);

    load_bytecode_inner(ctx, r);

    apfl_gc_tmproots_restore(gc, saved_roots);
}

View file

@ -216,6 +216,8 @@ void apfl_gc_matcher_traverse(struct matcher *, gc_visitor, void *);
void apfl_iterative_runner_visit_gc_objects(apfl_iterative_runner, gc_visitor, void *);
void apfl_load_bytecode(apfl_ctx, struct apfl_io_reader);
#ifdef __cplusplus
}
#endif

125
src/encode.c Normal file
View file

@ -0,0 +1,125 @@
#include <stdbool.h>
#include <math.h>
#include <stdint.h>
#include "apfl.h"
#include "format.h"
bool
apfl_encode_u64(struct apfl_io_writer w, uint_least64_t n)
{
unsigned char buf[8] = {
n & 0xFF,
(n & 0xFF00) >> 8,
(n & 0xFF0000) >> 16,
(n & 0xFF000000) >> 24,
(n & 0xFF00000000) >> 32,
(n & 0xFF0000000000) >> 40,
(n & 0xFF000000000000) >> 48,
(n & 0xFF00000000000000) >> 56,
};
return apfl_io_write_string_view(w, (struct apfl_string_view) {
.bytes = buf,
.len = 8,
});
}
/*
 * Reads 8 bytes and assembles them, least significant byte first, into *n.
 * Returns false if the bytes could not be read; *n is untouched then.
 */
bool
apfl_decode_u64(struct apfl_io_reader r, uint_least64_t *n)
{
    unsigned char bytes[8];
    FMT_TRY(apfl_io_read_bytes_exact_size(r, bytes, 8));

    /* Start at the most significant byte and shift the rest in below it. */
    uint_least64_t value = 0;
    for (int i = 7; i >= 0; i--) {
        value = (value << 8) | (uint_least64_t)bytes[i];
    }

    *n = value;
    return true;
}
/*
 * Packs an 11 bit biased exponent and a 52 bit fraction into the layout of an
 * IEEE 754 binary64 value with a clear sign bit. Excess bits of both
 * arguments are masked off.
 */
static uint_least64_t
double_repr_from_exp_and_frac(uint_least64_t exp, uint_least64_t frac)
{
    return ((exp & 0x7FF) << 52) | (frac & 0xFFFFFFFFFFFFF);
}

/*
 * Computes the IEEE 754 binary64 bit pattern (without sign) of d using only
 * arithmetic, i.e. independent of the in-memory representation of double.
 *
 * d must not be negative; the caller strips the sign first.
 * NaN is always encoded as exponent 0x7FF with only the top fraction bit set.
 */
static uint_least64_t
prepare_unsigned_double_encode(double d)
{
    if (d == 0) {
        return 0;
    }
    if (isinf(d)) {
        return double_repr_from_exp_and_frac(0x7FF, 0x0);
    }
    if (isnan(d)) {
        return double_repr_from_exp_and_frac(0x7FF, 0x8000000000000);
    }

    /* d == mantissa * 2^exp2 with 0.5 <= mantissa < 1 */
    int exp2;
    double mantissa = frexp(d, &exp2);

    int biased = exp2 + 1022;
    if (biased <= 0) {
        /* Subnormal: the exponent field is 0 and the fraction counts units
         * of 2^-1074. Without this branch the negative exponent would wrap
         * when converted to unsigned and produce a bogus exponent field. */
        return double_repr_from_exp_and_frac(0, (uint_least64_t)ldexp(d, 1074));
    }

    /* Scale to an integer in [2^52, 2^53); masking drops the implicit
     * leading bit. */
    mantissa *= 9007199254740992.0; // 2^53
    uint_least64_t frac = (uint_least64_t)mantissa & 0xFFFFFFFFFFFFF;

    return double_repr_from_exp_and_frac((uint_least64_t)biased, frac);
}
/*
 * Rebuilds a non-negative double from the biased exponent and the fraction
 * of an IEEE 754 binary64 bit pattern.
 *
 * exp is expected to be at most 0x7FF and frac to fit into 52 bits.
 * Any non-zero fraction with exponent 0x7FF yields NAN.
 */
static double
double_from_exp_and_frac(uint_least64_t exp, uint_least64_t frac)
{
    if (exp == 0x7FF) {
        return frac == 0 ? INFINITY : NAN;
    }

    if (exp == 0) {
        /* Zero and subnormal numbers: frac counts units of 2^-1074. */
        return ldexp((double)frac, -1074);
    }

    /* Normal numbers: restore the implicit leading bit, then scale.
     * value == (2^52 + frac) * 2^(exp - 1075)
     * ldexp is used instead of multiplying by pow(2, exp - 1022) because
     * that power overflows to infinity for exp == 0x7FE, although the
     * resulting value is finite. */
    double mantissa = (double)(frac | 0x10000000000000);
    return ldexp(mantissa, (int)exp - 1075);
}
/*
 * Writes d as a 64 bit pattern: the sign goes into bit 63, the magnitude is
 * converted by prepare_unsigned_double_encode, and the result is written
 * with apfl_encode_u64.
 */
bool
apfl_encode_double(struct apfl_io_writer w, double d)
{
    /* copysign also picks up the sign of negative zero. */
    uint_least64_t sign_bit = copysign(1, d) < 0 ? 0x8000000000000000 : 0;
    uint_least64_t magnitude = prepare_unsigned_double_encode(fabs(d));

    return apfl_encode_u64(w, magnitude | sign_bit);
}
/*
 * Reads a 64 bit pattern written by apfl_encode_double and stores the
 * corresponding double in *d. Returns false if reading fails.
 */
bool
apfl_decode_double(struct apfl_io_reader r, double *d)
{
    uint_least64_t bits;
    if (!apfl_decode_u64(r, &bits)) {
        return false;
    }

    /* Bit 63 is the sign; the remaining bits are exponent and fraction. */
    double sign = (bits & ((uint_least64_t)1 << 63)) != 0 ? -1 : 1;
    uint_least64_t magnitude = bits & 0x7FFFFFFFFFFFFFFF;

    double value = double_from_exp_and_frac(
        magnitude >> 52,
        magnitude & 0xFFFFFFFFFFFFF
    );

    *d = copysign(value, sign);
    return true;
}

23
src/encode.h Normal file
View file

@ -0,0 +1,23 @@
/* Portable binary encoding of integers and floating point numbers. */
#ifndef APFL_ENCODE_H
#define APFL_ENCODE_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdint.h>

#include "apfl.h"

/* Writes the low 64 bits of the value as 8 bytes, least significant byte
 * first. Returns false if writing fails. */
bool apfl_encode_u64(struct apfl_io_writer, uint_least64_t );

/* Reads 8 bytes, least significant byte first. Returns false if reading
 * fails. */
bool apfl_decode_u64(struct apfl_io_reader, uint_least64_t *);

/* Writes a double as its IEEE 754 binary64 bit pattern using
 * apfl_encode_u64. Returns false if writing fails. */
bool apfl_encode_double(struct apfl_io_writer, double);

/* Reads a double written by apfl_encode_double. Returns false if reading
 * fails. */
bool apfl_decode_double(struct apfl_io_reader, double *);

#ifdef __cplusplus
}
#endif

#endif

200
src/encode_test.c Normal file
View file

@ -0,0 +1,200 @@
#include <inttypes.h>
#include <math.h>
#include "test.h"
#include "apfl.h"
#include "encode.h"
#define HEXDUMP_FMT "%02X%02X%02X%02X%02X%02X%02X%02X"
#define HEXDUMP_ARGS(b) \
(unsigned int)((b)[0]), \
(unsigned int)((b)[1]), \
(unsigned int)((b)[2]), \
(unsigned int)((b)[3]), \
(unsigned int)((b)[4]), \
(unsigned int)((b)[5]), \
(unsigned int)((b)[6]), \
(unsigned int)((b)[7])
/*
 * Feeds a fixed set of (value, little-endian bytes) pairs to fn, in order.
 * Shared by the encode and the decode test so both check the same vectors.
 */
static void
test_u64(testctx t, void (*fn)(testctx, uint_least64_t, unsigned char[8]))
{
    struct {
        uint_least64_t value;
        unsigned char bytes[8];
    } vectors[] = {
        { 0,                  {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00} },
        { 1,                  {0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00} },
        { 42,                 {0x2A,0x00,0x00,0x00,0x00,0x00,0x00,0x00} },
        { 666,                {0x9A,0x02,0x00,0x00,0x00,0x00,0x00,0x00} },
        { 0x1122334455667788, {0x88,0x77,0x66,0x55,0x44,0x33,0x22,0x11} },
        { 0xFFFFFFFFFFFFFFFF, {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF} },
    };

    for (size_t i = 0; i < sizeof(vectors) / sizeof(vectors[0]); i++) {
        fn(t, vectors[i].value, vectors[i].bytes);
    }
}
/* Encodes n into a string builder and checks that the result is exactly the
 * 8 bytes given in `bytes`. Reports a test failure otherwise.
 *
 * The encoded string is released on every path after it has been moved out
 * of the builder, including the failure paths.
 */
static void u64_encode_callback(testctx t, uint_least64_t n, unsigned char bytes[8])
{
    struct apfl_allocator allocator = test_allocator(t);
    struct apfl_string_builder sb = apfl_string_builder_init(allocator);
    struct apfl_io_writer w = apfl_io_string_writer(&sb);

    if (!apfl_encode_u64(w, n)) {
        /* NOTE(review): the builder may still own partial output here;
         * release it if the string builder API offers a way to do so. */
        test_failf(t, "Could not encode u64 0x%" PRIxLEAST64, n);
        return;
    }

    struct apfl_string have = apfl_string_builder_move_string(&sb);

    if (have.len != 8) {
        test_failf(t, "Encoded length is incorrect. Got %d", (int)have.len);
    } else {
        struct apfl_string_view want = { .bytes = bytes, .len = 8, };
        if (!apfl_string_eq(want, have)) {
            test_failf(
                t,
                "Encoding for 0x%" PRIxLEAST64 " is wrong. have " HEXDUMP_FMT ", want " HEXDUMP_FMT,
                n,
                HEXDUMP_ARGS(have.bytes),
                HEXDUMP_ARGS(bytes)
            );
        }
    }

    /* Single exit for everything that owns `have`, so it is never leaked. */
    apfl_string_deinit(allocator, &have);
}
/* Decodes the 8 bytes in `bytes` through a string reader and checks that the
 * result equals `want`. Reports a test failure if decoding fails or the
 * decoded value differs.
 */
static void u64_decode_callback(testctx t, uint_least64_t want, unsigned char bytes[8])
{
    struct apfl_io_string_reader_data reader_data = apfl_io_string_reader_create(
        (struct apfl_string_view) { .bytes = bytes, .len = 8, }
    );
    struct apfl_io_reader r = apfl_io_string_reader(&reader_data);

    uint_least64_t decoded;
    if (!apfl_decode_u64(r, &decoded)) {
        test_failf(t, "Could not decode u64 0x%" PRIxLEAST64, want);
        return;
    }

    if (decoded == want) {
        return;
    }

    test_failf(
        t,
        "Decoding failed, have 0x%" PRIxLEAST64 ", want 0x%" PRIxLEAST64 ".",
        decoded,
        want
    );
}
/* Runs every vector from test_u64 through u64_encode_callback. */
TEST(encode_u64, t) {
test_u64(t, u64_encode_callback);
}
/* Runs every vector from test_u64 through u64_decode_callback. */
TEST(decode_u64, t) {
test_u64(t, u64_decode_callback);
}
/* Drives fn once for every known (double, encoding) pair.
 *
 * Each pair consists of a double and the 8 bytes it is expected to encode
 * to. The table covers zero, small integers, a negative value, values with
 * large and small exponents, both NaN signs and both infinities.
 */
static void
test_double(testctx t, void (*fn)(testctx, double, unsigned char[8]))
{
    struct {
        double value;
        unsigned char encoded[8];
    } cases[] = {
        {0,         {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}},
        {1,         {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x3F}},
        {42,        {0x00,0x00,0x00,0x00,0x00,0x00,0x45,0x40}},
        {-2.75,     {0x00,0x00,0x00,0x00,0x00,0x00,0x06,0xC0}},
        {1337.42,   {0x48,0xE1,0x7A,0x14,0xAE,0xE5,0x94,0x40}},
        {1E+50,     {0x9A,0x64,0x7E,0xC5,0x0E,0x1B,0x51,0x4A}},
        {1E-12,     {0x11,0xEA,0x2D,0x81,0x99,0x97,0x71,0x3D}},
        {NAN,       {0x00,0x00,0x00,0x00,0x00,0x00,0xF8,0x7F}},
        {-NAN,      {0x00,0x00,0x00,0x00,0x00,0x00,0xF8,0xFF}},
        {INFINITY,  {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F}},
        {-INFINITY, {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF}},
    };

    for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        fn(t, cases[i].value, cases[i].encoded);
    }
}
/* Encodes d into a string builder and checks that the result is exactly the
 * 8 bytes given in `bytes`. Reports a test failure otherwise.
 *
 * The encoded string is released on every path after it has been moved out
 * of the builder, including the failure paths.
 */
static void
double_encode_callback(testctx t, double d, unsigned char bytes[8])
{
    struct apfl_allocator allocator = test_allocator(t);
    struct apfl_string_builder sb = apfl_string_builder_init(allocator);
    struct apfl_io_writer w = apfl_io_string_writer(&sb);

    if (!apfl_encode_double(w, d)) {
        /* NOTE(review): the builder may still own partial output here;
         * release it if the string builder API offers a way to do so. */
        test_failf(t, "Could not encode double %a", d);
        return;
    }

    struct apfl_string have = apfl_string_builder_move_string(&sb);

    if (have.len != 8) {
        test_failf(t, "Encoded length is incorrect. Got %d", (int)have.len);
    } else {
        struct apfl_string_view want = { .bytes = bytes, .len = 8, };
        if (!apfl_string_eq(want, have)) {
            test_failf(
                t,
                "Encoding for %a is wrong. have " HEXDUMP_FMT ", want " HEXDUMP_FMT,
                d,
                HEXDUMP_ARGS(have.bytes),
                HEXDUMP_ARGS(bytes)
            );
        }
    }

    /* Single exit for everything that owns `have`, so it is never leaked. */
    apfl_string_deinit(allocator, &have);
}
/* Equality for test expectations on doubles.
 *
 * - If a is NaN, b must be NaN as well and carry the same sign.
 * - Otherwise the values are compared with ==.
 *
 * signbit() is only specified to return "nonzero" for negative values, not
 * a particular number, so both results are normalised to 0/1 before they
 * are compared.
 */
static bool
cmpdouble(double a, double b)
{
    if (isnan(a)) {
        bool a_negative = signbit(a) != 0;
        bool b_negative = signbit(b) != 0;
        return isnan(b) && a_negative == b_negative;
    }

    return a == b;
}
/* Decodes the 8 bytes in `bytes` through a string reader and checks that the
 * result matches `want` according to cmpdouble(). Reports a test failure if
 * decoding fails or the decoded value differs.
 */
static void
double_decode_callback(testctx t, double want, unsigned char bytes[8])
{
    struct apfl_io_string_reader_data reader_data = apfl_io_string_reader_create(
        (struct apfl_string_view) { .bytes = bytes, .len = 8, }
    );
    struct apfl_io_reader r = apfl_io_string_reader(&reader_data);

    double decoded;
    if (!apfl_decode_double(r, &decoded)) {
        test_failf(t, "Could not decode double %a", want);
        return;
    }

    if (cmpdouble(want, decoded)) {
        return;
    }

    test_failf(
        t,
        "Decoding failed, have %a, want %a.",
        decoded,
        want
    );
}
/* Runs every vector from test_double through double_encode_callback. */
TEST(encode_double, t) {
test_double(t, double_encode_callback);
}
/* Runs every vector from test_double through double_decode_callback. */
TEST(decode_double, t) {
test_double(t, double_decode_callback);
}
/* List of the test cases defined above. TESTS_BEGIN / ADDTEST / TESTS_END
 * are presumably provided by test.h -- confirm there how the list is
 * consumed. A TEST() that is not added here is presumably never run.
 */
TESTS_BEGIN
ADDTEST(encode_u64),
ADDTEST(decode_u64),
ADDTEST(encode_double),
ADDTEST(decode_double),
TESTS_END

View file

@ -163,8 +163,9 @@ runtest(const char *filename)
.output_writer = apfl_io_string_writer(&output),
});
struct apfl_string_source_reader_data src_data = apfl_string_source_reader_create(parts.script);
apfl_iterative_runner runner = apfl_iterative_runner_new(ctx, apfl_string_source_reader(&src_data));
struct apfl_io_string_reader_data src_data = apfl_io_string_reader_create(parts.script);
struct apfl_io_reader r = apfl_io_string_reader(&src_data);
apfl_iterative_runner runner = apfl_iterative_runner_new(ctx, apfl_io_reader_as_source_reader(&r));
assert(runner != NULL);
while (apfl_iterative_runner_next(runner)) {

View file

@ -529,7 +529,8 @@ loadfile(apfl_ctx ctx)
}
apfl_drop(ctx, -2); // drop cstring
apfl_load(ctx, apfl_stdio_source_reader(*fh), -2);
struct apfl_io_reader r = apfl_io_file_reader(*fh);
apfl_load(ctx, apfl_io_reader_as_source_reader(&r), -2);
closefile(fh);
apfl_drop(ctx, -2);
}
@ -541,11 +542,47 @@ loadstring(apfl_ctx ctx)
apfl_tostring(ctx, -1);
apfl_push_const_string(ctx, "(loadstring)");
struct apfl_string_source_reader_data reader_data = apfl_string_source_reader_create(apfl_get_string(ctx, -2));
apfl_load(ctx, apfl_string_source_reader(&reader_data), -1);
struct apfl_io_string_reader_data reader_data = apfl_io_string_reader_create(apfl_get_string(ctx, -2));
struct apfl_io_reader r = apfl_io_string_reader(&reader_data);
apfl_load(ctx, apfl_io_reader_as_source_reader(&r), -1);
apfl_drop(ctx, -2);
}
/* Global "-serialize-bytecode": writes the bytecode of an apfl function to
 * an open file object.
 *
 * Reads two members from the list at stack index 0:
 *   member 0: the function to serialize; must be of type VALUE_FUNC
 *   member 1: a native file object that the bytecode is written to
 *
 * Raises an error if member 0 is a native function or not a function at
 * all, or if serialization fails. Pushes nil on success.
 */
static void
serialize_bytecode(apfl_ctx ctx)
{
    apfl_get_list_member_by_index(ctx, 0, 0);
    struct apfl_value value = apfl_stack_must_get(ctx, -1);

    switch (value.type) {
    case VALUE_FUNC:
        break;
    case VALUE_CFUNC:
        apfl_raise_const_error(ctx, "-serialize-bytecode needs a apfl function, got a native function instead");
        break;
    default:
        apfl_raise_errorfmt(ctx, "-serialize-bytecode needs a apfl function, got value of type {value:type} instead", value);
        break;
    }

    apfl_get_list_member_by_index(ctx, 0, 1);
    FILE **file = apfl_get_native_object(ctx, &file_object, -1);
    struct apfl_io_writer out = apfl_io_file_writer(*file);

    /* Only the body of the first subfunction is serialized. */
    if (!apfl_bytecode_serialize(ctx->gc.allocator, out, value.func->subfunctions[0].body)) {
        apfl_raise_const_error(ctx, "Could not serialize function");
    }

    apfl_push_nil(ctx);
}
/* Global "-unserialize-bytecode": loads bytecode from an open file object.
 *
 * Takes member 0 of the list at stack index 0, drops the stack entry at -2,
 * interprets the remaining top entry as a native file object and hands a
 * reader over that file to apfl_load_bytecode().
 */
static void
unserialize_bytecode(apfl_ctx ctx)
{
    apfl_get_list_member_by_index(ctx, 0, 0);
    apfl_drop(ctx, -2);

    FILE **file = apfl_get_native_object(ctx, &file_object, -1);
    apfl_load_bytecode(ctx, apfl_io_file_reader(*file));
}
static const struct global_def globals[] = {
{"if", impl_if},
{"==", impl_eq},
@ -575,6 +612,8 @@ static const struct global_def globals[] = {
{"fclose", impl_fclose},
{"loadfile", loadfile},
{"loadstring", loadstring},
{"-serialize-bytecode", serialize_bytecode},
{"-unserialize-bytecode", unserialize_bytecode},
{NULL, NULL},
};

91
src/io.c Normal file
View file

@ -0,0 +1,91 @@
#include <assert.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include "apfl.h"
/* Read callback backed by a stdio stream.
 *
 * opaque is the FILE * to read from. On entry *len is the capacity of buf;
 * on return it is the number of bytes fread() delivered.
 *
 * Returns true if at least one byte was read. If nothing was read, the
 * result is true only when the stream is at end-of-file, false otherwise.
 */
static bool
file_read(void *opaque, unsigned char *buf, size_t *len)
{
    FILE *stream = opaque;

    size_t got = fread(buf, 1, *len, stream);
    *len = got;

    return got != 0 || feof(stream);
}
struct apfl_io_reader
apfl_io_file_reader(FILE *f)
{
return (struct apfl_io_reader) {
.read = file_read,
.opaque = f,
};
}
/* Read callback backed by a string view.
 *
 * opaque is the struct apfl_io_string_reader_data holding the view and the
 * current offset. On entry *len is the capacity of buf; on return it is the
 * number of bytes copied, which is 0 once the whole view has been consumed.
 * Always returns true.
 */
static bool
string_reader_callback(void *opaque, unsigned char *buf, size_t *len)
{
    struct apfl_io_string_reader_data *ctx = opaque;

    /* Check the invariant before the unsigned subtraction below: with a bad
     * offset the remaining length would wrap around and the copy would read
     * past the end of the view. */
    assert(ctx->off <= ctx->sv.len);

    size_t remain_len = ctx->sv.len - ctx->off;
    size_t count = *len < remain_len ? *len : remain_len;

    /* Skip the copy entirely when there is nothing to copy: memcpy and
     * pointer arithmetic on a null pointer (e.g. an empty view without
     * backing storage) are undefined even for a length of 0. */
    if (count > 0) {
        memcpy(buf, ctx->sv.bytes + ctx->off, count);
        ctx->off += count;
    }

    *len = count;
    return true;
}
struct apfl_io_string_reader_data
apfl_io_string_reader_create(struct apfl_string_view sv)
{
return (struct apfl_io_string_reader_data) {
.sv = sv,
.off = 0,
};
}
struct apfl_io_reader apfl_io_string_reader(struct apfl_io_string_reader_data *data)
{
return (struct apfl_io_reader) {
.read = string_reader_callback,
.opaque = data,
};
}
/* Performs a single read on r: calls the reader's read callback with its
 * opaque pointer, buf and len, and returns the callback's result unchanged.
 */
bool
apfl_io_read_bytes(struct apfl_io_reader r, unsigned char *buf, size_t *len)
{
return r.read(r.opaque, buf, len);
}
/* Reads exactly `want` bytes from r into buf, issuing as many reads as
 * needed.
 *
 * Returns true once all bytes have been read (immediately for want == 0).
 * Returns false if a read fails or delivers 0 bytes before `want` bytes
 * were collected; buf may be partially filled in that case.
 */
bool
apfl_io_read_bytes_exact_size(struct apfl_io_reader r, unsigned char *buf, size_t want)
{
    unsigned char *cursor = buf;
    size_t remaining = want;

    for (;;) {
        if (remaining == 0) {
            return true;
        }

        /* Ask for everything still missing; the reader may deliver less. */
        size_t got = remaining;
        if (!apfl_io_read_bytes(r, cursor, &got)) {
            return false;
        }
        if (got == 0) {
            return false;
        }

        cursor += got;
        remaining -= got;
    }
}
/* Reads exactly one byte from r into *byte by delegating to
 * apfl_io_read_bytes_exact_size with a size of 1; returns its result.
 */
bool
apfl_io_read_byte(struct apfl_io_reader r, unsigned char *byte)
{
return apfl_io_read_bytes_exact_size(r, byte, 1);
}

View file

@ -9,7 +9,8 @@
struct parser_test {
testctx t;
struct apfl_allocator allocator;
struct apfl_string_source_reader_data string_source_reader;
struct apfl_io_string_reader_data string_reader;
struct apfl_io_reader reader;
apfl_tokenizer_ptr tokenizer;
apfl_parser_ptr parser;
};
@ -23,11 +24,12 @@ new_parser_test(testctx t, const char *source)
pt->t = t;
pt->allocator = allocator;
pt->string_source_reader = apfl_string_source_reader_create(apfl_string_view_from(source));
pt->string_reader = apfl_io_string_reader_create(apfl_string_view_from(source));
pt->reader = apfl_io_string_reader(&pt->string_reader);
if ((pt->tokenizer = apfl_tokenizer_new(
allocator,
apfl_string_source_reader(&pt->string_source_reader)
apfl_io_reader_as_source_reader(&pt->reader)
)) == NULL) {
test_fatalf(t, "Failed initializing the tokenizer");
}

View file

@ -4,60 +4,18 @@
#include "apfl.h"
static bool
string_reader_callback(void *opaque, unsigned char *buf, size_t *len, bool need)
io_reader_callback(void *opaque, unsigned char *buf, size_t *len, bool need)
{
(void)need;
struct apfl_string_source_reader_data *ctx = opaque;
size_t maxlen = *len;
size_t remain_len = ctx->sv.len - ctx->off;
*len = maxlen < remain_len ? maxlen : remain_len;
memcpy(buf, ctx->sv.bytes + ctx->off, *len);
ctx->off += *len;
assert(ctx->off <= ctx->sv.len);
return true;
}
struct apfl_string_source_reader_data
apfl_string_source_reader_create(struct apfl_string_view sv)
{
return (struct apfl_string_source_reader_data) {
.sv = sv,
.off = 0,
};
}
struct apfl_source_reader apfl_string_source_reader(struct apfl_string_source_reader_data *data)
{
return (struct apfl_source_reader) {
.callback = string_reader_callback,
.opaque = data,
};
}
static bool
stdio_reader_callback(void *opaque, unsigned char *buf, size_t *len, bool need)
{
(void)need;
FILE *f = opaque;
size_t maxlen = *len;
*len = fread(buf, 1, maxlen, f);
if (*len == 0) {
return feof(f);
}
return true;
struct apfl_io_reader *r = opaque;
return apfl_io_read_bytes(*r, buf, len);
}
struct apfl_source_reader
apfl_stdio_source_reader(FILE *f)
apfl_io_reader_as_source_reader(struct apfl_io_reader *r)
{
return (struct apfl_source_reader) {
.callback = stdio_reader_callback,
.opaque = f,
.callback = io_reader_callback,
.opaque = r,
};
}

View file

@ -9,7 +9,8 @@ struct tokenizer_test {
testctx t;
struct apfl_allocator allocator;
apfl_tokenizer_ptr tokenizer;
struct apfl_string_source_reader_data string_source_reader;
struct apfl_io_string_reader_data string_reader;
struct apfl_io_reader reader;
};
static struct tokenizer_test *
@ -21,12 +22,13 @@ new_tokenizer_test_sv(testctx t, struct apfl_string_view text)
*tt = (struct tokenizer_test) {
.t = t,
.allocator = allocator,
.string_source_reader = apfl_string_source_reader_create(text),
};
tt->string_reader = apfl_io_string_reader_create(text);
tt->reader = apfl_io_string_reader(&tt->string_reader);
if ((tt->tokenizer = apfl_tokenizer_new(
allocator,
apfl_string_source_reader(&tt->string_source_reader)
apfl_io_reader_as_source_reader(&tt->reader)
)) == NULL) {
test_fatalf(t, "Failed to initialize the tokenizer");
}