Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for implementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
its child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
|
|
#include "apfl.h"
|
|
|
|
|
|
|
|
|
|
#include "alloc.h"
|
|
|
|
|
#include "bytecode.h"
|
2023-03-05 16:02:42 +00:00
|
|
|
#include "encode.h"
|
2022-07-28 18:49:29 +00:00
|
|
|
#include "format.h"
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for implementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
its child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
#include "gc.h"
|
2023-03-05 16:02:42 +00:00
|
|
|
#include "hashmap.h"
|
|
|
|
|
#include "resizable.h"
|
|
|
|
|
#include "strings.h"
|
|
|
|
|
|
|
|
|
|
/*
 * Bytecode version number.
 * NOTE(review): presumably identifies the serialized bytecode format (this
 * file includes encode.h and format.h); its use is not visible in this part
 * of the file -- confirm before relying on it.
 */
#define BYTECODE_VERSION 0
|
|
|
|
|
|
|
|
|
|
/*
 * Kind of argument stored in the slot directly after a regular instruction
 * inside an instruction list (see argument_type_for_instruction()).
 */
enum instruction_argument_type {
    INSN_ARGS_NONE = 0, /* instruction is not followed by an argument slot */
    INSN_ARGS_NUMBER,   /* one slot, used by INSN_NUMBER; not a GC reference */
    INSN_ARGS_COUNT,    /* one slot, used by INSN_LIST/INSN_SET_LINE/INSN_FUNC */
    INSN_ARGS_INDEX,    /* one slot, used by the index based instructions */
    INSN_ARGS_STRING,   /* one slot referencing a GC managed string */
    INSN_ARGS_BODY,     /* one slot referencing another instruction list */
    INSN_ARGS_MATCHER,  /* one slot referencing a matcher instruction list */
};
|
|
|
|
|
|
|
|
|
|
/*
 * Kind of argument slot(s) stored directly after a matcher instruction
 * inside a matcher instruction list
 * (see matcher_argument_type_for_instruction()).
 */
enum matcher_instruction_argument_type {
    MINSN_ARGS_NONE = 0,       /* no argument slot follows */
    MINSN_ARGS_INDEX,          /* one slot: an index */
    MINSN_ARGS_NAME,           /* one slot: a GC managed string (the name) */
    MINSN_ARGS_NAME_INDEX_LEN, /* three slots: name string, index and len */
};
|
|
|
|
|
|
|
|
|
|
static enum instruction_argument_type
|
|
|
|
|
argument_type_for_instruction(enum instruction insn)
|
|
|
|
|
{
|
|
|
|
|
switch (insn) {
|
|
|
|
|
case INSN_NIL:
|
|
|
|
|
case INSN_TRUE:
|
|
|
|
|
case INSN_FALSE:
|
|
|
|
|
case INSN_LIST_APPEND:
|
|
|
|
|
case INSN_LIST_EXPAND_INTO:
|
|
|
|
|
case INSN_DICT:
|
|
|
|
|
case INSN_DICT_APPEND_KVPAIR:
|
|
|
|
|
case INSN_GET_MEMBER:
|
|
|
|
|
case INSN_NEXT_LINE:
|
|
|
|
|
case INSN_DROP:
|
|
|
|
|
case INSN_DUP:
|
|
|
|
|
case INSN_CALL:
|
|
|
|
|
case INSN_MATCHER_MUST_MATCH:
|
2023-03-22 22:54:03 +00:00
|
|
|
case INSN_BUILD_PAIR:
|
2023-03-05 16:02:42 +00:00
|
|
|
return INSN_ARGS_NONE;
|
|
|
|
|
case INSN_NUMBER:
|
|
|
|
|
return INSN_ARGS_NUMBER;
|
|
|
|
|
case INSN_LIST:
|
|
|
|
|
case INSN_SET_LINE:
|
|
|
|
|
case INSN_FUNC:
|
|
|
|
|
return INSN_ARGS_COUNT;
|
|
|
|
|
case INSN_GET_BY_INDEX_KEEP:
|
|
|
|
|
case INSN_MATCHER_SET_VAL:
|
|
|
|
|
return INSN_ARGS_INDEX;
|
|
|
|
|
case INSN_STRING:
|
|
|
|
|
case INSN_VAR_GET:
|
|
|
|
|
case INSN_VAR_SET:
|
|
|
|
|
case INSN_VAR_SET_LOCAL:
|
|
|
|
|
case INSN_VAR_NEW:
|
|
|
|
|
case INSN_VAR_NEW_LOCAL:
|
|
|
|
|
case INSN_MOVE_TO_LOCAL_VAR:
|
|
|
|
|
case INSN_FUNC_SET_NAME:
|
|
|
|
|
return INSN_ARGS_STRING;
|
|
|
|
|
case INSN_FUNC_ADD_SUBFUNC:
|
|
|
|
|
case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
|
|
|
|
|
return INSN_ARGS_BODY;
|
|
|
|
|
case INSN_MATCHER_PUSH:
|
|
|
|
|
return INSN_ARGS_MATCHER;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(false);
|
|
|
|
|
return INSN_ARGS_NONE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static enum matcher_instruction_argument_type
|
|
|
|
|
matcher_argument_type_for_instruction(enum matcher_instruction insn)
|
|
|
|
|
{
|
|
|
|
|
switch (insn) {
|
|
|
|
|
case MATCHER_IGNORE:
|
|
|
|
|
case MATCHER_ENTER_LIST:
|
|
|
|
|
case MATCHER_LEAVE_LIST:
|
|
|
|
|
case MATCHER_CONTINUE_FROM_END:
|
|
|
|
|
case MATCHER_REMAINDING:
|
2023-03-22 22:54:03 +00:00
|
|
|
case MATCHER_UNPACK_PAIR:
|
2023-03-05 16:02:42 +00:00
|
|
|
return MINSN_ARGS_NONE;
|
|
|
|
|
case MATCHER_CHECK_CONST: // with index as values index
|
|
|
|
|
case MATCHER_CHECK_PRED: // with index as values index
|
|
|
|
|
return MINSN_ARGS_INDEX;
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR: // with name
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_LOCAL: // with name
|
|
|
|
|
return MINSN_ARGS_NAME;
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with name, index and len
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with name, index and len
|
|
|
|
|
return MINSN_ARGS_NAME_INDEX_LEN;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
assert(false);
|
|
|
|
|
return MINSN_ARGS_NONE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
valid_matcher_instruction(enum matcher_instruction insn)
|
|
|
|
|
{
|
|
|
|
|
switch (insn) {
|
|
|
|
|
case MATCHER_IGNORE:
|
|
|
|
|
case MATCHER_ENTER_LIST:
|
|
|
|
|
case MATCHER_LEAVE_LIST:
|
|
|
|
|
case MATCHER_CONTINUE_FROM_END:
|
|
|
|
|
case MATCHER_REMAINDING:
|
|
|
|
|
case MATCHER_CHECK_CONST:
|
|
|
|
|
case MATCHER_CHECK_PRED:
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR:
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_LOCAL:
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
|
2023-03-22 22:54:03 +00:00
|
|
|
case MATCHER_UNPACK_PAIR:
|
2023-03-05 16:02:42 +00:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
valid_instruction(enum instruction insn)
|
|
|
|
|
{
|
|
|
|
|
switch (insn) {
|
|
|
|
|
case INSN_NIL:
|
|
|
|
|
case INSN_TRUE:
|
|
|
|
|
case INSN_FALSE:
|
|
|
|
|
case INSN_LIST_APPEND:
|
|
|
|
|
case INSN_LIST_EXPAND_INTO:
|
|
|
|
|
case INSN_DICT:
|
|
|
|
|
case INSN_DICT_APPEND_KVPAIR:
|
|
|
|
|
case INSN_GET_MEMBER:
|
|
|
|
|
case INSN_NEXT_LINE:
|
|
|
|
|
case INSN_DROP:
|
|
|
|
|
case INSN_DUP:
|
|
|
|
|
case INSN_CALL:
|
|
|
|
|
case INSN_MATCHER_MUST_MATCH:
|
|
|
|
|
case INSN_NUMBER:
|
|
|
|
|
case INSN_LIST:
|
|
|
|
|
case INSN_SET_LINE:
|
|
|
|
|
case INSN_FUNC:
|
|
|
|
|
case INSN_GET_BY_INDEX_KEEP:
|
|
|
|
|
case INSN_MATCHER_SET_VAL:
|
|
|
|
|
case INSN_STRING:
|
|
|
|
|
case INSN_VAR_GET:
|
|
|
|
|
case INSN_VAR_SET:
|
|
|
|
|
case INSN_VAR_SET_LOCAL:
|
|
|
|
|
case INSN_VAR_NEW:
|
|
|
|
|
case INSN_VAR_NEW_LOCAL:
|
|
|
|
|
case INSN_MOVE_TO_LOCAL_VAR:
|
|
|
|
|
case INSN_FUNC_SET_NAME:
|
|
|
|
|
case INSN_FUNC_ADD_SUBFUNC:
|
|
|
|
|
case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
|
|
|
|
|
case INSN_MATCHER_PUSH:
|
2023-03-22 22:54:03 +00:00
|
|
|
case INSN_BUILD_PAIR:
|
2023-03-05 16:02:42 +00:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for implementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
its child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
|
|
|
|
|
struct instruction_list *
|
2023-02-16 20:41:02 +00:00
|
|
|
apfl_instructions_new(struct gc *gc, size_t line, struct apfl_string *filename)
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
{
|
|
|
|
|
struct instruction_list *ilist = apfl_gc_new_instructions(gc);
|
|
|
|
|
if (ilist == NULL) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
*ilist = (struct instruction_list) {
|
|
|
|
|
.instructions = NULL,
|
|
|
|
|
.len = 0,
|
|
|
|
|
.cap = 0,
|
|
|
|
|
.line = line,
|
2023-01-30 21:50:01 +00:00
|
|
|
.filename = filename,
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
};
|
|
|
|
|
return ilist;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
apfl_instructions_deinit(struct apfl_allocator allocator, struct instruction_list *ilist)
|
|
|
|
|
{
|
|
|
|
|
FREE_LIST(allocator, ilist->instructions, ilist->cap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Fetches the argument slot that follows the instruction at index `i`.
 *
 * On success `i` is advanced by one and `arg` receives the slot
 * `ilist->instructions[i]` (with the new `i`).
 *
 * If there is no slot after the instruction within `ilist->len` (the list
 * ends right after an instruction that expects an argument), the ENCLOSING
 * FUNCTION returns immediately and neither `i` nor `arg` is touched. The
 * macro can therefore only be used inside functions returning void.
 *
 * The bounds check must look at `i + 1`, the slot that is actually read:
 * the callers only guarantee `i < len`, so a check on `i` alone would never
 * trigger and the read could go one past the used part of the list.
 *
 * `i` and `arg` must be modifiable lvalues; `ilist` and `i` are evaluated
 * more than once, so pass expressions without side effects.
 */
#define GET_ARGUMENT(ilist, i, arg)                 \
    do {                                            \
        if ((i) + 1 >= (ilist)->len) {              \
            return;                                 \
        }                                           \
        (arg) = (ilist)->instructions[++(i)];       \
    } while (0)
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
apfl_gc_instructions_traverse(struct instruction_list *ilist, gc_visitor cb, void *opaque)
|
|
|
|
|
{
|
|
|
|
|
union instruction_or_arg arg;
|
|
|
|
|
|
2023-01-30 21:50:01 +00:00
|
|
|
if (ilist->filename != NULL) {
|
|
|
|
|
cb(opaque, GC_OBJECT_FROM(ilist->filename, GC_TYPE_STRING));
|
|
|
|
|
}
|
|
|
|
|
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
for (size_t i = 0; i < ilist->len; i++) {
|
2023-03-05 16:02:42 +00:00
|
|
|
switch (argument_type_for_instruction(ilist->instructions[i].instruction)) {
|
|
|
|
|
case INSN_ARGS_NONE:
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_NUMBER:
|
|
|
|
|
case INSN_ARGS_COUNT:
|
|
|
|
|
case INSN_ARGS_INDEX:
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
i++;
|
|
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case INSN_ARGS_STRING:
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
GET_ARGUMENT(ilist, i, arg);
|
|
|
|
|
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
|
2022-07-01 20:03:34 +00:00
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case INSN_ARGS_BODY:
|
2022-07-11 19:41:05 +00:00
|
|
|
GET_ARGUMENT(ilist, i, arg);
|
|
|
|
|
cb(opaque, GC_OBJECT_FROM(arg.body, GC_TYPE_INSTRUCTIONS));
|
|
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case INSN_ARGS_MATCHER:
|
2022-07-28 18:46:32 +00:00
|
|
|
GET_ARGUMENT(ilist, i, arg);
|
|
|
|
|
cb(opaque, GC_OBJECT_FROM(arg.matcher, GC_TYPE_MATCHER_INSTRUCTIONS));
|
|
|
|
|
break;
|
2022-11-19 21:06:23 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
apfl_gc_matcher_instructions_traverse(struct matcher_instruction_list *milist, gc_visitor cb, void *opaque)
|
|
|
|
|
{
|
|
|
|
|
union matcher_instruction_or_arg arg;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < milist->len; i++) {
|
2023-03-05 16:02:42 +00:00
|
|
|
switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) {
|
|
|
|
|
case MINSN_ARGS_NONE:
|
|
|
|
|
break;
|
|
|
|
|
case MINSN_ARGS_INDEX:
|
2022-11-19 21:06:23 +00:00
|
|
|
i++;
|
|
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case MINSN_ARGS_NAME:
|
2022-11-19 21:06:23 +00:00
|
|
|
GET_ARGUMENT(milist, i, arg);
|
2022-07-28 18:46:32 +00:00
|
|
|
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
|
2022-11-19 21:06:23 +00:00
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case MINSN_ARGS_NAME_INDEX_LEN:
|
2022-11-19 21:06:23 +00:00
|
|
|
GET_ARGUMENT(milist, i, arg);
|
|
|
|
|
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
|
|
|
|
|
i++;
|
2022-07-28 18:46:32 +00:00
|
|
|
i++;
|
|
|
|
|
break;
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
apfl_instruction_to_string(enum instruction insn)
|
|
|
|
|
{
|
|
|
|
|
switch (insn) {
|
|
|
|
|
case INSN_NIL:
|
|
|
|
|
return "INSN_NIL";
|
|
|
|
|
case INSN_TRUE:
|
|
|
|
|
return "INSN_TRUE";
|
|
|
|
|
case INSN_FALSE:
|
|
|
|
|
return "INSN_FALSE";
|
|
|
|
|
case INSN_NUMBER:
|
|
|
|
|
return "INSN_NUMBER";
|
|
|
|
|
case INSN_STRING:
|
|
|
|
|
return "INSN_STRING";
|
|
|
|
|
case INSN_LIST:
|
|
|
|
|
return "INSN_LIST";
|
|
|
|
|
case INSN_LIST_APPEND:
|
|
|
|
|
return "INSN_LIST_APPEND";
|
|
|
|
|
case INSN_LIST_EXPAND_INTO:
|
|
|
|
|
return "INSN_LIST_EXPAND_INTO";
|
|
|
|
|
case INSN_DICT:
|
|
|
|
|
return "INSN_DICT";
|
|
|
|
|
case INSN_DICT_APPEND_KVPAIR:
|
|
|
|
|
return "INSN_DICT_APPEND_KVPAIR";
|
|
|
|
|
case INSN_GET_MEMBER:
|
|
|
|
|
return "INSN_GET_MEMBER";
|
|
|
|
|
case INSN_VAR_GET:
|
|
|
|
|
return "INSN_VAR_GET";
|
|
|
|
|
case INSN_VAR_SET:
|
|
|
|
|
return "INSN_VAR_SET";
|
2022-07-11 19:41:05 +00:00
|
|
|
case INSN_VAR_SET_LOCAL:
|
|
|
|
|
return "INSN_VAR_SET_LOCAL";
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
case INSN_VAR_NEW:
|
|
|
|
|
return "INSN_VAR_NEW";
|
2022-07-11 19:41:05 +00:00
|
|
|
case INSN_VAR_NEW_LOCAL:
|
|
|
|
|
return "INSN_VAR_NEW_LOCAL";
|
|
|
|
|
case INSN_MOVE_TO_LOCAL_VAR:
|
|
|
|
|
return "INSN_MOVE_TO_LOCAL_VAR";
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
case INSN_NEXT_LINE:
|
|
|
|
|
return "INSN_NEXT_LINE";
|
|
|
|
|
case INSN_SET_LINE:
|
|
|
|
|
return "INSN_SET_LINE";
|
2022-07-11 19:41:05 +00:00
|
|
|
case INSN_GET_BY_INDEX_KEEP:
|
|
|
|
|
return "INSN_GET_BY_INDEX_KEEP";
|
|
|
|
|
case INSN_DROP:
|
|
|
|
|
return "INSN_DROP";
|
2022-11-19 21:06:23 +00:00
|
|
|
case INSN_DUP:
|
|
|
|
|
return "INSN_DUP";
|
2022-07-11 19:41:05 +00:00
|
|
|
case INSN_CALL:
|
|
|
|
|
return "INSN_CALL";
|
|
|
|
|
case INSN_FUNC:
|
|
|
|
|
return "INSN_FUNC";
|
2022-08-12 22:50:26 +00:00
|
|
|
case INSN_FUNC_ADD_SUBFUNC:
|
|
|
|
|
return "INSN_FUNC_ADD_SUBFUNC";
|
2023-02-25 22:19:45 +00:00
|
|
|
case INSN_FUNC_ADD_SUBFUNC_ANYARGS:
|
|
|
|
|
return "INSN_FUNC_ADD_SUBFUNC_ANYARGS";
|
2023-01-24 20:59:54 +00:00
|
|
|
case INSN_FUNC_SET_NAME:
|
|
|
|
|
return "INSN_FUNC_SET_NAME";
|
2022-11-19 21:06:23 +00:00
|
|
|
case INSN_MATCHER_PUSH:
|
|
|
|
|
return "INSN_MATCHER_PUSH";
|
2022-07-28 18:46:32 +00:00
|
|
|
case INSN_MATCHER_SET_VAL:
|
|
|
|
|
return "INSN_MATCHER_SET_VAL";
|
|
|
|
|
case INSN_MATCHER_MUST_MATCH:
|
|
|
|
|
return "INSN_MATCHER_MUST_MATCH";
|
2023-03-22 22:54:03 +00:00
|
|
|
case INSN_BUILD_PAIR:
|
|
|
|
|
return "INSN_BUILD_PAIR";
|
2022-07-28 18:46:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return "??";
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
apfl_matcher_instruction_to_string(enum matcher_instruction insn)
|
|
|
|
|
{
|
|
|
|
|
switch (insn) {
|
|
|
|
|
case MATCHER_IGNORE:
|
|
|
|
|
return "MATCHER_IGNORE";
|
2022-11-19 21:06:23 +00:00
|
|
|
case MATCHER_CAPTURE_TO_VAR:
|
|
|
|
|
return "MATCHER_CAPTURE_TO_VAR";
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_LOCAL:
|
|
|
|
|
return "MATCHER_CAPTURE_TO_VAR_LOCAL";
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
|
|
|
|
|
return "MATCHER_CAPTURE_TO_VAR_WITH_PATH";
|
|
|
|
|
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
|
|
|
|
|
return "MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH";
|
2022-07-28 18:46:32 +00:00
|
|
|
case MATCHER_CHECK_CONST:
|
|
|
|
|
return "MATCHER_CHECK_CONST";
|
|
|
|
|
case MATCHER_CHECK_PRED:
|
|
|
|
|
return "MATCHER_CHECK_PRED";
|
|
|
|
|
case MATCHER_ENTER_LIST:
|
|
|
|
|
return "MATCHER_ENTER_LIST";
|
|
|
|
|
case MATCHER_LEAVE_LIST:
|
|
|
|
|
return "MATCHER_LEAVE_LIST";
|
|
|
|
|
case MATCHER_CONTINUE_FROM_END:
|
|
|
|
|
return "MATCHER_CONTINUE_FROM_END";
|
|
|
|
|
case MATCHER_REMAINDING:
|
|
|
|
|
return "MATCHER_REMAINDING";
|
2023-03-22 22:54:03 +00:00
|
|
|
case MATCHER_UNPACK_PAIR:
|
|
|
|
|
return "MATCHER_UNPACK_PAIR";
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return "??";
|
|
|
|
|
}
|
2022-07-28 18:46:32 +00:00
|
|
|
|
|
|
|
|
struct matcher_instruction_list *
|
|
|
|
|
apfl_matcher_instructions_new(struct gc *gc)
|
|
|
|
|
{
|
|
|
|
|
struct matcher_instruction_list *milist = apfl_gc_new_matcher_instructions(gc);
|
|
|
|
|
if (milist == NULL) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
*milist = (struct matcher_instruction_list) {
|
|
|
|
|
.instructions = NULL,
|
|
|
|
|
.len = 0,
|
|
|
|
|
.cap = 0,
|
|
|
|
|
.capture_count = 0,
|
|
|
|
|
.value_count = 0,
|
|
|
|
|
};
|
|
|
|
|
return milist;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
apfl_matcher_instructions_deinit(struct apfl_allocator allocator, struct matcher_instruction_list *milist)
|
|
|
|
|
{
|
|
|
|
|
FREE_LIST(allocator, milist->instructions, milist->cap);
|
|
|
|
|
}
|
2022-07-28 18:49:29 +00:00
|
|
|
|
2022-11-19 21:06:23 +00:00
|
|
|
/*
 * Fetches the argument slot that follows position `i` of `ilist` into `arg`
 * and advances `i` onto that slot.
 *
 * The argument lives at index i+1, so that index (not i itself) has to be
 * inside the list. If it is not, "Bytecode corrupted" is written to `w` and
 * the *enclosing function* returns false. Must therefore only be used inside
 * functions returning bool.
 *
 * `i` must be a modifiable lvalue; `ilist` must point to a list with `len`
 * and `instructions` members.
 */
#define GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg)                          \
    do {                                                                 \
        if ((i) + 1 >= (ilist)->len) {                                   \
            FMT_TRY(apfl_io_write_string(w, "Bytecode corrupted"));      \
            return false;                                                \
        }                                                                \
        (arg) = (ilist)->instructions[++(i)];                            \
    } while (0)
|
|
|
|
|
|
2022-08-12 22:50:26 +00:00
|
|
|
bool
|
2023-02-10 20:38:54 +00:00
|
|
|
apfl_bytecode_dump_matcher(unsigned indent, struct apfl_io_writer w, struct matcher_instruction_list *milist)
|
2022-07-28 18:49:29 +00:00
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < milist->len; i++) {
|
2022-11-19 21:06:23 +00:00
|
|
|
union matcher_instruction_or_arg arg;
|
|
|
|
|
|
2022-07-28 18:49:29 +00:00
|
|
|
FMT_TRY(apfl_format_put_indent(w, indent));
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, apfl_matcher_instruction_to_string(milist->instructions[i].instruction)));
|
2022-07-28 18:49:29 +00:00
|
|
|
|
2023-03-05 16:02:42 +00:00
|
|
|
switch (matcher_argument_type_for_instruction(milist->instructions[i].instruction)) {
|
|
|
|
|
case MINSN_ARGS_NONE:
|
2022-07-28 18:49:29 +00:00
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case MINSN_ARGS_NAME:
|
2022-11-19 21:06:23 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " "));
|
|
|
|
|
FMT_TRY(apfl_io_write_string(w, *arg.string));
|
2022-11-19 21:06:23 +00:00
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case MINSN_ARGS_NAME_INDEX_LEN:
|
2022-11-19 21:06:23 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " "));
|
|
|
|
|
FMT_TRY(apfl_io_write_string(w, *arg.string));
|
2022-11-19 21:06:23 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, ", "));
|
2022-11-19 21:06:23 +00:00
|
|
|
FMT_TRY(apfl_format_put_int(w, (int)arg.index));
|
|
|
|
|
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, ", "));
|
2022-11-19 21:06:23 +00:00
|
|
|
FMT_TRY(apfl_format_put_int(w, (int)arg.len));
|
|
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case MINSN_ARGS_INDEX:
|
2022-11-19 21:06:23 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " "));
|
2022-11-19 21:06:23 +00:00
|
|
|
FMT_TRY(apfl_format_put_int(w, (int)arg.index));
|
2022-07-28 18:49:29 +00:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_byte(w, '\n'));
|
2022-07-28 18:49:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-12 22:50:26 +00:00
|
|
|
bool
|
2023-02-10 20:38:54 +00:00
|
|
|
apfl_bytecode_dump(unsigned indent, struct apfl_io_writer w, struct instruction_list *ilist)
|
2022-07-28 18:49:29 +00:00
|
|
|
{
|
|
|
|
|
union instruction_or_arg arg;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < ilist->len; i++) {
|
|
|
|
|
FMT_TRY(apfl_format_put_indent(w, indent));
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, apfl_instruction_to_string(ilist->instructions[i].instruction)));
|
2022-07-28 18:49:29 +00:00
|
|
|
|
2023-03-05 16:02:42 +00:00
|
|
|
switch (argument_type_for_instruction(ilist->instructions[i].instruction)) {
|
|
|
|
|
case INSN_ARGS_NONE:
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_NUMBER:
|
2022-07-28 18:49:29 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " "));
|
2022-07-28 18:49:29 +00:00
|
|
|
FMT_TRY(apfl_format_put_number(w, arg.number));
|
|
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case INSN_ARGS_COUNT:
|
2022-07-28 18:49:29 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " "));
|
2022-07-28 18:49:29 +00:00
|
|
|
FMT_TRY(apfl_format_put_int(w, (int)arg.count));
|
|
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case INSN_ARGS_INDEX:
|
2022-07-28 18:49:29 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " "));
|
2022-07-28 18:49:29 +00:00
|
|
|
FMT_TRY(apfl_format_put_int(w, (int)arg.index));
|
|
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case INSN_ARGS_STRING:
|
2022-07-28 18:49:29 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " "));
|
|
|
|
|
FMT_TRY(apfl_io_write_string(w, *arg.string));
|
2022-07-28 18:49:29 +00:00
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case INSN_ARGS_BODY:
|
2022-07-28 18:49:29 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " ilist{\n"));
|
2022-08-12 22:50:26 +00:00
|
|
|
FMT_TRY(apfl_bytecode_dump(indent+1, w, arg.body));
|
2022-07-28 18:49:29 +00:00
|
|
|
FMT_TRY(apfl_format_put_indent(w, indent));
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, "}"));
|
2022-07-28 18:49:29 +00:00
|
|
|
break;
|
2023-03-05 16:02:42 +00:00
|
|
|
case INSN_ARGS_MATCHER:
|
2022-07-28 18:49:29 +00:00
|
|
|
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, " milist{\n"));
|
2022-08-12 22:50:26 +00:00
|
|
|
FMT_TRY(apfl_bytecode_dump_matcher(indent+1, w, arg.matcher));
|
2022-07-28 18:49:29 +00:00
|
|
|
FMT_TRY(apfl_format_put_indent(w, indent));
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_string(w, "}"));
|
2022-07-28 18:49:29 +00:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-10 20:38:54 +00:00
|
|
|
FMT_TRY(apfl_io_write_byte(w, '\n'));
|
2022-07-28 18:49:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2023-03-05 16:02:42 +00:00
|
|
|
|
|
|
|
|
struct serializer {
|
|
|
|
|
struct apfl_allocator allocator;
|
|
|
|
|
struct apfl_io_writer w;
|
|
|
|
|
struct apfl_hashmap string_lookup;
|
|
|
|
|
size_t next_string_index;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct unserializer {
|
|
|
|
|
struct gc *gc;
|
|
|
|
|
struct apfl_io_reader r;
|
|
|
|
|
struct apfl_string **strings;
|
|
|
|
|
size_t strings_len;
|
|
|
|
|
size_t strings_cap;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
serializer_strings_eq(void *opaque, const void *_a, const void *_b)
|
|
|
|
|
{
|
|
|
|
|
(void)opaque;
|
|
|
|
|
|
|
|
|
|
const struct apfl_string * const *a = _a;
|
|
|
|
|
const struct apfl_string * const *b = _b;
|
|
|
|
|
|
|
|
|
|
return apfl_string_eq(**a, **b);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static apfl_hash
|
|
|
|
|
serializer_strings_hash(void *opaque, const void *_key)
|
|
|
|
|
{
|
|
|
|
|
(void)opaque;
|
|
|
|
|
|
|
|
|
|
const struct apfl_string * const *key = _key;
|
|
|
|
|
struct apfl_string_view sv = apfl_string_view_from(**key);
|
|
|
|
|
return apfl_hash_fnv1a(sv.bytes, sv.len);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Largest value representable in 64 bits; upper bound for encoded integers. */
#define MAXU64 (0xFFFFFFFFFFFFFFFF)
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
serialize_size(struct apfl_io_writer w, size_t n)
|
|
|
|
|
{
|
|
|
|
|
uintmax_t _n = n;
|
|
|
|
|
if (_n > MAXU64) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return apfl_encode_u64(w, _n);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
unserialize_size(struct apfl_io_reader r, size_t *n)
|
|
|
|
|
{
|
|
|
|
|
uint_least64_t _n;
|
|
|
|
|
FMT_TRY(apfl_decode_u64(r, &_n));
|
|
|
|
|
if (_n > SIZE_MAX) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
*n = (size_t)_n;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Fetches the argument slot that follows position `i` of `ilist` into `arg`
 * and advances `i` onto that slot.
 *
 * The argument lives at index i+1, so that index (not i itself) has to be
 * inside the list. A missing argument means the in-memory bytecode is
 * malformed: this trips an assertion in debug builds and makes the
 * *enclosing function* return false otherwise. Must therefore only be used
 * inside functions returning bool.
 *
 * `i` must be a modifiable lvalue; `ilist` must point to a list with `len`
 * and `instructions` members.
 */
#define GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg)      \
    do {                                               \
        if ((i) + 1 >= (ilist)->len) {                 \
            assert(false);                             \
            return false;                              \
        }                                              \
        (arg) = (ilist)->instructions[++(i)];          \
    } while (0)
|
|
|
|
|
|
|
|
|
|
/*
 * Defines a static function `name(ListType *, InsOrArgType)` that stores one
 * instruction/argument slot at the end of a list.
 *
 * The generated function never grows the list: it returns false when
 * len has already reached cap, and true after storing the slot and
 * incrementing len.
 */
#define DEF_APPEND_INS_OR_ARG(name, ListType, InsOrArgType)    \
    static bool                                                \
    name(ListType *list, InsOrArgType slot)                    \
    {                                                          \
        if (list->len < list->cap) {                           \
            list->instructions[list->len++] = slot;            \
            return true;                                       \
        }                                                      \
                                                               \
        return false;                                          \
    }
|
|
|
|
|
|
|
|
|
|
/*
 * Calls `fn` with `ilist` and a compound literal of union type
 * `InsOrArgType` whose member `which` is initialised to `arg`.
 * Evaluates to whatever `fn` returns.
 */
#define ABSTRACT_APPEND_INS_OR_ARG(fn, InsOrArgType, ilist, which, arg)    \
    fn(                                                                    \
        (ilist),                                                           \
        (InsOrArgType) {.which = (arg)}                                    \
    )
|
|
|
|
|
|
|
|
|
|
DEF_APPEND_INS_OR_ARG(
|
|
|
|
|
append_instruction_or_arg,
|
|
|
|
|
struct instruction_list,
|
|
|
|
|
union instruction_or_arg
|
|
|
|
|
)
|
|
|
|
|
DEF_APPEND_INS_OR_ARG(
|
|
|
|
|
append_matcher_instruction_or_arg,
|
|
|
|
|
struct matcher_instruction_list,
|
|
|
|
|
union matcher_instruction_or_arg
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
/*
 * Appends `arg` to the instruction list `ilist`, stored in member `which` of
 * a `union instruction_or_arg`. Evaluates to the result of
 * append_instruction_or_arg().
 */
#define APPEND_INS_OR_ARG(ilist, which, arg) \
    ABSTRACT_APPEND_INS_OR_ARG(append_instruction_or_arg, union instruction_or_arg, ilist, which, arg)
|
|
|
|
|
|
|
|
|
|
/*
 * Appends `arg` to the matcher instruction list `ilist`, stored in member
 * `which` of a `union matcher_instruction_or_arg`. Evaluates to the result
 * of append_matcher_instruction_or_arg().
 */
#define APPEND_MATCHER_INS_OR_ARG(ilist, which, arg) \
    ABSTRACT_APPEND_INS_OR_ARG(append_matcher_instruction_or_arg, union matcher_instruction_or_arg, ilist, which, arg)
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
serialize_string(
|
|
|
|
|
struct serializer *serializer,
|
|
|
|
|
struct apfl_string *string
|
|
|
|
|
) {
|
|
|
|
|
if (string == NULL) {
|
|
|
|
|
return apfl_encode_u64(serializer->w, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint_least64_t index;
|
|
|
|
|
if (apfl_hashmap_get(&serializer->string_lookup, &string, &index)) {
|
|
|
|
|
if (index > (MAXU64>>1)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
index <<= 1;
|
|
|
|
|
index |= 1;
|
|
|
|
|
|
|
|
|
|
return apfl_encode_u64(serializer->w, index);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uintmax_t _len = string->len;
|
|
|
|
|
if (_len > (MAXU64>>1)-1) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FMT_TRY(apfl_encode_u64(serializer->w, (string->len + 1) << 1));
|
|
|
|
|
FMT_TRY(apfl_io_write_string(serializer->w, *string));
|
|
|
|
|
|
|
|
|
|
index = serializer->next_string_index;
|
|
|
|
|
serializer->next_string_index++;
|
|
|
|
|
|
|
|
|
|
return apfl_hashmap_set(&serializer->string_lookup, &string, &index);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
unserialize_string(
|
|
|
|
|
struct unserializer *unserializer,
|
|
|
|
|
struct apfl_string **s
|
|
|
|
|
) {
|
|
|
|
|
uint_least64_t index_or_len;
|
|
|
|
|
FMT_TRY(apfl_decode_u64(unserializer->r, &index_or_len));
|
|
|
|
|
|
|
|
|
|
if (index_or_len == 0) {
|
|
|
|
|
*s = NULL;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool is_index = (index_or_len & 1) == 1;
|
|
|
|
|
index_or_len >>= 1;
|
|
|
|
|
if (is_index) {
|
|
|
|
|
if (index_or_len > SIZE_MAX) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
size_t index = index_or_len;
|
|
|
|
|
if (index >= unserializer->strings_len) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
*s = unserializer->strings[index];
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
index_or_len -= 1;
|
|
|
|
|
if (index_or_len > SIZE_MAX) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
size_t len = index_or_len;
|
|
|
|
|
unsigned char *buf = NULL;
|
|
|
|
|
if (len > 0) {
|
|
|
|
|
buf = ALLOC_BYTES(unserializer->gc->allocator, len);
|
|
|
|
|
if (buf == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
if (!apfl_io_read_bytes_exact_size(unserializer->r, buf, len)) {
|
|
|
|
|
FREE_BYTES(unserializer->gc->allocator, buf, len);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct apfl_string tmpstring = {
|
|
|
|
|
.bytes = buf,
|
|
|
|
|
.len = len,
|
|
|
|
|
.cap = len,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if ((*s = apfl_string_move_into_new_gc_string(unserializer->gc, &tmpstring)) == NULL) {
|
|
|
|
|
FREE_BYTES(unserializer->gc->allocator, buf, len);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!apfl_resizable_append(
|
|
|
|
|
unserializer->gc->allocator,
|
|
|
|
|
sizeof(struct apfl_string *),
|
|
|
|
|
(void **)&unserializer->strings,
|
|
|
|
|
&unserializer->strings_len,
|
|
|
|
|
&unserializer->strings_cap,
|
|
|
|
|
&(*s),
|
|
|
|
|
1
|
|
|
|
|
)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
serialize_milist(
|
|
|
|
|
struct serializer *serializer,
|
|
|
|
|
struct matcher_instruction_list *milist
|
|
|
|
|
) {
|
|
|
|
|
union matcher_instruction_or_arg arg;
|
|
|
|
|
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, milist->value_count));
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, milist->capture_count));
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, milist->len));
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < milist->len; i++) {
|
|
|
|
|
enum matcher_instruction insn = milist->instructions[i].instruction;
|
|
|
|
|
FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn));
|
|
|
|
|
|
|
|
|
|
switch (matcher_argument_type_for_instruction(insn)) {
|
|
|
|
|
case MINSN_ARGS_NONE:
|
|
|
|
|
break;
|
|
|
|
|
case MINSN_ARGS_INDEX:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, arg.index));
|
|
|
|
|
break;
|
|
|
|
|
case MINSN_ARGS_NAME:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_string(serializer, arg.string));
|
|
|
|
|
break;
|
|
|
|
|
case MINSN_ARGS_NAME_INDEX_LEN:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_string(serializer, arg.string));
|
|
|
|
|
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, arg.index));
|
|
|
|
|
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(milist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, arg.len));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
unserialize_milist(
|
|
|
|
|
struct unserializer *unserializer,
|
|
|
|
|
struct matcher_instruction_list *milist
|
|
|
|
|
) {
|
|
|
|
|
size_t len;
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &milist->value_count));
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &milist->capture_count));
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &len));
|
|
|
|
|
|
|
|
|
|
if ((milist->instructions = ALLOC_LIST(
|
|
|
|
|
unserializer->gc->allocator,
|
|
|
|
|
union matcher_instruction_or_arg,
|
|
|
|
|
len
|
|
|
|
|
)) == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
milist->cap = len;
|
|
|
|
|
milist->len = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while (milist->len < len) {
|
|
|
|
|
unsigned char b;
|
|
|
|
|
FMT_TRY(apfl_io_read_byte(unserializer->r, &b));
|
|
|
|
|
|
|
|
|
|
enum matcher_instruction insn = b;
|
|
|
|
|
|
|
|
|
|
if (!valid_matcher_instruction(insn)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, instruction, insn));
|
|
|
|
|
|
|
|
|
|
switch (matcher_argument_type_for_instruction(insn)) {
|
|
|
|
|
case MINSN_ARGS_NONE:
|
|
|
|
|
break;
|
|
|
|
|
case MINSN_ARGS_INDEX: {
|
|
|
|
|
size_t index;
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &index));
|
|
|
|
|
FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, index));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case MINSN_ARGS_NAME: {
|
|
|
|
|
struct apfl_string *name;
|
|
|
|
|
FMT_TRY(unserialize_string(unserializer, &name));
|
|
|
|
|
FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case MINSN_ARGS_NAME_INDEX_LEN: {
|
|
|
|
|
struct apfl_string *name;
|
|
|
|
|
FMT_TRY(unserialize_string(unserializer, &name));
|
|
|
|
|
FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, string, name));
|
|
|
|
|
|
|
|
|
|
size_t s;
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &s));
|
|
|
|
|
FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, index, s));
|
|
|
|
|
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &s));
|
|
|
|
|
FMT_TRY(APPEND_MATCHER_INS_OR_ARG(milist, len, s));
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
serialize_ilist(
|
|
|
|
|
struct serializer *serializer,
|
|
|
|
|
struct instruction_list *ilist
|
|
|
|
|
) {
|
|
|
|
|
union instruction_or_arg arg;
|
|
|
|
|
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, ilist->line));
|
|
|
|
|
FMT_TRY(serialize_string(serializer, ilist->filename));
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, ilist->len));
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < ilist->len; i++) {
|
|
|
|
|
enum instruction insn = ilist->instructions[i].instruction;
|
|
|
|
|
FMT_TRY(apfl_io_write_byte(serializer->w, (char)insn));
|
|
|
|
|
|
|
|
|
|
switch (argument_type_for_instruction(insn)) {
|
|
|
|
|
case INSN_ARGS_NONE:
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_NUMBER:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
|
|
|
|
|
FMT_TRY(apfl_encode_double(serializer->w, arg.number));
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_COUNT:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, arg.count));
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_INDEX:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_size(serializer->w, arg.index));
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_STRING:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_string(serializer, arg.string));
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_BODY:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_ilist(serializer, arg.body));
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_MATCHER:
|
|
|
|
|
GET_ARGUMENT_FOR_SERIALIZE(ilist, i, arg);
|
|
|
|
|
FMT_TRY(serialize_milist(serializer, arg.matcher));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * set_ilist callback for nested instruction lists.
 *
 * `opaque` points at the `struct instruction_list *` slot inside the parent
 * list; the new list is simply stored there. Always returns true.
 */
static bool
set_ilist_nested(
    struct unserializer *unserializer,
    void *opaque,
    struct instruction_list *ilist
) {
    (void)unserializer;

    struct instruction_list **slot = opaque;
    *slot = ilist;

    return true;
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
unserialize_ilist(
|
|
|
|
|
struct unserializer *unserializer,
|
|
|
|
|
bool (*set_ilist)(struct unserializer *, void *, struct instruction_list *ilist),
|
|
|
|
|
void *opaque
|
|
|
|
|
) {
|
|
|
|
|
size_t line;
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &line));
|
|
|
|
|
|
|
|
|
|
struct apfl_string *filename;
|
|
|
|
|
FMT_TRY(unserialize_string(unserializer, &filename));
|
|
|
|
|
|
|
|
|
|
size_t tmproots;
|
|
|
|
|
if (filename != NULL) {
|
|
|
|
|
tmproots = apfl_gc_tmproots_begin(unserializer->gc);
|
|
|
|
|
if (!apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(filename, GC_TYPE_STRING))) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct instruction_list *ilist = apfl_instructions_new(unserializer->gc, line, filename);
|
|
|
|
|
if (ilist == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (filename != NULL) {
|
|
|
|
|
apfl_gc_tmproots_restore(unserializer->gc, tmproots);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FMT_TRY(set_ilist(unserializer, opaque, ilist));
|
|
|
|
|
|
|
|
|
|
size_t len;
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &len));
|
|
|
|
|
|
|
|
|
|
if (len > 0) {
|
|
|
|
|
ilist->instructions = ALLOC_LIST(unserializer->gc->allocator, union instruction_or_arg, len);
|
|
|
|
|
if (ilist->instructions == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ilist->cap = len;
|
|
|
|
|
|
|
|
|
|
while (ilist->len < len) {
|
|
|
|
|
unsigned char b;
|
|
|
|
|
FMT_TRY(apfl_io_read_byte(unserializer->r, &b));
|
|
|
|
|
|
|
|
|
|
enum instruction insn = b;
|
|
|
|
|
|
|
|
|
|
if (!valid_instruction(insn)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FMT_TRY(APPEND_INS_OR_ARG(ilist, instruction, insn));
|
|
|
|
|
|
|
|
|
|
switch (argument_type_for_instruction(insn)) {
|
|
|
|
|
case INSN_ARGS_NONE:
|
|
|
|
|
break;
|
|
|
|
|
case INSN_ARGS_NUMBER: {
|
|
|
|
|
double d;
|
|
|
|
|
FMT_TRY(apfl_decode_double(unserializer->r, &d));
|
|
|
|
|
FMT_TRY(APPEND_INS_OR_ARG(ilist, number, d));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case INSN_ARGS_COUNT: {
|
|
|
|
|
size_t count;
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &count));
|
|
|
|
|
FMT_TRY(APPEND_INS_OR_ARG(ilist, count, count));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case INSN_ARGS_INDEX: {
|
|
|
|
|
size_t index;
|
|
|
|
|
FMT_TRY(unserialize_size(unserializer->r, &index));
|
|
|
|
|
FMT_TRY(APPEND_INS_OR_ARG(ilist, index, index));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case INSN_ARGS_STRING: {
|
|
|
|
|
struct apfl_string *s;
|
|
|
|
|
FMT_TRY(unserialize_string(unserializer, &s));
|
|
|
|
|
FMT_TRY(APPEND_INS_OR_ARG(ilist, string, s));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case INSN_ARGS_BODY: {
|
|
|
|
|
if (ilist->len >= ilist->cap) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
struct instruction_list **dst = &ilist->instructions[ilist->len].body;
|
|
|
|
|
ilist->len++;
|
|
|
|
|
FMT_TRY(unserialize_ilist(unserializer, set_ilist_nested, dst));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
case INSN_ARGS_MATCHER: {
|
|
|
|
|
if (ilist->len >= ilist->cap) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
struct matcher_instruction_list *matcher = apfl_matcher_instructions_new(unserializer->gc);
|
|
|
|
|
if (matcher == NULL) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
ilist->instructions[ilist->len].matcher = matcher;
|
|
|
|
|
ilist->len++;
|
|
|
|
|
FMT_TRY(unserialize_milist(unserializer, matcher));
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Magic bytes at the start of serialized bytecode; the last byte is the format version. */
static const unsigned char header[] = {
    '\0', 'a', 'p', 'f', 'l', 'B',
    BYTECODE_VERSION
};

/*
 * Wraps a byte array in a string view covering all of it.
 * `b` must be an actual array (not a pointer), as its size comes from sizeof.
 */
#define BYTE_ARRAY_SV(b) \
    ((struct apfl_string_view) { .bytes = (b), .len = sizeof(b), })

/* Number of bytes in `header`. */
#define HEADER_LEN sizeof(header)
|
|
|
|
|
|
|
|
|
|
static struct apfl_string_view
|
|
|
|
|
header_sv(void)
|
|
|
|
|
{
|
|
|
|
|
return BYTE_ARRAY_SV(header);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
bytecode_serialize_inner(
|
|
|
|
|
struct serializer *serializer,
|
|
|
|
|
struct instruction_list *ilist
|
|
|
|
|
) {
|
|
|
|
|
FMT_TRY(apfl_io_write_string(serializer->w, header_sv()));
|
|
|
|
|
FMT_TRY(serialize_ilist(serializer, ilist));
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
apfl_bytecode_serialize(
|
|
|
|
|
struct apfl_allocator allocator,
|
|
|
|
|
struct apfl_io_writer w,
|
|
|
|
|
struct instruction_list *ilist
|
|
|
|
|
) {
|
|
|
|
|
struct serializer serializer = {
|
|
|
|
|
.allocator = allocator,
|
|
|
|
|
.w = w,
|
|
|
|
|
.next_string_index = 0,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (!apfl_hashmap_init(
|
|
|
|
|
&serializer.string_lookup,
|
|
|
|
|
allocator,
|
|
|
|
|
(struct apfl_hashmap_callbacks) {
|
|
|
|
|
.opaque = NULL,
|
|
|
|
|
.keys_eq = serializer_strings_eq,
|
|
|
|
|
.calc_hash = serializer_strings_hash,
|
|
|
|
|
},
|
|
|
|
|
sizeof(struct apfl_string *),
|
|
|
|
|
sizeof(uint_least64_t)
|
|
|
|
|
)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool out = bytecode_serialize_inner(&serializer, ilist);
|
|
|
|
|
|
|
|
|
|
apfl_hashmap_deinit(&serializer.string_lookup);
|
|
|
|
|
|
|
|
|
|
return out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
set_ilist_root(
|
|
|
|
|
struct unserializer *unserializer,
|
|
|
|
|
void *opaque,
|
|
|
|
|
struct instruction_list *ilist
|
|
|
|
|
) {
|
|
|
|
|
struct instruction_list **dest = opaque;
|
|
|
|
|
if (!apfl_gc_tmproot_add(unserializer->gc, GC_OBJECT_FROM(ilist, GC_TYPE_INSTRUCTIONS))) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*dest = ilist;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct instruction_list *
|
|
|
|
|
bytecode_unserialize_inner(struct unserializer *unserializer)
|
|
|
|
|
{
|
|
|
|
|
unsigned char header_buf[HEADER_LEN];
|
|
|
|
|
FMT_TRY(apfl_io_read_bytes_exact_size(unserializer->r, header_buf, HEADER_LEN));
|
|
|
|
|
|
|
|
|
|
if (!apfl_string_eq(header_sv(), BYTE_ARRAY_SV(header_buf))) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct instruction_list *ilist = NULL;
|
|
|
|
|
|
|
|
|
|
if (!unserialize_ilist(unserializer, set_ilist_root, &ilist)) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ilist;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct instruction_list *
|
|
|
|
|
apfl_bytecode_unserialize(
|
|
|
|
|
struct gc *gc,
|
|
|
|
|
struct apfl_io_reader r
|
|
|
|
|
) {
|
|
|
|
|
struct unserializer unserializer = {
|
|
|
|
|
.gc = gc,
|
|
|
|
|
.r = r,
|
|
|
|
|
.strings = NULL,
|
|
|
|
|
.strings_len = 0,
|
|
|
|
|
.strings_cap = 0,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
size_t tmproots = apfl_gc_tmproots_begin(gc);
|
|
|
|
|
struct instruction_list *out = bytecode_unserialize_inner(&unserializer);
|
|
|
|
|
apfl_gc_tmproots_restore(gc, tmproots);
|
|
|
|
|
|
|
|
|
|
FREE_LIST(gc->allocator, unserializer.strings, unserializer.strings_cap);
|
|
|
|
|
|
|
|
|
|
return out;
|
|
|
|
|
}
|