apfl/src/bytecode.c

383 lines
12 KiB
C
Raw Normal View History

Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
#include <assert.h>
#include "apfl.h"
#include "alloc.h"
#include "bytecode.h"
#include "format.h"
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
#include "gc.h"
struct instruction_list *
2023-01-30 21:50:01 +00:00
apfl_instructions_new(struct gc *gc, int line, struct apfl_string *filename)
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
{
struct instruction_list *ilist = apfl_gc_new_instructions(gc);
if (ilist == NULL) {
return NULL;
}
*ilist = (struct instruction_list) {
.instructions = NULL,
.len = 0,
.cap = 0,
.line = line,
2023-01-30 21:50:01 +00:00
.filename = filename,
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
};
return ilist;
}
void
apfl_instructions_deinit(struct apfl_allocator allocator, struct instruction_list *ilist)
{
FREE_LIST(allocator, ilist->instructions, ilist->cap);
}
#define GET_ARGUMENT(ilist, i, arg) \
do { \
if (i >= ilist->len) { \
return; \
} \
arg = ilist->instructions[++i]; \
} while (0)
void
apfl_gc_instructions_traverse(struct instruction_list *ilist, gc_visitor cb, void *opaque)
{
union instruction_or_arg arg;
2023-01-30 21:50:01 +00:00
if (ilist->filename != NULL) {
cb(opaque, GC_OBJECT_FROM(ilist->filename, GC_TYPE_STRING));
}
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
for (size_t i = 0; i < ilist->len; i++) {
switch (ilist->instructions[i].instruction) {
case INSN_NIL:
case INSN_TRUE:
case INSN_FALSE:
case INSN_LIST_APPEND:
case INSN_LIST_EXPAND_INTO:
case INSN_DICT:
case INSN_DICT_APPEND_KVPAIR:
case INSN_GET_MEMBER:
case INSN_NEXT_LINE:
case INSN_DROP:
case INSN_DUP:
case INSN_CALL:
case INSN_MATCHER_MUST_MATCH:
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
break;
case INSN_NUMBER:
case INSN_LIST:
case INSN_SET_LINE:
case INSN_GET_BY_INDEX_KEEP:
case INSN_MATCHER_SET_VAL:
case INSN_FUNC:
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
i++;
break;
case INSN_STRING:
case INSN_VAR_GET:
case INSN_VAR_SET:
case INSN_VAR_SET_LOCAL:
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
case INSN_VAR_NEW:
case INSN_VAR_NEW_LOCAL:
case INSN_MOVE_TO_LOCAL_VAR:
case INSN_FUNC_SET_NAME:
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
GET_ARGUMENT(ilist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
break;
case INSN_FUNC_ADD_SUBFUNC:
GET_ARGUMENT(ilist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.body, GC_TYPE_INSTRUCTIONS));
break;
case INSN_MATCHER_PUSH:
GET_ARGUMENT(ilist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.matcher, GC_TYPE_MATCHER_INSTRUCTIONS));
break;
}
}
}
void
apfl_gc_matcher_instructions_traverse(struct matcher_instruction_list *milist, gc_visitor cb, void *opaque)
{
union matcher_instruction_or_arg arg;
for (size_t i = 0; i < milist->len; i++) {
switch (milist->instructions[i].instruction) {
case MATCHER_IGNORE:
case MATCHER_ENTER_LIST:
case MATCHER_LEAVE_LIST:
case MATCHER_CONTINUE_FROM_END:
case MATCHER_REMAINDING:
break;
case MATCHER_CHECK_CONST: // with index as values index
case MATCHER_CHECK_PRED: // with index as values index
i++;
break;
case MATCHER_CAPTURE_TO_VAR: // with name
case MATCHER_CAPTURE_TO_VAR_LOCAL: // with name
GET_ARGUMENT(milist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
break;
case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with name, index and len
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with name, index and len
GET_ARGUMENT(milist, i, arg);
cb(opaque, GC_OBJECT_FROM(arg.string, GC_TYPE_STRING));
i++;
i++;
break;
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
}
}
}
const char *
apfl_instruction_to_string(enum instruction insn)
{
switch (insn) {
case INSN_NIL:
return "INSN_NIL";
case INSN_TRUE:
return "INSN_TRUE";
case INSN_FALSE:
return "INSN_FALSE";
case INSN_NUMBER:
return "INSN_NUMBER";
case INSN_STRING:
return "INSN_STRING";
case INSN_LIST:
return "INSN_LIST";
case INSN_LIST_APPEND:
return "INSN_LIST_APPEND";
case INSN_LIST_EXPAND_INTO:
return "INSN_LIST_EXPAND_INTO";
case INSN_DICT:
return "INSN_DICT";
case INSN_DICT_APPEND_KVPAIR:
return "INSN_DICT_APPEND_KVPAIR";
case INSN_GET_MEMBER:
return "INSN_GET_MEMBER";
case INSN_VAR_GET:
return "INSN_VAR_GET";
case INSN_VAR_SET:
return "INSN_VAR_SET";
case INSN_VAR_SET_LOCAL:
return "INSN_VAR_SET_LOCAL";
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
case INSN_VAR_NEW:
return "INSN_VAR_NEW";
case INSN_VAR_NEW_LOCAL:
return "INSN_VAR_NEW_LOCAL";
case INSN_MOVE_TO_LOCAL_VAR:
return "INSN_MOVE_TO_LOCAL_VAR";
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
case INSN_NEXT_LINE:
return "INSN_NEXT_LINE";
case INSN_SET_LINE:
return "INSN_SET_LINE";
case INSN_GET_BY_INDEX_KEEP:
return "INSN_GET_BY_INDEX_KEEP";
case INSN_DROP:
return "INSN_DROP";
case INSN_DUP:
return "INSN_DUP";
case INSN_CALL:
return "INSN_CALL";
case INSN_FUNC:
return "INSN_FUNC";
case INSN_FUNC_ADD_SUBFUNC:
return "INSN_FUNC_ADD_SUBFUNC";
case INSN_FUNC_SET_NAME:
return "INSN_FUNC_SET_NAME";
case INSN_MATCHER_PUSH:
return "INSN_MATCHER_PUSH";
case INSN_MATCHER_SET_VAL:
return "INSN_MATCHER_SET_VAL";
case INSN_MATCHER_MUST_MATCH:
return "INSN_MATCHER_MUST_MATCH";
}
return "??";
}
const char *
apfl_matcher_instruction_to_string(enum matcher_instruction insn)
{
switch (insn) {
case MATCHER_IGNORE:
return "MATCHER_IGNORE";
case MATCHER_CAPTURE_TO_VAR:
return "MATCHER_CAPTURE_TO_VAR";
case MATCHER_CAPTURE_TO_VAR_LOCAL:
return "MATCHER_CAPTURE_TO_VAR_LOCAL";
case MATCHER_CAPTURE_TO_VAR_WITH_PATH:
return "MATCHER_CAPTURE_TO_VAR_WITH_PATH";
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH:
return "MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH";
case MATCHER_CHECK_CONST:
return "MATCHER_CHECK_CONST";
case MATCHER_CHECK_PRED:
return "MATCHER_CHECK_PRED";
case MATCHER_ENTER_LIST:
return "MATCHER_ENTER_LIST";
case MATCHER_LEAVE_LIST:
return "MATCHER_LEAVE_LIST";
case MATCHER_CONTINUE_FROM_END:
return "MATCHER_CONTINUE_FROM_END";
case MATCHER_REMAINDING:
return "MATCHER_REMAINDING";
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
}
return "??";
}
struct matcher_instruction_list *
apfl_matcher_instructions_new(struct gc *gc)
{
struct matcher_instruction_list *milist = apfl_gc_new_matcher_instructions(gc);
if (milist == NULL) {
return NULL;
}
*milist = (struct matcher_instruction_list) {
.instructions = NULL,
.len = 0,
.cap = 0,
.capture_count = 0,
.value_count = 0,
};
return milist;
}
void
apfl_matcher_instructions_deinit(struct apfl_allocator allocator, struct matcher_instruction_list *milist)
{
FREE_LIST(allocator, milist->instructions, milist->cap);
}
#define GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg) \
do { \
if (i >= ilist->len) { \
FMT_TRY(apfl_format_put_string(w, "Bytecode corrupted")); \
return false; \
} \
arg = ilist->instructions[++i]; \
} while (0)
bool
apfl_bytecode_dump_matcher(unsigned indent, struct apfl_format_writer w, struct matcher_instruction_list *milist)
{
for (size_t i = 0; i < milist->len; i++) {
union matcher_instruction_or_arg arg;
FMT_TRY(apfl_format_put_indent(w, indent));
FMT_TRY(apfl_format_put_string(w, apfl_matcher_instruction_to_string(milist->instructions[i].instruction)));
switch (milist->instructions[i].instruction) {
case MATCHER_IGNORE:
case MATCHER_ENTER_LIST:
case MATCHER_LEAVE_LIST:
case MATCHER_CONTINUE_FROM_END:
case MATCHER_REMAINDING:
break;
case MATCHER_CAPTURE_TO_VAR:
case MATCHER_CAPTURE_TO_VAR_LOCAL:
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
FMT_TRY(apfl_format_put_string(w, " "));
FMT_TRY(apfl_format_put_string(w, *arg.string));
break;
case MATCHER_CAPTURE_TO_VAR_WITH_PATH: // with string, index and len
case MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH: // with string, index and len
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
FMT_TRY(apfl_format_put_string(w, " "));
FMT_TRY(apfl_format_put_string(w, *arg.string));
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
FMT_TRY(apfl_format_put_string(w, ", "));
FMT_TRY(apfl_format_put_int(w, (int)arg.index));
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
FMT_TRY(apfl_format_put_string(w, ", "));
FMT_TRY(apfl_format_put_int(w, (int)arg.len));
break;
case MATCHER_CHECK_CONST:
case MATCHER_CHECK_PRED:
GET_ARGUMENT_FOR_DUMP(w, milist, i, arg);
FMT_TRY(apfl_format_put_string(w, " "));
FMT_TRY(apfl_format_put_int(w, (int)arg.index));
break;
}
FMT_TRY(apfl_format_put_char(w, '\n'));
}
return true;
}
bool
apfl_bytecode_dump(unsigned indent, struct apfl_format_writer w, struct instruction_list *ilist)
{
union instruction_or_arg arg;
for (size_t i = 0; i < ilist->len; i++) {
FMT_TRY(apfl_format_put_indent(w, indent));
FMT_TRY(apfl_format_put_string(w, apfl_instruction_to_string(ilist->instructions[i].instruction)));
switch (ilist->instructions[i].instruction) {
case INSN_NIL:
case INSN_TRUE:
case INSN_FALSE:
case INSN_LIST_APPEND:
case INSN_LIST_EXPAND_INTO:
case INSN_DICT:
case INSN_DICT_APPEND_KVPAIR:
case INSN_GET_MEMBER:
case INSN_NEXT_LINE:
case INSN_DROP:
case INSN_DUP:
case INSN_CALL:
case INSN_MATCHER_MUST_MATCH:
break;
case INSN_NUMBER:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_format_put_string(w, " "));
FMT_TRY(apfl_format_put_number(w, arg.number));
break;
case INSN_LIST:
case INSN_SET_LINE:
case INSN_FUNC:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_format_put_string(w, " "));
FMT_TRY(apfl_format_put_int(w, (int)arg.count));
break;
case INSN_GET_BY_INDEX_KEEP:
case INSN_MATCHER_SET_VAL:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_format_put_string(w, " "));
FMT_TRY(apfl_format_put_int(w, (int)arg.index));
break;
case INSN_STRING:
case INSN_VAR_GET:
case INSN_VAR_SET:
case INSN_VAR_SET_LOCAL:
case INSN_VAR_NEW:
case INSN_VAR_NEW_LOCAL:
case INSN_MOVE_TO_LOCAL_VAR:
case INSN_FUNC_SET_NAME:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_format_put_string(w, " "));
FMT_TRY(apfl_format_put_string(w, *arg.string));
break;
case INSN_FUNC_ADD_SUBFUNC:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_format_put_string(w, " ilist{\n"));
FMT_TRY(apfl_bytecode_dump(indent+1, w, arg.body));
FMT_TRY(apfl_format_put_indent(w, indent));
FMT_TRY(apfl_format_put_string(w, "}"));
break;
case INSN_MATCHER_PUSH:
GET_ARGUMENT_FOR_DUMP(w, ilist, i, arg);
FMT_TRY(apfl_format_put_string(w, " milist{\n"));
FMT_TRY(apfl_bytecode_dump_matcher(indent+1, w, arg.matcher));
FMT_TRY(apfl_format_put_indent(w, indent));
FMT_TRY(apfl_format_put_string(w, "}"));
break;
}
FMT_TRY(apfl_format_put_char(w, '\n'));
}
return true;
}