Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles; all values
implemented up to this point cannot).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode-based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions that are evaluated in a separate step. This was done in
preparation for implementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
its child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
#ifndef APFL_BYTECODE_H
|
|
|
|
|
#define APFL_BYTECODE_H
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#include "apfl.h"
|
|
|
|
|
|
|
|
|
|
#include "gc.h"
|
|
|
|
|
|
2022-07-28 18:46:32 +00:00
|
|
|
// Opcodes of the matcher bytecode.
//
// Some opcodes are followed by inline arguments; the trailing comment on
// each enumerator lists them. Enumerator order determines the numeric
// values, so new opcodes should only be added with care.
enum matcher_instruction {
    MATCHER_IGNORE,
    MATCHER_CAPTURE_TO_VAR,                 // with name
    MATCHER_CAPTURE_TO_VAR_LOCAL,           // with name
    MATCHER_CAPTURE_TO_VAR_WITH_PATH,       // with name, index and len
    MATCHER_CAPTURE_TO_VAR_LOCAL_WITH_PATH, // with name, index and len
    MATCHER_CHECK_CONST,                    // with index as values index
    MATCHER_CHECK_PRED,                     // with index as values index
    MATCHER_ENTER_LIST,
    MATCHER_LEAVE_LIST,
    MATCHER_CONTINUE_FROM_END,
    // NOTE(review): "REMAINDING" looks like a misspelling of "REMAINING",
    // but the identifier is part of the interface and is kept as-is.
    MATCHER_REMAINDING,
};
|
|
|
|
|
|
|
|
|
|
union matcher_instruction_or_arg {
|
|
|
|
|
enum matcher_instruction instruction;
|
|
|
|
|
size_t index;
|
2022-11-19 21:06:23 +00:00
|
|
|
size_t len;
|
|
|
|
|
struct apfl_string *string;
|
2022-07-28 18:46:32 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// A sequence of matcher opcodes and their inline arguments.
struct matcher_instruction_list {
    union matcher_instruction_or_arg *instructions; // array of opcode/argument slots
    size_t len; // presumably the number of slots in use -- TODO confirm
    size_t cap; // presumably the number of slots allocated -- TODO confirm
    size_t value_count;   // NOTE(review): looks like the number of value slots the matcher needs -- confirm
    size_t capture_count; // NOTE(review): looks like the number of captures the matcher produces -- confirm
};
|
|
|
|
|
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles; all values
implemented up to this point cannot).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode-based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions that are evaluated in a separate step. This was done in
preparation for implementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
its child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
// Opcodes of the main bytecode.
//
// The trailing comment on each enumerator gives the opcode's effect on the
// value stack as ( before -- after ) and, where applicable, the inline
// argument ("arg: ...") that follows the opcode in the instruction list.
// Enumerator order determines the numeric values, so new opcodes should
// only be added with care.
enum instruction {
    INSN_NIL,                // ( -- nil )
    INSN_TRUE,               // ( -- true )
    INSN_FALSE,              // ( -- false )
    INSN_NUMBER,             // ( -- number ), arg: number
    INSN_STRING,             // ( -- string ), arg: string
    INSN_LIST,               // ( -- list ), arg: count (preallocation hint)
    INSN_LIST_APPEND,        // ( list val -- list' )
    INSN_LIST_EXPAND_INTO,   // ( list list -- list' )
    INSN_DICT,               // ( -- dict )
    INSN_DICT_APPEND_KVPAIR, // ( dict key value -- dict' )
    INSN_GET_MEMBER,         // ( list/dict key -- value )
    INSN_GET_BY_INDEX_KEEP,  // ( list/dict -- list/dict value ), arg: index
    INSN_VAR_GET,            // ( -- value ), arg: string
    INSN_VAR_SET,            // ( value -- value ), arg: string
    INSN_VAR_SET_LOCAL,      // ( value -- value ), arg: string
    INSN_VAR_NEW,            // ( -- ), arg: string
    INSN_VAR_NEW_LOCAL,      // ( -- ), arg: string
    INSN_MOVE_TO_LOCAL_VAR,  // ( value -- ), arg: string
    INSN_NEXT_LINE,          // ( -- )
    INSN_SET_LINE,           // ( -- ), arg: count (new line number)
    INSN_DROP,               // ( value -- )
    INSN_DUP,                // ( value -- value value )
    INSN_CALL,               // ( func list -- value )
    INSN_FUNC,               // ( -- func ), arg: count
    INSN_FUNC_ADD_SUBFUNC,   // ( func -- func' ), arg: body; pops a matcher from the matcher stack
    INSN_MATCHER_PUSH,       // ( -- ), arg: matcher; pushes a matcher onto the matcher stack
    INSN_MATCHER_SET_VAL,    // ( val -- ), arg: index
    INSN_MATCHER_MUST_MATCH, // ( val -- ); pops a matcher from the matcher stack
};
|
|
|
|
|
|
|
|
|
|
union instruction_or_arg {
|
|
|
|
|
enum instruction instruction;
|
|
|
|
|
struct apfl_string *string;
|
|
|
|
|
apfl_number number;
|
|
|
|
|
size_t count;
|
2022-07-11 19:41:05 +00:00
|
|
|
size_t index;
|
|
|
|
|
struct instruction_list *body;
|
2022-07-28 18:46:32 +00:00
|
|
|
struct matcher_instruction_list *matcher;
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// A sequence of bytecode opcodes and their inline arguments.
struct instruction_list {
    union instruction_or_arg *instructions; // array of opcode/argument slots
    size_t len; // presumably the number of slots in use -- TODO confirm
    size_t cap; // presumably the number of slots allocated -- TODO confirm

    int line; // NOTE(review): looks like the source line the list starts at -- confirm
};
|
|
|
|
|
|
|
|
|
|
// Maps a bytecode opcode to a C string.
// NOTE(review): presumably a human-readable opcode name; the behaviour for
// values outside the enum is not visible from this header.
const char *apfl_instruction_to_string(enum instruction insn);
// Same as above, for matcher opcodes.
const char *apfl_matcher_instruction_to_string(enum matcher_instruction insn);
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles; all values
implemented up to this point cannot).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode-based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions that are evaluated in a separate step. This was done in
preparation for implementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
its child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
|
|
|
|
|
// Creates a new instruction list through the given GC.
// NOTE(review): `line` presumably initialises instruction_list.line, and a
// NULL return presumably signals allocation failure -- confirm in the
// implementation.
struct instruction_list *apfl_instructions_new(struct gc *gc, int line);
// Releases the resources held by an instruction list using the given
// allocator.
// NOTE(review): presumably frees only the contents, not the list object
// itself -- confirm in the implementation.
void apfl_instructions_deinit(struct apfl_allocator allocator, struct instruction_list *ilist);
|
|
|
|
|
|
|
|
|
|
// GC traversal hook for instruction lists.
// NOTE(review): presumably invokes `visitor` (with `opaque` passed through)
// for each GC-managed object the list references -- confirm in the
// implementation.
void apfl_gc_instructions_traverse(struct instruction_list *ilist, gc_visitor visitor, void *opaque);
// GC traversal hook for matcher instruction lists; see above.
void apfl_gc_matcher_instructions_traverse(struct matcher_instruction_list *milist, gc_visitor visitor, void *opaque);
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles; all values
implemented up to this point cannot).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode-based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions that are evaluated in a separate step. This was done in
preparation for implementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
its child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
|
2022-07-28 18:46:32 +00:00
|
|
|
// Creates a new matcher instruction list through the given GC.
// NOTE(review): a NULL return presumably signals allocation failure --
// confirm in the implementation.
struct matcher_instruction_list *apfl_matcher_instructions_new(struct gc *gc);
// Releases the resources held by a matcher instruction list using the given
// allocator.
// NOTE(review): presumably frees only the contents, not the list object
// itself -- confirm in the implementation.
void apfl_matcher_instructions_deinit(struct apfl_allocator allocator, struct matcher_instruction_list *milist);
|
|
|
|
|
|
2022-08-12 22:50:26 +00:00
|
|
|
// Writes a dump of a matcher instruction list to the format writer `w`.
// NOTE(review): `indent` is presumably the indentation level of the output
// and the return value presumably reports whether writing succeeded --
// confirm in the implementation.
bool apfl_bytecode_dump_matcher(unsigned indent, struct apfl_format_writer w, struct matcher_instruction_list *milist);
// Writes a dump of an instruction list to the format writer `w`; see above
// for `indent` and the return value.
bool apfl_bytecode_dump(unsigned indent, struct apfl_format_writer w, struct instruction_list *ilist);
|
2022-07-28 18:49:29 +00:00
|
|
|
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles; all values
implemented up to this point cannot).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode-based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions that are evaluated in a separate step. This was done in
preparation for implementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
its child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#endif
|