Implement mark&sweep garbage collection and bytecode compilation

Instead of the previous refcount-based garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles; all values
implemented up to this point cannot).

The collector maintains a set of roots and a set of objects (grouped into
blocks). GC-enabled objects are no longer allocated manually, but are
allocated by the GC. The GC also wraps an allocator; this way the GC
knows when we have run out of memory and will try to recover by
performing a full collection cycle.

The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a separate list of objects that still need
  to be marked; we can simply grab the next grey one (see the sketch
  below).
- It should allow us to later implement incremental collection (right now
  we only do a stop-the-world collection).
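
A sketch of what the mark phase then boils down to (illustrative names,
not the actual apfl API):

    struct gc_object *obj;
    while ((obj = gc_take_next_grey(gc)) != NULL) {
        gc_mark_children_grey(gc, obj); // white children become grey
        obj->color = GC_BLACK;          // obj is now fully scanned
    }
    // everything still white at this point is unreachable and is swept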

This also switches to bytecode-based evaluation: we no longer evaluate
the AST directly, but first compile it into a series of instructions
that are evaluated in a separate step (schematically shown below). This
was done in preparation for implementing functions: we only need to turn
a function body into instructions once, instead of evaluating its nodes
again on each call of the function. Also, since an instruction list is
implemented as a GC object, this removes the manual memory management of
the function body and its child nodes. Since the GC and the bytecode go
hand in hand, this was done in one (giant) commit.
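
Roughly (hypothetical function names; the real entry points live in the
compiler and VM code):

    struct instructions *ins = compile(&ctx->gc, ast); // AST -> bytecode, once
    eval(ctx, ins); // first call
    eval(ctx, ins); // later calls reuse ins; the AST is never walked again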

As a downside, we've now lost the ability to do list matching on
assignments. I've already started to work on implementing this in the
new architecture, but left it out of this commit, as it's already quite
a large commit :)
2022-04-11 20:24:22 +00:00

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "apfl.h"
#include "alloc.h"
#include "context.h"
#include "gc.h"
#include "hashmap.h"
#include "resizable.h"
#include "value.h"

static struct stack *
stack_new(struct gc *gc)
{
    struct stack *stack = apfl_gc_new_stack(gc);
    if (stack == NULL) {
        return NULL;
    }

    *stack = (struct stack) {
        .items = NULL,
        .len = 0,
        .cap = 0,
    };

    return stack;
}

void
apfl_stack_deinit(struct apfl_allocator allocator, struct stack *stack)
{
    FREE_LIST(allocator, stack->items, stack->cap);
}

bool
apfl_stack_push(apfl_ctx ctx, struct apfl_value value)
{
    return apfl_resizable_append(
        ctx->gc.allocator,
        sizeof(struct apfl_value),
        (void **)&ctx->stack->items,
        &ctx->stack->len,
        &ctx->stack->cap,
        &value,
        1
    );
}

/*
 * Normalize *index in place: negative indices count from the top of the
 * stack (-1 is the topmost value). Returns false if the index is out of
 * range.
 */
bool
apfl_stack_check_index(apfl_ctx ctx, apfl_stackidx *index)
{
    if (*index < 0) {
        if ((size_t)-*index > ctx->stack->len) {
            return false;
        }
        *index = ctx->stack->len + *index;
    } else if ((size_t)*index >= ctx->stack->len) {
        return false;
    }

    assert(0 <= *index && (size_t)*index < ctx->stack->len);

    return true;
}
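
/*
 * Example (hypothetical values): with a stack of length 4, an index of
 * -1 normalizes to 3 (the top) and -4 to 0, while 4 and -5 are rejected.
 *
 *     apfl_stackidx idx = -1;
 *     if (apfl_stack_check_index(ctx, &idx)) {
 *         // idx is now 3
 *     }
 */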

static int
cmp_stackidx(const void *_a, const void *_b)
{
    const apfl_stackidx *a = _a;
    const apfl_stackidx *b = _b;
    return *a - *b;
}

bool
apfl_stack_drop_multi(apfl_ctx ctx, size_t count, apfl_stackidx *indices)
{
    for (size_t i = 0; i < count; i++) {
        if (!apfl_stack_check_index(ctx, &indices[i])) {
            return false;
        }
    }

    qsort(indices, count, sizeof(apfl_stackidx), cmp_stackidx);

    for (size_t i = count; i-- > 0; ) {
        // Will not fail, as we've already checked the indices
        assert(apfl_resizable_cut_without_resize(
            sizeof(struct apfl_value),
            (void **)&ctx->stack->items,
            &ctx->stack->len,
            indices[i],
            1
        ));
    }

    // TODO: Shrink stack

    return true;
}
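
/*
 * Design note: the indices are sorted and then removed in descending
 * order, so deleting one element never shifts an index that is still
 * pending removal. E.g. dropping {0, 2} from a 3-element stack removes
 * item 2 first and item 0 afterwards.
 */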

bool
apfl_stack_pop(apfl_ctx ctx, struct apfl_value *value, apfl_stackidx index)
{
    if (!apfl_stack_check_index(ctx, &index)) {
        return false;
    }

    *value = ctx->stack->items[index];

    // Pass &items explicitly instead of relying on items being the
    // first member of struct stack.
    assert(apfl_resizable_splice(
        ctx->gc.allocator,
        sizeof(struct apfl_value),
        (void **)&ctx->stack->items,
        &ctx->stack->len,
        &ctx->stack->cap,
        index,
        1,
        NULL,
        0
    ));

    return true;
}

static struct apfl_value *
stack_get_pointer(apfl_ctx ctx, apfl_stackidx index)
{
    if (!apfl_stack_check_index(ctx, &index)) {
        return NULL;
    }

    return &ctx->stack->items[index];
}

static bool
stack_get_and_adjust_index(apfl_ctx ctx, struct apfl_value *value, apfl_stackidx *index)
{
    if (!apfl_stack_check_index(ctx, index)) {
        return false;
    }

    *value = ctx->stack->items[*index];

    return true;
}

bool
apfl_stack_get(apfl_ctx ctx, struct apfl_value *value, apfl_stackidx index)
{
    return stack_get_and_adjust_index(ctx, value, &index);
}

static struct apfl_value *
stack_push_placeholder(apfl_ctx ctx)
{
    if (!apfl_stack_push(ctx, (struct apfl_value) {.type = VALUE_NIL})) {
        return NULL;
    }

    return stack_get_pointer(ctx, -1);
}

bool
apfl_stack_drop(apfl_ctx ctx, apfl_stackidx index)
{
    struct apfl_value value;
    return apfl_stack_pop(ctx, &value, index);
}

void
apfl_stack_clear(apfl_ctx ctx)
{
    ctx->stack->len = 0;
}

apfl_ctx
apfl_ctx_new(struct apfl_allocator base_allocator)
{
    apfl_ctx ctx = ALLOC_OBJ(base_allocator, struct apfl_ctx_data);
    if (ctx == NULL) {
        return NULL;
    }

    if (!apfl_gc_init(&ctx->gc, base_allocator)) {
        FREE_OBJ(base_allocator, ctx);
        return NULL;
    }

    if ((ctx->scope = apfl_scope_new(&ctx->gc)) == NULL) {
        goto error;
    }

    if (!apfl_gc_root_add(&ctx->gc, GC_OBJECT_FROM(ctx->scope, GC_TYPE_SCOPE))) {
        goto error;
    }

    if ((ctx->stack = stack_new(&ctx->gc)) == NULL) {
        goto error;
    }

    if (!apfl_gc_root_add(&ctx->gc, GC_OBJECT_FROM(ctx->stack, GC_TYPE_STACK))) {
        goto error;
    }

    return ctx;

error:
    apfl_ctx_destroy(ctx);
    return NULL;
}
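
/*
 * Lifecycle sketch (assuming a malloc-backed struct apfl_allocator is
 * available; constructing one is outside the scope of this file):
 *
 *     apfl_ctx ctx = apfl_ctx_new(allocator);
 *     if (ctx == NULL) { ...handle the error... }
 *     ...push values, call into the interpreter...
 *     apfl_ctx_destroy(ctx); // runs a final full collection, frees ctx
 */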

void
apfl_ctx_destroy(apfl_ctx ctx)
{
    if (ctx == NULL) {
        return;
    }

    struct apfl_allocator base_allocator = ctx->gc.base_allocator;

    apfl_gc_full(&ctx->gc);
    apfl_gc_deinit(&ctx->gc);

    FREE_OBJ(base_allocator, ctx);
}

#define CREATE_GC_OBJECT_VALUE_ON_STACK(ctx, TYPE, MEMB, NEW) \
    struct apfl_value *value = stack_push_placeholder(ctx); \
    if (value == NULL) { \
        return APFL_RESULT_ERR_FATAL; \
    } \
    \
    struct apfl_value new_value = {.type = TYPE}; \
    if ((new_value.MEMB = NEW) == NULL) { \
        assert(apfl_stack_drop(ctx, -1)); \
        return APFL_RESULT_ERR_FATAL; \
    } \
    \
    *value = new_value; \
    \
    return APFL_RESULT_OK;
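
/*
 * Design note: the macro expands to an entire function body, including
 * the return statements, which is why it is deliberately not wrapped in
 * a do { ... } while (0). Pushing a nil placeholder first reserves the
 * stack slot, so once the new object exists it is rooted via the stack
 * with a plain assignment that cannot fail.
 */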

enum apfl_result
apfl_list_create(apfl_ctx ctx, size_t initial_cap)
{
    CREATE_GC_OBJECT_VALUE_ON_STACK(
        ctx,
        VALUE_LIST,
        list,
        apfl_list_new(&ctx->gc, initial_cap)
    )
}

enum apfl_result
apfl_list_append(apfl_ctx ctx, apfl_stackidx list_index, apfl_stackidx value_index)
{
    struct apfl_value *list_val = stack_get_pointer(ctx, list_index);
    if (list_val == NULL) {
        return APFL_RESULT_ERR;
    }

    if (list_val->type != VALUE_LIST) {
        return APFL_RESULT_ERR;
    }

    struct apfl_value value;
    if (!apfl_stack_get(ctx, &value, value_index)) {
        return APFL_RESULT_ERR;
    }

    enum apfl_result result = apfl_list_splice(
        &ctx->gc,
        &list_val->list,
        list_val->list->len,
        0,
        &value,
        1
    )
        ? APFL_RESULT_OK
        : APFL_RESULT_ERR;

    assert(apfl_stack_drop(ctx, value_index));

    return result;
}
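
/*
 * Usage sketch (error handling elided): the appended value is consumed
 * from the stack while the list stays in place.
 *
 *     apfl_list_create(ctx, 0);                              // list at -1
 *     apfl_stack_push(ctx, (struct apfl_value) {.type = VALUE_NIL});
 *     apfl_list_append(ctx, -2, -1); // pops the nil, list is at -1 again
 */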

enum apfl_result
apfl_list_append_list(apfl_ctx ctx, apfl_stackidx dst_index, apfl_stackidx src_index)
{
    struct apfl_value *dst_val = stack_get_pointer(ctx, dst_index);
    if (dst_val == NULL) {
        return APFL_RESULT_ERR;
    }

    if (dst_val->type != VALUE_LIST) {
        return APFL_RESULT_ERR;
    }

    struct apfl_value src_val;
    if (!apfl_stack_get(ctx, &src_val, src_index)) {
        return APFL_RESULT_ERR;
    }

    // The source must be a list as well; the destination was already
    // checked above.
    if (src_val.type != VALUE_LIST) {
        assert(apfl_stack_drop(ctx, src_index));
        return APFL_RESULT_ERR;
    }

    enum apfl_result result = apfl_list_splice(
        &ctx->gc,
        &dst_val->list,
        dst_val->list->len,
        0,
        src_val.list->items,
        src_val.list->len
    )
        ? APFL_RESULT_OK
        : APFL_RESULT_ERR;

    assert(apfl_stack_drop(ctx, src_index));
    return result;
}

enum apfl_result
apfl_dict_create(apfl_ctx ctx)
{
    CREATE_GC_OBJECT_VALUE_ON_STACK(
        ctx,
        VALUE_DICT,
        dict,
        apfl_dict_new(&ctx->gc)
    )
}

enum apfl_result
apfl_dict_set(
    apfl_ctx ctx,
    apfl_stackidx dict_index,
    apfl_stackidx k_index,
    apfl_stackidx v_index
) {
    struct apfl_value k;
    struct apfl_value v;
    struct apfl_value *dict_value;

    if (
        !apfl_stack_get(ctx, &k, k_index)
        || !apfl_stack_get(ctx, &v, v_index)
        || (dict_value = stack_get_pointer(ctx, dict_index)) == NULL
    ) {
        return APFL_RESULT_ERR;
    }

    if (dict_value->type != VALUE_DICT) {
        assert(apfl_stack_drop_multi(ctx, 2, (apfl_stackidx[]){k_index, v_index}));
        return APFL_RESULT_ERR;
    }

    if (!apfl_dict_set_raw(&ctx->gc, &dict_value->dict, k, v)) {
        assert(apfl_stack_drop_multi(ctx, 2, (apfl_stackidx[]){k_index, v_index}));
        return APFL_RESULT_ERR;
    }

    assert(apfl_stack_drop_multi(ctx, 2, (apfl_stackidx[]){k_index, v_index}));
    return APFL_RESULT_OK;
}
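
/*
 * Calling convention sketch (mirrors apfl_list_append): the key and the
 * value are consumed, the dict stays on the stack.
 *
 *     apfl_dict_create(ctx);          // dict at -1
 *     ...push the key...              // dict at -2
 *     ...push the value...            // dict at -3
 *     apfl_dict_set(ctx, -3, -2, -1); // pops key and value, dict at -1
 */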

static enum apfl_result
apfl_get_member_inner(
    apfl_ctx ctx,
    struct apfl_value container,
    struct apfl_value k
) {
    struct apfl_value *value = stack_push_placeholder(ctx);
    if (value == NULL) {
        return APFL_RESULT_ERR;
    }

    if (apfl_value_get_item(container, k, value) != GET_ITEM_OK) {
        assert(apfl_stack_drop(ctx, -1));
        return APFL_RESULT_ERR;
    }

    return APFL_RESULT_OK;
}

enum apfl_result
apfl_get_member(
    apfl_ctx ctx,
    apfl_stackidx container_index,
    apfl_stackidx k_index
) {
    struct apfl_value container;
    struct apfl_value k;
    if (
        !stack_get_and_adjust_index(ctx, &container, &container_index)
        || !stack_get_and_adjust_index(ctx, &k, &k_index)
    ) {
        return APFL_RESULT_ERR;
    }

    enum apfl_result result = apfl_get_member_inner(ctx, container, k);
    assert(apfl_stack_drop_multi(ctx, 2, (apfl_stackidx[]){k_index, container_index}));
    return result;
}

void
apfl_gc_stack_traverse(struct stack *stack, gc_visitor visitor, void *opaque)
{
    for (size_t i = 0; i < stack->len; i++) {
        struct gc_object *object = apfl_value_get_gc_object(stack->items[i]);
        if (object != NULL) {
            visitor(opaque, object);
        }
    }
}