apfl/src/context.c

472 lines
11 KiB
C
Raw Normal View History

Implement mark&sweep garbage collection and bytecode compilation. Instead of the previous refcount-based garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles; all values implemented up to this point cannot). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC-enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator; this way the GC knows if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked; we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode-based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions that are evaluated in a separate step. This was done in preparation for implementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and its child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability to do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "apfl.h"
#include "alloc.h"
#include "context.h"
#include "gc.h"
#include "hashmap.h"
#include "resizable.h"
2022-04-22 21:17:28 +00:00
#include "strings.h"
Implement mark&sweep garbage collection and bytecode compilation. Instead of the previous refcount-based garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles; all values implemented up to this point cannot). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC-enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator; this way the GC knows if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked; we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode-based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions that are evaluated in a separate step. This was done in preparation for implementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and its child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability to do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
#include "value.h"
// Obtains a stack object from the GC and resets it to the empty state
// (no item buffer, zero length, zero capacity).
// Returns NULL when apfl_gc_new_stack returns NULL.
static struct stack *
stack_new(struct gc *gc)
{
    struct stack *fresh = apfl_gc_new_stack(gc);
    if (fresh != NULL) {
        *fresh = (struct stack) {
            .cap = 0,
            .len = 0,
            .items = NULL,
        };
    }
    return fresh;
}
// Releases the stack's item buffer by handing `stack->items` and
// `stack->cap` to FREE_LIST together with the given allocator.
// The stack object itself is not freed by this function.
void
apfl_stack_deinit(struct apfl_allocator allocator, struct stack *stack)
{
FREE_LIST(allocator, stack->items, stack->cap);
}
// Appends a copy of `value` to the end of the context's value stack.
// The item buffer is managed through apfl_resizable_append using the GC's
// allocator; the function returns whatever that call reports.
bool
apfl_stack_push(apfl_ctx ctx, struct apfl_value value)
{
    struct stack *target = ctx->stack;

    return apfl_resizable_append(
        ctx->gc.allocator,
        sizeof(struct apfl_value),
        (void **)&target->items,
        &target->len,
        &target->cap,
        &value,
        1
    );
}
// Validates a stack index and normalizes it to an absolute position.
//
// Non-negative indices count from the bottom of the stack (0 is the first
// item). Negative indices count from the top (-1 is the last item).
//
// On success `*index` holds the absolute, non-negative position and true is
// returned. If the index does not refer to an existing item, false is
// returned and `*index` is left untouched.
bool
apfl_stack_check_index(apfl_ctx ctx, apfl_stackidx *index)
{
    const size_t len = ctx->stack->len;
    const apfl_stackidx requested = *index;

    if (requested >= 0) {
        if ((size_t)requested >= len) {
            return false;
        }
    } else {
        // Distance from the top item: 0 for -1, 1 for -2, ...
        // Computed as -(requested + 1) because plain -requested overflows
        // (undefined behavior) for the most negative representable index.
        const size_t from_top = (size_t)(-(requested + 1));
        if (from_top >= len) {
            return false;
        }
        *index = (apfl_stackidx)(len - 1 - from_top);
    }

    assert(0 <= *index && (size_t)*index < len);
    return true;
}
// qsort comparator ordering apfl_stackidx values ascending.
// Uses comparisons instead of subtraction so the result can neither overflow
// nor be truncated when converted to int.
static int
cmp_stackidx(const void *_a, const void *_b)
{
    const apfl_stackidx lhs = *(const apfl_stackidx *)_a;
    const apfl_stackidx rhs = *(const apfl_stackidx *)_b;
    return (lhs > rhs) - (lhs < rhs);
}
// Removes several items from the stack in one go.
//
// `indices` holds `count` stack indices (negative values count from the top).
// The array is modified: every entry is normalized to its absolute position
// and the array is sorted ascending.
//
// Returns false, without removing anything, if any index is out of range
// (entries checked before the failing one have already been normalized).
// Indices that resolve to the same slot are removed only once.
bool apfl_stack_drop_multi(apfl_ctx ctx, size_t count, apfl_stackidx *indices)
{
    for (size_t i = 0; i < count; i++) {
        if (!apfl_stack_check_index(ctx, &indices[i])) {
            return false;
        }
    }

    if (count > 1) {
        qsort(indices, count, sizeof(apfl_stackidx), cmp_stackidx);
    }

    // Remove from the highest position downwards so that the remaining,
    // smaller indices stay valid after each cut.
    for (size_t i = count; i-- > 0; ) {
        if (i + 1 < count && indices[i] == indices[i + 1]) {
            // Same slot as the entry handled just before; already removed.
            continue;
        }

        // Will not fail, as we've already checked the indices.
        // The call is kept outside of assert() so it is still executed when
        // assertions are compiled out (NDEBUG).
        bool cut = apfl_resizable_cut_without_resize(
            sizeof(struct apfl_value),
            (void **)&ctx->stack->items,
            &ctx->stack->len,
            indices[i],
            1
        );
        assert(cut);
        (void)cut;
    }

    // TODO: Shrink stack

    return true;
}
// Removes the item at `index` from the stack and stores it in `*value`.
//
// `index` may be negative to count from the top of the stack.
// Returns false (leaving `*value` untouched) if the index is out of range.
// Otherwise `*value` receives the item and the result of removing it from
// the stack is returned.
bool
apfl_stack_pop(apfl_ctx ctx, struct apfl_value *value, apfl_stackidx index)
{
    if (!apfl_stack_check_index(ctx, &index)) {
        return false;
    }

    *value = ctx->stack->items[index];

    // The splice must happen outside of assert(), otherwise the item would
    // stay on the stack in builds with NDEBUG. The buffer is passed as the
    // address of the `items` member, matching apfl_stack_push.
    bool removed = apfl_resizable_splice(
        ctx->gc.allocator,
        sizeof(struct apfl_value),
        (void **)&ctx->stack->items,
        &ctx->stack->len,
        &ctx->stack->cap,
        index,
        1,
        NULL,
        0
    );
    assert(removed);

    return removed;
}
// Resolves `index` (negative values count from the top) and returns the
// address of that slot inside the stack's item buffer, or NULL if the index
// does not refer to an existing item.
static struct apfl_value *
stack_get_pointer(apfl_ctx ctx, apfl_stackidx index)
{
    apfl_stackidx resolved = index;
    bool in_range = apfl_stack_check_index(ctx, &resolved);

    return in_range ? &ctx->stack->items[resolved] : NULL;
}
// Copies the stack item at `*index` into `*value` without removing it.
// On success `*index` has been normalized to the absolute position by
// apfl_stack_check_index. Returns false if the index is out of range.
static bool
stack_get_and_adjust_index(apfl_ctx ctx, struct apfl_value *value, apfl_stackidx *index)
{
    if (apfl_stack_check_index(ctx, index)) {
        *value = ctx->stack->items[*index];
        return true;
    }

    return false;
}
// Copies the stack item at `index` (negative values count from the top) into
// `*value`, leaving the stack unchanged. Returns false if the index is out of
// range.
bool
apfl_stack_get(apfl_ctx ctx, struct apfl_value *value, apfl_stackidx index)
{
    apfl_stackidx resolved = index;
    return stack_get_and_adjust_index(ctx, value, &resolved);
}
// Pushes a nil value onto the stack and returns a pointer to that new top
// slot, so the caller can fill it in afterwards.
// Returns NULL if the push failed.
//
// The returned pointer refers to the stack's item buffer.
// NOTE(review): presumably it becomes invalid once the buffer is resized by
// a later push; confirm against apfl_resizable_append.
struct apfl_value *
apfl_stack_push_placeholder(apfl_ctx ctx)
{
    if (!apfl_stack_push(ctx, (struct apfl_value) {.type = VALUE_NIL})) {
        return NULL;
    }

    return stack_get_pointer(ctx, -1);
}
// Removes the item at `index` from the stack and discards it.
// Returns the result of apfl_stack_pop for that index.
bool
apfl_stack_drop(apfl_ctx ctx, apfl_stackidx index)
{
    struct apfl_value discarded;
    bool dropped = apfl_stack_pop(ctx, &discarded, index);
    return dropped;
}
2022-04-21 19:15:20 +00:00
// Empties the stack by resetting its length to zero.
// Only `len` is written here; the item buffer and `cap` are left as they are.
void
apfl_stack_clear(apfl_ctx ctx)
{
ctx->stack->len = 0;
}
Implement mark&sweep garbage collection and bytecode compilation. Instead of the previous refcount-based garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles; all values implemented up to this point cannot). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC-enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator; this way the GC knows if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked; we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode-based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions that are evaluated in a separate step. This was done in preparation for implementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and its child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability to do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
apfl_ctx
apfl_ctx_new(struct apfl_allocator base_allocator)
{
apfl_ctx ctx = ALLOC_OBJ(base_allocator, struct apfl_ctx_data);
if (ctx == NULL) {
return NULL;
}
if (!apfl_gc_init(&ctx->gc, base_allocator)) {
FREE_OBJ(base_allocator, ctx);
return NULL;
}
if ((ctx->scope = apfl_scope_new(&ctx->gc)) == NULL) {
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
goto error;
}
if (!apfl_gc_root_add(&ctx->gc, GC_OBJECT_FROM(ctx->scope, GC_TYPE_SCOPE))) {
goto error;
}
if ((ctx->stack = stack_new(&ctx->gc)) == NULL) {
goto error;
}
if (!apfl_gc_root_add(&ctx->gc, GC_OBJECT_FROM(ctx->stack, GC_TYPE_STACK))) {
goto error;
}
return ctx;
error:
apfl_ctx_destroy(ctx);
return NULL;
}
// Tears down a context created by apfl_ctx_new. Passing NULL is a no-op.
// Runs a full collection, deinitializes the GC and finally frees the context
// structure with the GC's base allocator.
void
apfl_ctx_destroy(apfl_ctx ctx)
{
    if (ctx != NULL) {
        struct gc *gc = &ctx->gc;

        // Copy the allocator out first: it is still needed after the GC has
        // been deinitialized.
        struct apfl_allocator base_allocator = gc->base_allocator;

        apfl_gc_full(gc);
        apfl_gc_deinit(gc);

        FREE_OBJ(base_allocator, ctx);
    }
}
// Function-body template for "create a GC object and push it as a value".
//
// Expands to a complete sequence of statements ending in `return`, so it must
// be the only content of a function returning `enum apfl_result`:
//   1. push a nil placeholder onto the stack of `ctx`,
//   2. evaluate `NEW` and store the resulting pointer in member `MEMB` of a
//      value tagged with `TYPE`,
//   3. overwrite the placeholder with that value.
// Yields APFL_RESULT_ERR_FATAL if the placeholder cannot be pushed or `NEW`
// evaluates to NULL (the placeholder is dropped again in that case), and
// APFL_RESULT_OK otherwise.
//
// The drop is performed outside of assert() so that the placeholder is also
// removed when assertions are compiled out (NDEBUG).
#define CREATE_GC_OBJECT_VALUE_ON_STACK(ctx, TYPE, MEMB, NEW) \
    struct apfl_value *value = apfl_stack_push_placeholder(ctx); \
    if (value == NULL) { \
        return APFL_RESULT_ERR_FATAL; \
    } \
    \
    struct apfl_value new_value = {.type = (TYPE)}; \
    if ((new_value.MEMB = (NEW)) == NULL) { \
        bool placeholder_dropped = apfl_stack_drop((ctx), -1); \
        assert(placeholder_dropped); \
        (void)placeholder_dropped; \
        return APFL_RESULT_ERR_FATAL; \
    } \
    \
    *value = new_value; \
    \
    return APFL_RESULT_OK;
// Pushes a nil value onto the stack.
// Returns APFL_RESULT_OK on success and APFL_RESULT_ERR if the push failed.
enum apfl_result
apfl_push_nil(apfl_ctx ctx)
{
    struct apfl_value nil = {.type = VALUE_NIL};

    if (!apfl_stack_push(ctx, nil)) {
        return APFL_RESULT_ERR;
    }
    return APFL_RESULT_OK;
}
// Pushes the boolean `b` onto the stack.
// Returns APFL_RESULT_OK on success and APFL_RESULT_ERR if the push failed.
enum apfl_result
apfl_push_bool(apfl_ctx ctx, bool b)
{
    struct apfl_value boolean_value = {
        .type = VALUE_BOOLEAN,
        .boolean = b,
    };

    if (!apfl_stack_push(ctx, boolean_value)) {
        return APFL_RESULT_ERR;
    }
    return APFL_RESULT_OK;
}
// Pushes the number `num` onto the stack.
// Returns APFL_RESULT_OK on success and APFL_RESULT_ERR if the push failed.
enum apfl_result
apfl_push_number(apfl_ctx ctx, apfl_number num)
{
    struct apfl_value number_value = {
        .type = VALUE_NUMBER,
        .number = num,
    };

    if (!apfl_stack_push(ctx, number_value)) {
        return APFL_RESULT_ERR;
    }
    return APFL_RESULT_OK;
}
static struct apfl_string *
new_copied_string(struct gc *gc, struct apfl_string_view sv)
{
struct apfl_string s = apfl_string_blank();
if (!apfl_string_copy(gc->allocator, &s, sv)) {
return NULL;
}
struct apfl_string *out = apfl_string_move_into_new_gc_string(gc, &s);
if (out == NULL) {
apfl_string_deinit(gc->allocator, &s);
return NULL;
}
return out;
}
// Pushes a new string value onto the stack whose contents are a copy of `sv`.
// The whole body is generated by CREATE_GC_OBJECT_VALUE_ON_STACK: a
// placeholder is pushed first, then the string is created with
// new_copied_string and stored in the placeholder slot.
// Returns APFL_RESULT_OK on success, or APFL_RESULT_ERR_FATAL if the
// placeholder could not be pushed or the string could not be created.
enum apfl_result
apfl_push_string_view_copy(apfl_ctx ctx, struct apfl_string_view sv)
{
CREATE_GC_OBJECT_VALUE_ON_STACK(
ctx,
VALUE_STRING,
string,
new_copied_string(&ctx->gc, sv)
)
}
Implement mark&sweep garbage collection and bytecode compilation. Instead of the previous refcount-based garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles; all values implemented up to this point cannot). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC-enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator; this way the GC knows if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked; we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode-based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions that are evaluated in a separate step. This was done in preparation for implementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and its child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability to do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
// Pushes a new list value onto the stack. `initial_cap` is forwarded to
// apfl_list_new.
// The whole body is generated by CREATE_GC_OBJECT_VALUE_ON_STACK: a
// placeholder is pushed first, then the list is created and stored in the
// placeholder slot.
// Returns APFL_RESULT_OK on success, or APFL_RESULT_ERR_FATAL if the
// placeholder could not be pushed or apfl_list_new returned NULL.
enum apfl_result
apfl_list_create(apfl_ctx ctx, size_t initial_cap)
{
CREATE_GC_OBJECT_VALUE_ON_STACK(
ctx,
VALUE_LIST,
list,
apfl_list_new(&ctx->gc, initial_cap)
)
}
// Appends the stack value at `value_index` to the list at `list_index`.
//
// Returns APFL_RESULT_ERR without touching the stack if `list_index` is out
// of range, does not hold a list, or `value_index` is out of range.
// Otherwise the value is spliced onto the end of the list and then removed
// from the stack, whether or not the splice succeeded. The result reports
// the outcome of the splice.
enum apfl_result
apfl_list_append(apfl_ctx ctx, apfl_stackidx list_index, apfl_stackidx value_index)
{
    struct apfl_value *list_val = stack_get_pointer(ctx, list_index);
    if (list_val == NULL || list_val->type != VALUE_LIST) {
        return APFL_RESULT_ERR;
    }

    struct apfl_value value;
    if (!apfl_stack_get(ctx, &value, value_index)) {
        return APFL_RESULT_ERR;
    }

    enum apfl_result result = apfl_list_splice(
        &ctx->gc,
        &list_val->list,
        list_val->list->len,
        0,
        &value,
        1
    )
        ? APFL_RESULT_OK
        : APFL_RESULT_ERR;

    // The drop must not live inside assert(): with NDEBUG the value would
    // otherwise stay on the stack. The index was validated above.
    bool dropped = apfl_stack_drop(ctx, value_index);
    assert(dropped);
    (void)dropped;

    return result;
}
// Appends all items of the list at `src_index` to the list at `dst_index`.
//
// Returns APFL_RESULT_ERR without touching the stack if `dst_index` is out
// of range, does not hold a list, or `src_index` is out of range.
// If the source value is not a list, it is dropped from the stack and
// APFL_RESULT_ERR is returned.
// Otherwise the source items are spliced onto the end of the destination and
// the source value is removed from the stack, whether or not the splice
// succeeded. The result reports the outcome of the splice.
//
// NOTE(review): if source and destination refer to the same list object, the
// spliced items alias the destination's own buffer; confirm that
// apfl_list_splice copes with that.
enum apfl_result
apfl_list_append_list(apfl_ctx ctx, apfl_stackidx dst_index, apfl_stackidx src_index)
{
    struct apfl_value *dst_val = stack_get_pointer(ctx, dst_index);
    if (dst_val == NULL || dst_val->type != VALUE_LIST) {
        return APFL_RESULT_ERR;
    }

    struct apfl_value src_val;
    if (!apfl_stack_get(ctx, &src_val, src_index)) {
        return APFL_RESULT_ERR;
    }

    enum apfl_result result = APFL_RESULT_ERR;

    // The source must be checked here (the destination was already checked
    // above); `src_val.list` is only meaningful for list values.
    if (src_val.type == VALUE_LIST) {
        bool spliced = apfl_list_splice(
            &ctx->gc,
            &dst_val->list,
            dst_val->list->len,
            0,
            src_val.list->items,
            src_val.list->len
        );
        if (spliced) {
            result = APFL_RESULT_OK;
        }
    }

    // Performed outside of assert() so the source is also dropped when
    // assertions are compiled out (NDEBUG). The index was validated above.
    bool dropped = apfl_stack_drop(ctx, src_index);
    assert(dropped);
    (void)dropped;

    return result;
}
// Pushes a new dictionary value onto the stack.
// The whole body is generated by CREATE_GC_OBJECT_VALUE_ON_STACK: a
// placeholder is pushed first, then the dictionary is created with
// apfl_dict_new and stored in the placeholder slot.
// Returns APFL_RESULT_OK on success, or APFL_RESULT_ERR_FATAL if the
// placeholder could not be pushed or apfl_dict_new returned NULL.
enum apfl_result
apfl_dict_create(apfl_ctx ctx)
{
CREATE_GC_OBJECT_VALUE_ON_STACK(
ctx,
VALUE_DICT,
dict,
apfl_dict_new(&ctx->gc)
)
}
// Stores the stack value at `v_index` under the key at `k_index` in the
// dictionary at `dict_index`.
//
// Returns APFL_RESULT_ERR without touching the stack if any of the three
// indices is out of range. Otherwise key and value are removed from the
// stack in every case, and the result is APFL_RESULT_OK only if the target
// is a dictionary and apfl_dict_set_raw succeeded.
enum apfl_result
apfl_dict_set(
    apfl_ctx ctx,
    apfl_stackidx dict_index,
    apfl_stackidx k_index,
    apfl_stackidx v_index
) {
    struct apfl_value k;
    struct apfl_value v;
    struct apfl_value *dict_value;

    if (
        !apfl_stack_get(ctx, &k, k_index)
        || !apfl_stack_get(ctx, &v, v_index)
        || (dict_value = stack_get_pointer(ctx, dict_index)) == NULL
    ) {
        return APFL_RESULT_ERR;
    }

    enum apfl_result result = APFL_RESULT_ERR;
    if (
        dict_value->type == VALUE_DICT
        && apfl_dict_set_raw(&ctx->gc, &dict_value->dict, k, v)
    ) {
        result = APFL_RESULT_OK;
    }

    // Key and value leave the stack on every path past the index checks.
    // The call is kept outside of assert() so that it also runs with NDEBUG.
    apfl_stackidx consumed[] = {k_index, v_index};
    bool dropped = apfl_stack_drop_multi(ctx, 2, consumed);
    assert(dropped);
    (void)dropped;

    return result;
}
// Looks up `k` in `container` and pushes the result onto the stack.
//
// A placeholder slot is pushed first and handed to apfl_value_get_item as
// the output location. If the lookup does not report GET_ITEM_OK, the
// placeholder is removed again so the stack is left as it was.
// Returns APFL_RESULT_OK on success and APFL_RESULT_ERR otherwise.
static enum apfl_result
apfl_get_member_inner(
    apfl_ctx ctx,
    struct apfl_value container,
    struct apfl_value k
) {
    struct apfl_value *value = apfl_stack_push_placeholder(ctx);
    if (value == NULL) {
        return APFL_RESULT_ERR;
    }

    if (apfl_value_get_item(container, k, value) != GET_ITEM_OK) {
        // Kept outside of assert() so the placeholder is also removed when
        // assertions are compiled out (NDEBUG).
        bool dropped = apfl_stack_drop(ctx, -1);
        assert(dropped);
        (void)dropped;
        return APFL_RESULT_ERR;
    }

    return APFL_RESULT_OK;
}
// Replaces the container at `container_index` and the key at `k_index` with
// the member of the container stored under that key.
//
// Returns APFL_RESULT_ERR without touching the stack if either index is out
// of range. Otherwise container and key are removed from the stack in every
// case. On success the looked-up value is left on top of the stack and
// APFL_RESULT_OK is returned; if the lookup fails, APFL_RESULT_ERR is
// returned.
enum apfl_result
apfl_get_member(
    apfl_ctx ctx,
    apfl_stackidx container_index,
    apfl_stackidx k_index
) {
    struct apfl_value container;
    struct apfl_value k;

    // Both indices are normalized to absolute positions here, so they remain
    // correct after the lookup has pushed its result on top of the stack.
    if (
        !stack_get_and_adjust_index(ctx, &container, &container_index)
        || !stack_get_and_adjust_index(ctx, &k, &k_index)
    ) {
        return APFL_RESULT_ERR;
    }

    enum apfl_result result = apfl_get_member_inner(ctx, container, k);

    // Kept outside of assert() so the operands are also removed when
    // assertions are compiled out (NDEBUG).
    apfl_stackidx consumed[] = {k_index, container_index};
    bool dropped = apfl_stack_drop_multi(ctx, 2, consumed);
    assert(dropped);
    (void)dropped;

    return result;
}
// GC traversal callback for stack objects: calls `visitor(opaque, object)`
// for every stack item for which apfl_value_get_gc_object returns a non-NULL
// GC object, going from the bottom of the stack to the top.
void
apfl_gc_stack_traverse(struct stack *stack, gc_visitor visitor, void *opaque)
{
    for (size_t slot = 0; slot < stack->len; slot++) {
        struct gc_object *referenced = apfl_value_get_gc_object(stack->items[slot]);
        if (referenced == NULL) {
            // Item holds no GC object
            continue;
        }
        visitor(opaque, referenced);
    }
}