apfl/src/gc.c

546 lines
14 KiB
C
Raw Normal View History

Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
#include <assert.h>
#include <stdio.h>
#include "alloc.h"
#include "bytecode.h"
#include "context.h"
#include "gc.h"
#include "hashmap.h"
#include "resizable.h"
#include "value.h"
#define DEBUG_GC 1
struct gc_object {
// Unlike most other tagged unions in apfl, the union is first here.
// This allows us to have pointers to the wrapped object that can be cast
// into gc_object pointers and vice versa.
union {
struct list_header list;
struct dict_header dict;
struct apfl_value var;
struct apfl_string string;
struct instruction_list instructions;
struct scope scope;
struct stack stack;
};
enum gc_type type;
enum gc_status status;
};
#define GC_OBJECTS_PER_BLOCK 128
struct gc_block {
struct gc_object objects[GC_OBJECTS_PER_BLOCK];
struct gc_block *next;
};
static void *
gc_allocator(void *opaque, void *oldptr, size_t oldsize, size_t newsize)
{
struct gc *gc = opaque;
void *out = ALLOCATOR_CALL(gc->base_allocator, oldptr, oldsize, newsize);
if (newsize != 0 && out == NULL) {
// We're out of memory! Try to get out of this situation by doing a full
// GC run.
apfl_gc_full(gc);
// Hopefully we now have memory again. Try the allocation again.
out = ALLOCATOR_CALL(gc->base_allocator, oldptr, oldsize, newsize);
}
if (newsize != 0 && out == NULL) {
return NULL;
}
// TODO: incremental GC step
return out;
}
struct gc_object *
apfl_gc_object_from_ptr(void *ptr, enum gc_type type)
{
struct gc_object *object = ptr;
assert(object->type == type);
return object;
}
bool
apfl_gc_init(struct gc *gc, struct apfl_allocator allocator)
{
gc->base_allocator = allocator;
gc->allocator = (struct apfl_allocator) {
.opaque = gc,
.alloc = gc_allocator,
};
gc->block = NULL;
gc->tmproots = (struct gc_tmproots) {
.roots = NULL,
.len = 0,
.cap = 0,
};
gc->tmproot_for_adding = NULL;
// TODO: It's a bit wasteful that we use a hashmap here. We are only
// ever interested in the keys.
return apfl_hashmap_init(
&gc->roots,
allocator,
(struct apfl_hashmap_callbacks) {.opaque = NULL},
sizeof(struct gc_object *),
sizeof(char)
);
}
static struct gc_block *
new_block(struct gc *gc)
{
struct gc_block *block = ALLOC_OBJ(gc->allocator, struct gc_block);
if (block == NULL) {
return NULL;
}
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
block->objects[i] = (struct gc_object) { .status = GC_STATUS_FREE };
}
block->next = NULL;
return block;
}
static struct gc_object *
new_object_inner(struct gc *gc)
{
struct gc_block **cur = &gc->block;
while (*cur != NULL) {
struct gc_block *block = *cur;
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
if (block->objects[i].status == GC_STATUS_FREE) {
return &block->objects[i];
}
}
cur = &block->next;
}
struct gc_block *nb = new_block(gc);
if (nb == NULL) {
return NULL;
}
*cur = nb;
return &nb->objects[0];
}
static struct gc_object *
new_object(struct gc *gc, enum gc_type type)
{
struct gc_object *object = new_object_inner(gc);
if (object == NULL) {
return NULL;
}
assert(object->status == GC_STATUS_FREE);
object->status = GC_STATUS_WHITE;
object->type = type;
return object;
}
#define IMPL_NEW(t, name, type, field) \
t * \
name(struct gc *gc) \
{ \
struct gc_object *object = new_object(gc, type); \
return object == NULL ? NULL : &object->field; \
}
IMPL_NEW(struct list_header, apfl_gc_new_list, GC_TYPE_LIST, list )
IMPL_NEW(struct dict_header, apfl_gc_new_dict, GC_TYPE_DICT, dict )
IMPL_NEW(struct apfl_value, apfl_gc_new_var, GC_TYPE_VAR, var )
IMPL_NEW(struct apfl_string, apfl_gc_new_string, GC_TYPE_STRING, string )
IMPL_NEW(struct instruction_list, apfl_gc_new_instructions, GC_TYPE_INSTRUCTIONS, instructions )
IMPL_NEW(struct scope, apfl_gc_new_scope, GC_TYPE_SCOPE, scope )
IMPL_NEW(struct stack, apfl_gc_new_stack, GC_TYPE_STACK, stack )
bool
apfl_gc_root_add(struct gc *gc, struct gc_object *object)
{
// Since setting the new root can trigger a garbage collection, we need to
// set the root as the tmproot_for_adding, so we'll treat it as a root and
// not free it.
assert(gc->tmproot_for_adding == NULL);
gc->tmproot_for_adding = object;
char v = 0;
bool ok = apfl_hashmap_set(&gc->roots, &object, &v);
gc->tmproot_for_adding = NULL;
return ok;
}
void
apfl_gc_root_remove(struct gc *gc, struct gc_object *object)
{
apfl_hashmap_delete(&gc->roots, &object);
}
size_t
apfl_gc_tmproots_begin(struct gc *gc)
{
return gc->tmproots.len;
}
void
apfl_gc_tmproots_restore(struct gc *gc, size_t newlen)
{
assert(newlen <= gc->tmproots.len);
gc->tmproots.len = newlen;
}
bool
apfl_gc_tmproot_add(struct gc *gc, struct gc_object *object)
{
// Since appending the new tmproot can trigger a garbage collection, we need
// to set the tmproot as the tmproot_for_adding, so we'll treat it as a root
// and not free it.
assert(gc->tmproot_for_adding == NULL);
gc->tmproot_for_adding = object;
bool ok = apfl_resizable_append(
gc->allocator,
sizeof(struct gc_object *),
(void **)&gc->tmproots.roots,
&gc->tmproots.len,
&gc->tmproots.cap,
&object,
1
);
gc->tmproot_for_adding = NULL;
return ok;
}
static void
color_object_grey(struct gc_object *object)
{
object->status = object->status == GC_STATUS_BLACK ? GC_STATUS_BLACK : GC_STATUS_GREY;
}
static void
visit_roots(struct gc *gc, gc_visitor visitor, void *opaque)
{
HASHMAP_EACH(&gc->roots, cur) {
struct gc_object *obj;
assert(apfl_hashmap_cursor_get_key(cur, &obj));
Implement mark&sweep garbage collection and bytecode compilation Instead of the previous refcount base garbage collection, we're now using a basic tri-color mark&sweep collector. This is done to support cyclical value relationships in the future (functions can form cycles, all values implemented up to this point can not). The collector maintains a set of roots and a set of objects (grouped into blocks). The GC enabled objects are no longer allocated manually, but will be allocated by the GC. The GC also wraps an allocator, this way the GC knows, if we ran out of memory and will try to get out of this situation by performing a full collection cycle. The tri-color abstraction was chosen for two reasons: - We don't have to maintain a list of objects that need to be marked, we can simply grab the next grey one. - It should allow us to later implement incremental collection (right now we only do a stop-the-world collection). This also switches to a bytecode based evaluation of the code: We no longer directly evaluate the AST, but first compile it into a series of instructions, that are evaluated in a separate step. This was done in preparation for inplementing functions: We only need to turn a function body into instructions instead of evaluating the node again with each call of the function. Also, since an instruction list is implemented as a GC object, this then removes manual memory management of the function body and it's child nodes. Since the GC and the bytecode go hand in hand, this was done in one (giant) commit. As a downside, we've now lost the ability do do list matching on assignments. I've already started to work on implementing this in the new architecture, but left it out of this commit, as it's already quite a large commit :)
2022-04-11 20:24:22 +00:00
visitor(opaque, obj);
}
for (size_t i = 0; i < gc->tmproots.len; i++) {
visitor(opaque, gc->tmproots.roots[i]);
}
if (gc->tmproot_for_adding != NULL) {
visitor(opaque, gc->tmproot_for_adding);
}
}
static void
mark_roots_visitor(void *opaque, struct gc_object *root)
{
(void)opaque;
color_object_grey(root);
}
static void
mark_roots(struct gc *gc)
{
visit_roots(gc, mark_roots_visitor, NULL);
}
static void
visit_children(struct gc_object *object, gc_visitor cb, void *opaque)
{
switch (object->type) {
case GC_TYPE_LIST:
apfl_gc_list_traverse(&object->list, cb, opaque);
return;
case GC_TYPE_DICT:
apfl_gc_dict_traverse(&object->dict, cb, opaque);
return;
case GC_TYPE_VAR:
apfl_gc_var_traverse(&object->var, cb, opaque);
return;
case GC_TYPE_SCOPE:
apfl_gc_scope_traverse(&object->scope, cb, opaque);
return;
case GC_TYPE_STRING:
// Intentionally left blank. Object doesn't reference other objects.
return;
case GC_TYPE_INSTRUCTIONS:
apfl_gc_instructions_traverse(&object->instructions, cb, opaque);
return;
case GC_TYPE_STACK:
apfl_gc_stack_traverse(&object->stack, cb, opaque);
return;
}
assert(false);
}
static void
trace_callback(void *opaque, struct gc_object *object)
{
(void)opaque;
color_object_grey(object);
}
static void
trace(struct gc_object *object)
{
object->status = GC_STATUS_BLACK;
visit_children(object, trace_callback, NULL);
}
static void
trace_while_having_grey(struct gc *gc)
{
bool found_grey;
do {
found_grey = false;
for (
struct gc_block *cur = gc->block;
cur != NULL;
cur = cur->next
) {
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
struct gc_object *object = &cur->objects[i];
if (object->status == GC_STATUS_GREY) {
trace(object);
found_grey = true;
}
}
}
} while (found_grey);
}
static void
deinit_object(struct gc *gc, struct gc_object *object)
{
switch (object->type) {
case GC_TYPE_LIST:
apfl_list_deinit(gc->allocator, &object->list);
return;
case GC_TYPE_DICT:
apfl_dict_deinit(&object->dict);
return;
case GC_TYPE_VAR:
return;
case GC_TYPE_STRING:
apfl_string_deinit(gc->allocator, &object->string);
return;
case GC_TYPE_INSTRUCTIONS:
apfl_instructions_deinit(gc->allocator, &object->instructions);
return;
case GC_TYPE_SCOPE:
apfl_scope_deinit(gc->allocator, &object->scope);
return;
case GC_TYPE_STACK:
apfl_stack_deinit(gc->allocator, &object->stack);
return;
}
assert(false);
}
static void
sweep(struct gc *gc)
{
struct gc_block **cur = &gc->block;
while (*cur != NULL) {
struct gc_block *block = *cur;
bool completely_free = true;
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
struct gc_object *object = &block->objects[i];
switch (object->status) {
case GC_STATUS_FREE:
break;
case GC_STATUS_WHITE:
deinit_object(gc, object);
object->status = GC_STATUS_FREE;
break;
case GC_STATUS_GREY:
assert(false /*Encountered grey object while sweeping*/);
break;
case GC_STATUS_BLACK:
object->status = GC_STATUS_WHITE; // Prepare for next run
completely_free = false;
break;
}
}
if (completely_free) {
*cur = block->next;
FREE_OBJ(gc->allocator, block);
} else {
cur = &block->next;
}
}
}
void
apfl_gc_full(struct gc *gc)
{
mark_roots(gc);
apfl_gc_debug_dump_graph(gc, stderr);
trace_while_having_grey(gc);
apfl_gc_debug_dump_graph(gc, stderr);
sweep(gc);
apfl_gc_debug_dump_graph(gc, stderr);
}
static const char *
dump_graph_bgcolor(enum gc_status status)
{
switch (status) {
case GC_STATUS_BLACK:
return "black";
case GC_STATUS_GREY:
return "grey";
default:
return "white";
}
}
static const char *
dump_graph_fgcolor(enum gc_status status)
{
switch (status) {
case GC_STATUS_BLACK:
return "white";
default:
return "black";
}
}
static const char *
type_to_string(enum gc_type type)
{
switch (type) {
case GC_TYPE_LIST:
return "list";
case GC_TYPE_DICT:
return "dict";
case GC_TYPE_VAR:
return "var";
case GC_TYPE_STRING:
return "string";
case GC_TYPE_INSTRUCTIONS:
return "instructions";
case GC_TYPE_SCOPE:
return "scope";
case GC_TYPE_STACK:
return "stack";
}
assert(false);
return "???";
}
static void
dump_graph_roots_visitor(void *opaque, struct gc_object *obj)
{
FILE *out = opaque;
fprintf(out, "ROOTS -> obj_%p;\n", (void *)obj);
}
struct dump_graph_visitor_data {
FILE *out;
struct gc_object *parent;
};
static void
dump_graph_visitor(void *opaque, struct gc_object *obj)
{
struct dump_graph_visitor_data *data = opaque;
fprintf(data->out, "obj_%p -> obj_%p\n", (void *)data->parent, (void *)obj);
}
void
apfl_gc_debug_dump_graph(struct gc *gc, FILE *out)
{
fprintf(out, "digraph G {\n");
visit_roots(gc, dump_graph_roots_visitor, out);
for (struct gc_block *block = gc->block; block != NULL; block = block->next) {
int counts[4] = {0, 0, 0, 0};
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
struct gc_object *obj = &block->objects[i];
counts[obj->status]++;
if (obj->status == GC_STATUS_FREE) {
continue;
}
fprintf(out, "blk_%p -> obj_%p;\n", (void *)block, (void *)obj);
fprintf(
out,
"obj_%p [style=filled,fillcolor=%s,fontcolor=%s,label=\"Object %p\\ntype: %s\"];\n",
(void *)obj,
dump_graph_bgcolor(obj->status),
dump_graph_fgcolor(obj->status),
(void *)obj,
type_to_string(obj->type)
);
visit_children(obj, dump_graph_visitor, &(struct dump_graph_visitor_data) {
.out = out,
.parent = obj,
});
}
fprintf(out, "BLOCKS -> blk_%p;\n", (void *)block);
fprintf(
out,
"blk_%p [label=\"Block %p\\nfree %d, black %d, grey %d, white %d\"];\n",
(void *)block,
(void *)block,
counts[GC_STATUS_FREE],
counts[GC_STATUS_BLACK],
counts[GC_STATUS_GREY],
counts[GC_STATUS_WHITE]
);
}
fprintf(out, "}\n");
}
void
apfl_gc_deinit(struct gc *gc)
{
for (struct gc_block *block = gc->block; block != NULL; ) {
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
struct gc_object *object = &block->objects[i];
if (object->status != GC_STATUS_FREE) {
deinit_object(gc, object);
}
}
struct gc_block *next = block->next;
FREE_OBJ(gc->allocator, block);
block = next;
}
gc->block = NULL;
FREE_LIST(gc->allocator, gc->tmproots.roots, gc->tmproots.cap);
apfl_hashmap_deinit(&gc->roots);
}