apfl/src/gc.c
Laria Carolin Chabowski 396c5ad866 gc: Tighter coupling to full context
We'll soon need the whole apfl_ctx in the garbage collector.
2023-11-23 21:30:51 +01:00

733 lines
21 KiB
C

#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "alloc.h"
#include "bytecode.h"
#include "context.h"
#include "format.h"
#include "gc.h"
#include "resizable.h"
#include "scope.h"
#include "value.h"
// #define GC_DEBUG_COLLECT_EVERY_ALLOCATION 1
// #define GC_DEBUG_STATS 1
// #define GC_DEBUG_WIPE_RECLAIMED_OBJECTS 1
// #define GC_DEBUG_DUMP_GRAPH_ON_COLLECT 1
// #define GC_DEBUG_LOG_NEW_AND_RECLAIM 1
// A single slot managed by the garbage collector. The payload of every
// collectable kind is stored inline in the anonymous union; `type` tells which
// union member is in use and `status` holds the slot's GC state.
struct gc_object {
// Unlike most other tagged unions in apfl, the union is first here.
// This allows us to have pointers to the wrapped object that can be cast
// into gc_object pointers and vice versa.
union {
struct list_header list;
struct dict_header dict;
struct apfl_value var;
struct apfl_string string;
struct instruction_list instructions;
struct scope scope;
struct stack stack;
struct function function;
struct cfunction cfunction;
struct matcher_instruction_list matcher_instructions;
struct matcher matcher;
struct native_object native_object;
struct value_pair pair;
struct apfl_string* symbol;
};
// Discriminator for the union above.
enum gc_type type;
// GC state of this slot: free, white, grey or black.
enum gc_status status;
};
// Number of object slots stored inline in every block.
#define GC_OBJECTS_PER_BLOCK 128
// A fixed-size group of object slots. Blocks are chained into a singly linked
// list through `next`; the head of that list is kept in gc->block.
struct gc_block {
struct gc_object objects[GC_OBJECTS_PER_BLOCK];
struct gc_block *next;
};
// Allocator callback that apfl_gc_init installs as gc->allocator.
//
// The request is forwarded to the base allocator. If an allocation
// (newsize != 0) fails while no collection is running, a full GC run is
// performed and the allocation is retried exactly once.
//
// opaque:  the apfl_ctx this allocator belongs to.
// oldptr, oldsize, newsize: passed through unchanged to the base allocator.
//
// Returns whatever the base allocator returned; NULL if the retry failed too.
static void *
gc_allocator(void *opaque, void *oldptr, size_t oldsize, size_t newsize)
{
    apfl_ctx ctx = opaque;
    struct gc *gc = &ctx->gc;

#ifdef GC_DEBUG_COLLECT_EVERY_ALLOCATION
    // Debug mode: force a full collection before every allocation.
    // apfl_gc_full() takes the context, not the struct gc.
    if (newsize != 0 && !gc->is_collecting) {
        apfl_gc_full(ctx);
    }
#endif

    void *out = ALLOCATOR_CALL(gc->base_allocator, oldptr, oldsize, newsize);

    if (newsize != 0 && out == NULL && !gc->is_collecting) {
        // We're out of memory! Try to get out of this situation by doing a
        // full GC run.
        apfl_gc_full(ctx);

        // Hopefully we now have memory again. Try the allocation again.
        out = ALLOCATOR_CALL(gc->base_allocator, oldptr, oldsize, newsize);
    }

    // TODO: incremental GC step

    // `out` is NULL here if the allocation ultimately failed.
    return out;
}
// Reinterprets a pointer to a wrapped payload as a pointer to its gc_object
// and asserts that the object carries the expected type tag.
struct gc_object *
apfl_gc_object_from_ptr(void *ptr, enum gc_type type)
{
    struct gc_object *wrapper = (struct gc_object *)ptr;

    assert(wrapper->type == type);

    return wrapper;
}
// Initializes the GC state embedded in `ctx`.
//
// `allocator` is stored as the base allocator; the GC's own allocator wraps it
// through gc_allocator with `ctx` as the opaque pointer.
void
apfl_gc_init(apfl_ctx ctx, struct apfl_allocator allocator)
{
    struct gc *gc = &ctx->gc;

    // Start with no blocks, no temporary roots and no collection in progress.
    gc->is_collecting = false;
    gc->block = NULL;
    gc->tmproot_for_adding = NULL;
    gc->tmproots = (struct gc_tmproots) {
        .roots = NULL,
        .len = 0,
        .cap = 0,
    };

    // Wire up the allocators.
    gc->base_allocator = allocator;
    gc->allocator = (struct apfl_allocator) {
        .opaque = ctx,
        .alloc = gc_allocator,
    };
}
static struct gc_block *
new_block(struct gc *gc)
{
struct gc_block *block = ALLOC_OBJ(gc->allocator, struct gc_block);
if (block == NULL) {
return NULL;
}
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
block->objects[i] = (struct gc_object) { .status = GC_STATUS_FREE };
}
block->next = NULL;
return block;
}
static struct gc_object *
new_object_inner(struct gc *gc)
{
struct gc_block *cur = gc->block;
while (cur != NULL) {
struct gc_block *block = cur;
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
if (block->objects[i].status == GC_STATUS_FREE) {
return &block->objects[i];
}
}
cur = block->next;
}
struct gc_block *nb = new_block(gc);
if (nb == NULL) {
return NULL;
}
nb->next = gc->block;
gc->block = nb;
return &nb->objects[0];
}
// Obtains a free slot, tags it with `type` and colors it white.
// Returns NULL if no slot could be obtained.
static struct gc_object *
new_object(struct gc *gc, enum gc_type type)
{
    struct gc_object *slot = new_object_inner(gc);

    if (slot != NULL) {
        assert(slot->status == GC_STATUS_FREE);
        slot->type = type;
        slot->status = GC_STATUS_WHITE;
    }

    return slot;
}
// Forward declaration: type_to_string is used by the logging below before its
// definition further down in this file.
static const char *type_to_string(enum gc_type);
// LOG_NEW_AND_RECLAIM(fmt, ...) prints a printf-style message to stderr when
// GC_DEBUG_LOG_NEW_AND_RECLAIM is defined and expands to nothing otherwise.
#ifdef GC_DEBUG_LOG_NEW_AND_RECLAIM
# define LOG_NEW_AND_RECLAIM(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
#else
# define LOG_NEW_AND_RECLAIM(fmt, ...)
#endif
// IMPL_NEW(t, name, type, field) defines the function
//
//     t *name(struct gc *gc)
//
// which obtains a new GC object tagged `type` and returns a pointer to its
// `field` payload, or NULL if no object could be obtained. The (possibly NULL)
// object address is logged through LOG_NEW_AND_RECLAIM.
#define IMPL_NEW(t, name, type, field)                                        \
    t *                                                                       \
    name(struct gc *gc)                                                       \
    {                                                                         \
        struct gc_object *slot = new_object(gc, type);                        \
        LOG_NEW_AND_RECLAIM(                                                  \
            "New %s object at %p\n",                                          \
            type_to_string(type),                                             \
            (void *)slot                                                      \
        );                                                                    \
        if (slot == NULL) {                                                   \
            return NULL;                                                      \
        }                                                                     \
        return &slot->field;                                                  \
    }

IMPL_NEW(struct list_header,              apfl_gc_new_list,                 GC_TYPE_LIST,                 list)
IMPL_NEW(struct dict_header,              apfl_gc_new_dict,                 GC_TYPE_DICT,                 dict)
IMPL_NEW(struct apfl_value,               apfl_gc_new_var,                  GC_TYPE_VAR,                  var)
IMPL_NEW(struct apfl_string,              apfl_gc_new_string,               GC_TYPE_STRING,               string)
IMPL_NEW(struct instruction_list,         apfl_gc_new_instructions,         GC_TYPE_INSTRUCTIONS,         instructions)
IMPL_NEW(struct scope,                    apfl_gc_new_scope,                GC_TYPE_SCOPE,                scope)
IMPL_NEW(struct function,                 apfl_gc_new_func,                 GC_TYPE_FUNC,                 function)
IMPL_NEW(struct cfunction,                apfl_gc_new_cfunc,                GC_TYPE_CFUNC,                cfunction)
IMPL_NEW(struct matcher_instruction_list, apfl_gc_new_matcher_instructions, GC_TYPE_MATCHER_INSTRUCTIONS, matcher_instructions)
IMPL_NEW(struct matcher,                  apfl_gc_new_matcher,              GC_TYPE_MATCHER,              matcher)
IMPL_NEW(struct native_object,            apfl_gc_new_native_object,        GC_TYPE_NATIVE_OBJECT,        native_object)
IMPL_NEW(struct value_pair,               apfl_gc_new_pair,                 GC_TYPE_PAIR,                 pair)
IMPL_NEW(struct apfl_string *,            apfl_gc_new_symbol,               GC_TYPE_SYMBOL,               symbol)
// Returns the current number of temporary roots, to be handed to
// apfl_gc_tmproots_restore() later.
size_t
apfl_gc_tmproots_begin(struct gc *gc)
{
    const size_t mark = gc->tmproots.len;
    return mark;
}
void
apfl_gc_tmproots_restore(struct gc *gc, size_t newlen)
{
assert(newlen <= gc->tmproots.len);
gc->tmproots.len = newlen;
}
bool
apfl_gc_tmproot_add(struct gc *gc, struct gc_object *object)
{
// Since appending the new tmproot can trigger a garbage collection, we need
// to set the tmproot as the tmproot_for_adding, so we'll treat it as a root
// and not free it.
assert(gc->tmproot_for_adding == NULL);
gc->tmproot_for_adding = object;
bool ok = apfl_resizable_append(
gc->allocator,
sizeof(struct gc_object *),
(void **)&gc->tmproots.roots,
&gc->tmproots.len,
&gc->tmproots.cap,
&object,
1
);
gc->tmproot_for_adding = NULL;
return ok;
}
// Colors `object` grey, unless it is already black (black stays black).
static void
color_object_grey(struct gc_object *object)
{
    if (object->status == GC_STATUS_BLACK) {
        return;
    }

    object->status = GC_STATUS_GREY;
}
// Calls `visitor(opaque, root)` for every root object: first the roots
// reported by apfl_gc_roots_traverse(), then all temporary roots in order and
// finally the object currently being added as a temporary root, if any.
static void
visit_roots(apfl_ctx ctx, gc_visitor visitor, void *opaque)
{
    apfl_gc_roots_traverse(ctx, visitor, opaque);

    struct gc *gc = &ctx->gc;
    struct gc_tmproots *tmproots = &gc->tmproots;
    for (size_t idx = 0; idx < tmproots->len; idx++) {
        visitor(opaque, tmproots->roots[idx]);
    }

    struct gc_object *pending = gc->tmproot_for_adding;
    if (pending != NULL) {
        visitor(opaque, pending);
    }
}
// Root visitor used by mark_roots(): greys the visited root.
static void
mark_roots_visitor(void *opaque, struct gc_object *root)
{
    color_object_grey(root);
    (void)opaque; // No per-traversal state needed.
}
// Greys every root object so that tracing can start from them.
static void
mark_roots(apfl_ctx ctx)
{
    void *no_state = NULL;
    visit_roots(ctx, mark_roots_visitor, no_state);
}
// Calls `cb(opaque, child)` for every GC object directly referenced by
// `object`, by dispatching on the object's type to the matching traverse
// function. Asserts if the type tag is not one of the known types.
static void
visit_children(struct gc_object *object, gc_visitor cb, void *opaque)
{
    switch (object->type) {
    // Leaf types: the object doesn't reference other objects.
    case GC_TYPE_STRING:
    case GC_TYPE_NATIVE_OBJECT:
        return;

    case GC_TYPE_LIST:
        apfl_gc_list_traverse(&object->list, cb, opaque);
        return;

    case GC_TYPE_DICT:
        apfl_gc_dict_traverse(&object->dict, cb, opaque);
        return;

    case GC_TYPE_PAIR:
        apfl_gc_pair_traverse(&object->pair, cb, opaque);
        return;

    case GC_TYPE_SYMBOL:
        apfl_gc_symbol_traverse(&object->symbol, cb, opaque);
        return;

    case GC_TYPE_VAR:
        apfl_gc_var_traverse(&object->var, cb, opaque);
        return;

    case GC_TYPE_SCOPE:
        apfl_gc_scope_traverse(&object->scope, cb, opaque);
        return;

    case GC_TYPE_FUNC:
        apfl_gc_func_traverse(&object->function, cb, opaque);
        return;

    case GC_TYPE_CFUNC:
        apfl_gc_cfunc_traverse(&object->cfunction, cb, opaque);
        return;

    case GC_TYPE_INSTRUCTIONS:
        apfl_gc_instructions_traverse(&object->instructions, cb, opaque);
        return;

    case GC_TYPE_MATCHER_INSTRUCTIONS:
        apfl_gc_matcher_instructions_traverse(&object->matcher_instructions, cb, opaque);
        return;

    case GC_TYPE_MATCHER:
        apfl_gc_matcher_traverse(&object->matcher, cb, opaque);
        return;
    }

    // Only reached when the type tag matched none of the cases above.
    assert(false);
}
// Child visitor used by trace(): greys the visited child.
static void
trace_callback(void *opaque, struct gc_object *object)
{
    color_object_grey(object);
    (void)opaque; // No per-traversal state needed.
}
// Processes a single object: turns it black first, then greys each of its
// direct children that is not black yet.
static void
trace(struct gc_object *object)
{
    object->status = GC_STATUS_BLACK;

    void *no_state = NULL;
    visit_children(object, trace_callback, no_state);
}
static void
trace_while_having_grey(struct gc *gc)
{
bool found_grey;
do {
found_grey = false;
for (
struct gc_block *cur = gc->block;
cur != NULL;
cur = cur->next
) {
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
struct gc_object *object = &cur->objects[i];
if (object->status == GC_STATUS_GREY) {
trace(object);
found_grey = true;
}
}
}
} while (found_grey);
}
// Releases the resources held by the payload of `object` by dispatching on its
// type to the matching deinit function. The slot itself is not modified here.
// Asserts if the type tag is not one of the known types.
static void
deinit_object(apfl_ctx ctx, struct gc_object *object)
{
    struct gc *gc = &ctx->gc;

    switch (object->type) {
    // No deinit call is made for these types.
    case GC_TYPE_VAR:
    case GC_TYPE_PAIR:
    case GC_TYPE_SYMBOL:
        return;

    case GC_TYPE_LIST:
        apfl_list_deinit(gc->allocator, &object->list);
        return;

    case GC_TYPE_DICT:
        apfl_dict_deinit(&object->dict);
        return;

    case GC_TYPE_STRING:
        apfl_string_deinit(gc->allocator, &object->string);
        return;

    case GC_TYPE_SCOPE:
        apfl_scope_deinit(gc->allocator, &object->scope);
        return;

    case GC_TYPE_FUNC:
        apfl_function_deinit(gc->allocator, &object->function);
        return;

    case GC_TYPE_CFUNC:
        apfl_cfunction_deinit(gc->allocator, &object->cfunction);
        return;

    case GC_TYPE_INSTRUCTIONS:
        apfl_instructions_deinit(gc->allocator, &object->instructions);
        return;

    case GC_TYPE_MATCHER_INSTRUCTIONS:
        apfl_matcher_instructions_deinit(gc->allocator, &object->matcher_instructions);
        return;

    case GC_TYPE_MATCHER:
        apfl_matcher_deinit(gc->allocator, &object->matcher);
        return;

    case GC_TYPE_NATIVE_OBJECT:
        apfl_native_object_deinit(gc->allocator, &object->native_object);
        return;
    }

    // Only reached when the type tag matched none of the cases above.
    assert(false);
}
// Sweep phase of a collection.
//
// Walks every slot of every block: white objects are deinitialized and their
// slot is marked free, black objects are reset to white for the next run.
// Blocks in which no black object was found are unlinked from the block list
// and freed. With GC_DEBUG_STATS defined, the number of reclaimed objects and
// blocks is printed to stderr at the end.
static void
sweep(apfl_ctx ctx)
{
struct gc *gc = &ctx->gc;
#ifdef GC_DEBUG_STATS
int reclaimed_objects = 0;
int reclaimed_blocks = 0;
#endif
// `cur` points at the link that refers to the current block (the list head or
// the previous block's `next`), so a block can be unlinked in place.
struct gc_block **cur = &gc->block;
while (*cur != NULL) {
struct gc_block *block = *cur;
// Cleared as soon as a black (surviving) object is seen in this block.
bool completely_free = true;
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
struct gc_object *object = &block->objects[i];
switch (object->status) {
case GC_STATUS_FREE:
break;
case GC_STATUS_WHITE:
// Still white after tracing: reclaim the object.
LOG_NEW_AND_RECLAIM("reclaiming %p of type %s\n", (void *)object, type_to_string(object->type));
deinit_object(ctx, object);
#ifdef GC_DEBUG_WIPE_RECLAIMED_OBJECTS
memset(object, 0, sizeof(struct gc_object));
object->type = 0xFF; // Some intentionally undefined type
#endif
object->status = GC_STATUS_FREE;
#ifdef GC_DEBUG_STATS
reclaimed_objects++;
#endif
break;
case GC_STATUS_GREY:
assert(false /*Encountered grey object while sweeping*/);
break;
case GC_STATUS_BLACK:
object->status = GC_STATUS_WHITE; // Prepare for next run
completely_free = false;
break;
}
}
if (completely_free) {
// Unlink the block; `cur` stays put and now refers to the following block.
*cur = block->next;
FREE_OBJ(gc->allocator, block);
#ifdef GC_DEBUG_STATS
reclaimed_blocks++;
#endif
} else {
cur = &block->next;
}
}
#ifdef GC_DEBUG_STATS
fprintf(stderr, "gc: reclaimed %d objects, %d blocks\n", reclaimed_objects, reclaimed_blocks);
#endif
}
// DUMP_ON_COLLECT() writes the object graph to stderr when
// GC_DEBUG_DUMP_GRAPH_ON_COLLECT is defined and expands to nothing otherwise.
// It expects a variable named `ctx` to be in scope at the point of use.
#ifdef GC_DEBUG_DUMP_GRAPH_ON_COLLECT
# define DUMP_ON_COLLECT() apfl_gc_debug_dump_graph(ctx, apfl_io_file_writer(stderr))
#else
# define DUMP_ON_COLLECT()
#endif
// Runs a complete garbage collection: mark the roots, trace until no grey
// object is left, then sweep. Must not be called while a collection is
// already running (asserted). `is_collecting` is set for the duration of the
// run.
void
apfl_gc_full(apfl_ctx ctx)
{
    struct gc *const gc = &ctx->gc;

    assert(!gc->is_collecting);
    gc->is_collecting = true;

    // Phase 1: grey all roots.
    mark_roots(ctx);
    DUMP_ON_COLLECT();

    // Phase 2: propagate reachability.
    trace_while_having_grey(gc);
    DUMP_ON_COLLECT();

    // Phase 3: reclaim what is still white.
    sweep(ctx);
    DUMP_ON_COLLECT();

    gc->is_collecting = false;
}
// Notifies the collector that `child` became referenced by `parent`.
// If the parent is already black, the child is greyed (unless it is black
// itself); otherwise nothing happens.
void
apfl_gc_add_child(struct gc_object *parent, struct gc_object *child)
{
    if (parent->status != GC_STATUS_BLACK) {
        return;
    }

    color_object_grey(child);
}
// Fill color used for an object node in the graph dump: "black" for black
// objects, "grey" for grey ones and "white" for every other status.
static const char *
dump_graph_bgcolor(enum gc_status status)
{
    if (status == GC_STATUS_BLACK) {
        return "black";
    }
    if (status == GC_STATUS_GREY) {
        return "grey";
    }
    return "white";
}
// Font color used for an object node in the graph dump: "white" on black
// objects, "black" for every other status.
static const char *
dump_graph_fgcolor(enum gc_status status)
{
    return status == GC_STATUS_BLACK ? "white" : "black";
}
// Maps a GC type tag to a human readable name for debug output.
// Asserts on an unknown tag; returns "???" for it when asserts are disabled.
static const char *
type_to_string(enum gc_type type)
{
    switch (type) {
    case GC_TYPE_LIST:                 return "list";
    case GC_TYPE_DICT:                 return "dict";
    case GC_TYPE_VAR:                  return "var";
    case GC_TYPE_STRING:               return "string";
    case GC_TYPE_INSTRUCTIONS:         return "instructions";
    case GC_TYPE_SCOPE:                return "scope";
    case GC_TYPE_FUNC:                 return "func";
    case GC_TYPE_CFUNC:                return "cfunc";
    case GC_TYPE_MATCHER_INSTRUCTIONS: return "matcher instructions";
    case GC_TYPE_MATCHER:              return "matcher";
    case GC_TYPE_NATIVE_OBJECT:        return "native object";
    case GC_TYPE_PAIR:                 return "pair";
    case GC_TYPE_SYMBOL:               return "symbol";
    }

    assert(false);
    return "???";
}
// State shared across all calls of dump_graph_roots_visitor().
struct dump_graph_roots_visitor_data {
    struct apfl_io_writer w; // Destination of the dump.
    bool success;            // Becomes false once any write has failed.
};

// Root visitor for the graph dump: writes one "ROOTS -> obj_<ptr>" edge.
// Once a write has failed, nothing further is written.
static void
dump_graph_roots_visitor(void *opaque, struct gc_object *obj)
{
    struct dump_graph_roots_visitor_data *state = opaque;

    if (!state->success) {
        return;
    }

    state->success =
        apfl_io_write_string(state->w, " ROOTS -> obj_")
        && apfl_format_put_poiner(state->w, (void *)obj)
        && apfl_io_write_string(state->w, "\n");
}
// State shared across all calls of dump_graph_visitor() for one parent.
struct dump_graph_visitor_data {
    struct apfl_io_writer w;  // Destination of the dump.
    struct gc_object *parent; // Object whose children are being visited.
    bool success;             // Becomes false once any write has failed.
};

// Child visitor for the graph dump: writes one
// "obj_<parent> -> obj_<child>" edge. Once a write has failed, nothing
// further is written.
static void
dump_graph_visitor(void *opaque, struct gc_object *obj)
{
    struct dump_graph_visitor_data *state = opaque;

    if (!state->success) {
        return;
    }

    state->success =
        apfl_io_write_string(state->w, " obj_")
        && apfl_format_put_poiner(state->w, (void *)state->parent)
        && apfl_io_write_string(state->w, " -> obj_")
        && apfl_format_put_poiner(state->w, (void *)obj)
        && apfl_io_write_string(state->w, "\n");
}
bool
apfl_gc_debug_dump_graph(apfl_ctx ctx, struct apfl_io_writer w)
{
struct gc *gc = &ctx->gc;
FMT_TRY(apfl_io_write_string(w, "digraph G {\n"));
struct dump_graph_roots_visitor_data roots_visitor_data = {
.w = w,
.success = true,
};
visit_roots(ctx, dump_graph_roots_visitor, &roots_visitor_data);
FMT_TRY(roots_visitor_data.success);
for (struct gc_block *block = gc->block; block != NULL; block = block->next) {
int counts[4] = {0, 0, 0, 0};
for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
struct gc_object *obj = &block->objects[i];
counts[obj->status]++;
if (obj->status == GC_STATUS_FREE) {
continue;
}
FMT_TRY(apfl_io_write_string(w, " blk_"));
FMT_TRY(apfl_format_put_poiner(w, (void *)block));
FMT_TRY(apfl_io_write_string(w, " -> obj_"));
FMT_TRY(apfl_format_put_poiner(w, (void *)obj));
FMT_TRY(apfl_io_write_string(w, "\n"));
FMT_TRY(apfl_io_write_string(w, " obj_"));
FMT_TRY(apfl_format_put_poiner(w, (void *)obj));
FMT_TRY(apfl_io_write_string(w, "[style=filled,fillcolor="));
FMT_TRY(apfl_io_write_string(w, dump_graph_bgcolor(obj->status)));
FMT_TRY(apfl_io_write_string(w, ",fontcolor="));
FMT_TRY(apfl_io_write_string(w, dump_graph_fgcolor(obj->status)));
FMT_TRY(apfl_io_write_string(w, ",label=\"Object "));
FMT_TRY(apfl_format_put_poiner(w, (void *)obj));
FMT_TRY(apfl_io_write_string(w, "\\ntype: "));
FMT_TRY(apfl_io_write_string(w, type_to_string(obj->type)));
FMT_TRY(apfl_io_write_string(w, "\"];\n"));
struct dump_graph_visitor_data visitor_data = {
.w = w,
.parent = obj,
.success = true,
};
visit_children(obj, dump_graph_visitor, &visitor_data);
FMT_TRY(visitor_data.success);
}
FMT_TRY(apfl_io_write_string(w, " BLOCKS -> blk_"));
FMT_TRY(apfl_format_put_poiner(w, (void *)block));
FMT_TRY(apfl_io_write_string(w, ";\n"));
FMT_TRY(apfl_io_write_string(w, " blk_"));
FMT_TRY(apfl_format_put_poiner(w, (void *)block));
FMT_TRY(apfl_io_write_string(w, " [label=\"Block "));
FMT_TRY(apfl_format_put_poiner(w, (void *)block));
FMT_TRY(apfl_io_write_string(w, "\\nfree "));
FMT_TRY(apfl_format_put_int(w, counts[GC_STATUS_FREE]));
FMT_TRY(apfl_io_write_string(w, ", black "));
FMT_TRY(apfl_format_put_int(w, counts[GC_STATUS_BLACK]));
FMT_TRY(apfl_io_write_string(w, ", grey "));
FMT_TRY(apfl_format_put_int(w, counts[GC_STATUS_GREY]));
FMT_TRY(apfl_io_write_string(w, ", white "));
FMT_TRY(apfl_format_put_int(w, counts[GC_STATUS_WHITE]));
FMT_TRY(apfl_io_write_string(w, "\"];\n"));
}
FMT_TRY(apfl_io_write_string(w, "}\n"));
return true;
}
typedef size_t statuscounts[4];
static bool
blockstat_line(struct apfl_io_writer w, size_t i, statuscounts counts)
{
FMT_TRY(apfl_format_put_int(w, (int)i));
FMT_TRY(apfl_io_write_byte(w, '\t'));
FMT_TRY(apfl_format_put_int(w, (int)counts[GC_STATUS_FREE]));
FMT_TRY(apfl_io_write_byte(w, '\t'));
FMT_TRY(apfl_format_put_int(w, (int)counts[GC_STATUS_BLACK]));
FMT_TRY(apfl_io_write_byte(w, '\t'));
FMT_TRY(apfl_format_put_int(w, (int)counts[GC_STATUS_GREY]));
FMT_TRY(apfl_io_write_byte(w, '\t'));
FMT_TRY(apfl_format_put_int(w, (int)counts[GC_STATUS_WHITE]));
FMT_TRY(apfl_io_write_byte(w, '\n'));
return true;
}
// Writes a table with the number of free, black, grey and white slots of
// every block to `w`, one line per block in list order, followed by a
// separator and a final line holding the number of blocks and the totals.
// Returns true on success, false as soon as a write fails.
bool
apfl_gc_debug_blockstats(apfl_ctx ctx, struct apfl_io_writer w)
{
    statuscounts total = {0, 0, 0, 0};
    size_t block_no = 0;

    FMT_TRY(apfl_io_write_string(w, "block#\tfree\tblack\tgrey\twhite\n========================================\n"));

    struct gc_block *blk = ctx->gc.block;
    while (blk != NULL) {
        statuscounts per_block = {0, 0, 0, 0};

        for (size_t slot = 0; slot < GC_OBJECTS_PER_BLOCK; slot++) {
            const enum gc_status status = blk->objects[slot].status;
            per_block[status] += 1;
            total[status] += 1;
        }

        FMT_TRY(blockstat_line(w, block_no, per_block));

        block_no++;
        blk = blk->next;
    }

    FMT_TRY(apfl_io_write_string(w, "========================================\n"));
    FMT_TRY(blockstat_line(w, block_no, total));

    return true;
}
// Tears down the GC state of `ctx`: deinitializes every object that is not
// free, frees all blocks and frees the temporary root list.
//
// Afterwards the block list and the temporary root list are both reset to
// their empty state, so no dangling pointer is left behind in the struct gc.
void
apfl_gc_deinit(apfl_ctx ctx)
{
    struct gc *gc = &ctx->gc;

    struct gc_block *block = gc->block;
    while (block != NULL) {
        for (size_t i = 0; i < GC_OBJECTS_PER_BLOCK; i++) {
            struct gc_object *object = &block->objects[i];
            if (object->status != GC_STATUS_FREE) {
                deinit_object(ctx, object);
            }
        }

        // Fetch the successor before the block's memory is released.
        struct gc_block *next = block->next;
        FREE_OBJ(gc->allocator, block);
        block = next;
    }
    gc->block = NULL;

    FREE_LIST(gc->allocator, gc->tmproots.roots, gc->tmproots.cap);
    // Reset the list like gc->block above, instead of leaving a stale pointer
    // with a non-zero len/cap behind.
    gc->tmproots.roots = NULL;
    gc->tmproots.len = 0;
    gc->tmproots.cap = 0;
}