apfl/src/parser.c

#include <assert.h>
#include <stdlib.h>
#include <stdbool.h>
#include "apfl.h"
#include "alloc.h"
#include "resizable.h"
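// Parser state: wraps the token source and keeps a single-token lookahead in
// `token`. `has_unread` marks a token that was pushed back with unread_token,
// `eof` is latched once the source runs dry, and `error` holds the most
// recent parse error.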
struct apfl_parser {
struct apfl_allocator allocator;
struct apfl_parser_token_source token_source;
bool has_expr;
struct apfl_expr expr;
struct apfl_error error;
bool eof;
bool has_token;
bool has_unread;
struct apfl_token token;
};
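// Outcome of an attempt to parse one fragment:
// PF_OK          - a fragment was produced
// PF_CANT_HANDLE - the next token cannot start a fragment; it has been unread
//                  so the caller can inspect it
// PF_EOF         - the token source ended before a fragment started
// PF_ERROR       - a parse error occurred and p->error has been set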
enum parse_fragment_result {
PF_OK,
PF_CANT_HANDLE,
PF_EOF,
PF_ERROR,
};
enum parse_fragment_flags {
FFLAG_NO_EXPAND = 1,
FFLAG_NO_POSTFIXS = 2,
};
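// Fragments are the parser's intermediate nodes: a construct is first read as
// a flat series of fragments, which are later converted into expressions,
// call arguments, parameters or assignment targets depending on context.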
enum fragment_type {
FRAG_EXPAND,
FRAG_CONSTANT,
FRAG_NAME,
FRAG_DOT,
FRAG_AT,
FRAG_PREDICATE,
FRAG_EXPR,
FRAG_LIST,
FRAG_BLANK,
};
struct fragment_dot {
struct fragment *lhs;
struct apfl_string rhs;
};
struct fragment_lhs_rhs {
struct fragment *lhs;
struct fragment *rhs;
};
struct fragment_list {
APFL_RESIZABLE_TRAIT(struct fragment, children)
};
struct fragment {
enum fragment_type type;
union {
struct fragment *expand;
struct apfl_expr_const constant;
struct apfl_string name;
struct fragment_dot dot;
struct fragment_lhs_rhs at;
struct fragment_lhs_rhs predicate;
struct apfl_expr expr;
struct fragment_list list;
};
struct apfl_position position;
};
static enum parse_fragment_result parse_fragment(apfl_parser_ptr, struct fragment*, bool need, enum parse_fragment_flags);
static bool
grow_fragment_cap(struct apfl_allocator allocator, struct fragment_list *list, size_t inc)
{
return apfl_resizable_ensure_cap_for_more_elements(
allocator,
sizeof(struct fragment),
(void **)&list->children,
list->len,
&list->cap,
inc
);
}
static bool
append_fragment(struct apfl_allocator allocator, struct fragment_list *list, struct fragment fragment)
{
return apfl_resizable_append(
allocator,
sizeof(struct fragment),
APFL_RESIZABLE_ARGS(*list, children),
&fragment,
1
);
}
static void fragment_deinit(struct apfl_allocator allocator, struct fragment *);
static void
deinit_fragment_lhs_rhs(struct apfl_allocator allocator, struct fragment_lhs_rhs *lr)
{
DESTROY(allocator, lr->lhs, fragment_deinit);
DESTROY(allocator, lr->rhs, fragment_deinit);
}
static void
fragment_list_deinit(struct apfl_allocator allocator, struct fragment_list *list)
{
DEINIT_CAP_LIST(allocator, list->children, list->len, list->cap, fragment_deinit);
apfl_resizable_init(APFL_RESIZABLE_ARGS(*list, children));
}
static void
fragment_deinit(struct apfl_allocator allocator, struct fragment *fragment)
{
if (fragment == NULL) {
return;
}
switch (fragment->type) {
case FRAG_EXPAND:
DESTROY(allocator, fragment->expand, fragment_deinit);
break;
case FRAG_CONSTANT:
apfl_expr_const_deinit(allocator, &fragment->constant);
break;
case FRAG_NAME:
apfl_string_deinit(allocator, &fragment->name);
break;
case FRAG_DOT:
DESTROY(allocator, fragment->dot.lhs, fragment_deinit);
apfl_string_deinit(allocator, &fragment->dot.rhs);
break;
case FRAG_AT:
deinit_fragment_lhs_rhs(allocator, &fragment->at);
break;
case FRAG_PREDICATE:
deinit_fragment_lhs_rhs(allocator, &fragment->predicate);
break;
case FRAG_EXPR:
apfl_expr_deinit(allocator, &fragment->expr);
break;
case FRAG_LIST:
fragment_list_deinit(allocator, &fragment->list);
break;
case FRAG_BLANK:
// nop
break;
}
}
static struct fragment_dot
fragment_dot_move(struct fragment_dot *in)
{
struct fragment_dot out;
MOVEPTR(out.lhs, in->lhs);
out.rhs = apfl_string_move(&in->rhs);
return out;
}
static struct fragment_lhs_rhs
fragment_lhs_rhs_move(struct fragment_lhs_rhs *in)
{
struct fragment_lhs_rhs out;
MOVEPTR(out.lhs, in->lhs);
MOVEPTR(out.rhs, in->rhs);
return out;
}
static struct fragment_list
fragment_list_move(struct fragment_list *in)
{
struct fragment_list out = *in;
MOVEPTR(out.children, in->children);
in->len = 0;
in->cap = 0;
return out;
}
static struct fragment
fragment_move(struct fragment *in)
{
struct fragment out = *in;
switch (in->type) {
case FRAG_EXPAND:
MOVEPTR(out.expand, in->expand);
break;
case FRAG_CONSTANT:
out.constant = apfl_expr_const_move(&in->constant);
break;
case FRAG_NAME:
out.name = apfl_string_move(&in->name);
break;
case FRAG_DOT:
out.dot = fragment_dot_move(&in->dot);
break;
case FRAG_AT:
out.at = fragment_lhs_rhs_move(&in->at);
break;
case FRAG_PREDICATE:
out.predicate = fragment_lhs_rhs_move(&in->predicate);
break;
case FRAG_EXPR:
out.expr = apfl_expr_move(&in->expr);
break;
case FRAG_LIST:
out.list = fragment_list_move(&in->list);
break;
case FRAG_BLANK:
// nop
break;
}
return out;
}
apfl_parser_ptr
apfl_parser_new(struct apfl_allocator allocator, struct apfl_parser_token_source token_source)
{
apfl_parser_ptr p = ALLOC_OBJ(allocator, struct apfl_parser);
if (p == NULL) {
return NULL;
}
p->allocator = allocator;
p->token_source = token_source;
p->eof = false;
p->has_token = false;
p->has_unread = false;
p->has_expr = false;
return p;
}
static enum apfl_parse_result
get_raw_token(apfl_parser_ptr p, struct apfl_token *token, bool need)
{
struct apfl_parser_token_source *src = &(p->token_source);
enum apfl_parse_result result = src->next(src->opaque, need);
switch (result) {
case APFL_PARSE_ERROR:
p->error = src->get_error(src->opaque);
break;
case APFL_PARSE_OK:
*token = src->get_token(src->opaque);
break;
default:
// nop
break;
}
return result;
}
static enum apfl_parse_result
get_non_comment_token(apfl_parser_ptr p, struct apfl_token *token, bool need)
{
for (;;) {
enum apfl_parse_result result = get_raw_token(p, token, need);
if (result != APFL_PARSE_OK) {
return result;
}
if (token->type == APFL_TOK_COMMENT) {
apfl_token_deinit(p->allocator, token);
} else {
return APFL_PARSE_OK;
}
}
}
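// Like get_non_comment_token, but also folds away line continuations: a
// CONTINUE_LINE token must be followed directly by a LINEBREAK, and both are
// dropped from the stream.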
static enum apfl_parse_result
get_preprocessed_token(apfl_parser_ptr p, struct apfl_token *token, bool need)
{
enum apfl_parse_result result;
for (;;) {
result = get_non_comment_token(p, token, need);
if (result != APFL_PARSE_OK) {
return result;
}
if (token->type != APFL_TOK_CONTINUE_LINE) {
return APFL_PARSE_OK;
}
struct apfl_position continue_line_pos = token->position;
apfl_token_deinit(p->allocator, token);
result = get_non_comment_token(p, token, true);
if (result != APFL_PARSE_OK) {
return result;
}
if (token->type != APFL_TOK_LINEBREAK) {
apfl_token_deinit(p->allocator, token);
p->error = (struct apfl_error) {
.type = APFL_ERR_NO_LINEBREAK_AFTER_CONTINUE_LINE,
.position = continue_line_pos,
};
return APFL_PARSE_ERROR;
}
apfl_token_deinit(p->allocator, token);
}
}
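// Advances p->token to the next preprocessed token. A previously unread token
// is handed out again first; EOF is remembered so later calls keep reporting
// APFL_PARSE_EOF.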
static enum apfl_parse_result
read_token(apfl_parser_ptr p, bool need)
{
if (p->eof) {
return APFL_PARSE_EOF;
}
if (p->has_unread) {
p->has_unread = false;
return APFL_PARSE_OK;
}
if (p->has_token) {
apfl_token_deinit(p->allocator, &p->token);
}
enum apfl_parse_result result = get_preprocessed_token(p, &p->token, need);
p->eof = result == APFL_PARSE_EOF;
p->has_token = result == APFL_PARSE_OK;
return result;
}
static void
unread_token(apfl_parser_ptr p)
{
assert(!p->eof);
assert(p->has_token);
assert(!p->has_unread);
p->has_unread = true;
}
// Must only be called after a PF_CANT_HANDLE!
static void
read_token_after_cant_handle(apfl_parser_ptr p)
{
// A function that returns PF_CANT_HANDLE always unreads a token, so we are
// guaranteed to have at least one token.
enum apfl_parse_result result = read_token(p, true);
(void)result; // Keep the read outside the assert so it still happens with NDEBUG.
assert(result == APFL_PARSE_OK);
}
static struct apfl_error
err_unexpected_token(enum apfl_token_type token_type, struct apfl_position pos)
{
return (struct apfl_error) {
.type = APFL_ERR_UNEXPECTED_TOKEN,
.token_type = token_type,
.position = pos,
};
}
#define ERR_UNEXPECTED_TOKEN(t) (err_unexpected_token((t).type, (t).position))
static enum parse_fragment_result
parse_fragment_into_list(apfl_parser_ptr p, struct fragment_list *list, bool need, enum parse_fragment_flags flags)
{
if (!grow_fragment_cap(p->allocator, list, 1)) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return PF_ERROR;
}
struct fragment *elem = &list->children[list->len];
enum parse_fragment_result result = parse_fragment(p, elem, need, flags);
if (result != PF_OK) {
return result;
}
list->len++;
return PF_OK;
}
static struct apfl_error
err_unexpected_eof_after(enum apfl_token_type token_type, struct apfl_position pos)
{
return (struct apfl_error) {
.type = APFL_ERR_UNEXPECTED_EOF_AFTER_TOKEN,
.token_type = token_type,
.position = pos,
};
}
static bool fragments_to_call(
apfl_parser_ptr,
struct fragment_list,
struct apfl_position position,
struct apfl_expr *
);
static bool
parse_parens_head(
apfl_parser_ptr p,
struct fragment_list *children,
struct apfl_position position
) {
switch (parse_fragment_into_list(p, children, true, FFLAG_NO_EXPAND)) {
case PF_OK:
return true;
case PF_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_LPAREN, position);
return false;
case PF_CANT_HANDLE:
read_token_after_cant_handle(p);
p->error = ERR_UNEXPECTED_TOKEN(p->token);
return false;
case PF_ERROR:
return false;
}
assert(false); // All switch cases return.
return false;
}
static bool
parse_parens_tail(apfl_parser_ptr p, struct fragment_list *children, struct apfl_position position)
{
for (;;) {
switch (parse_fragment_into_list(p, children, true, 0)) {
case PF_OK:
break;
case PF_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_LPAREN, position);
return false;
case PF_CANT_HANDLE:
read_token_after_cant_handle(p);
if (p->token.type == APFL_TOK_RPAREN) {
return true;
} else if (p->token.type == APFL_TOK_LINEBREAK) {
// Ignore linebreaks inside (...)
break;
} else {
p->error = ERR_UNEXPECTED_TOKEN(p->token);
return false;
}
case PF_ERROR:
return false;
}
}
}
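// Parses the contents of a parenthesized group after the opening paren: one
// head fragment followed by further fragments up to the closing paren, which
// fragments_to_call then turns into a call expression.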
static bool
parse_parens(apfl_parser_ptr p, struct fragment *out, struct apfl_position position)
{
struct fragment_list children;
apfl_resizable_init(APFL_RESIZABLE_ARGS(children, children));
if (!parse_parens_head(p, &children, position)) {
goto error;
}
if (!parse_parens_tail(p, &children, position)) {
goto error;
}
out->type = FRAG_EXPR;
out->position = position;
if (!fragments_to_call(p, children, position, &out->expr)) {
goto error;
}
return true;
error:
fragment_list_deinit(p->allocator, &children);
return false;
}
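// Skips any run of commas, semicolons and linebreaks, which all act as item
// separators inside brackets. The first token of any other kind is unread
// again.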
static bool
skip_inner_bracket_separators(apfl_parser_ptr p)
{
for (;;) {
switch (read_token(p, true)) {
case APFL_PARSE_OK:
if (
p->token.type == APFL_TOK_COMMA
|| p->token.type == APFL_TOK_LINEBREAK
|| p->token.type == APFL_TOK_SEMICOLON
) {
break; // Note: breaks switch, continues loop
}
unread_token(p);
return true;
case APFL_PARSE_EOF:
return true;
case APFL_PARSE_ERROR:
return false;
}
}
}
static bool
parse_empty_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_position position)
{
// We have already consumed `[ ->`; read the next non-separator token and
// succeed if it is a `]`. Anything else is a syntax error.
if (!skip_inner_bracket_separators(p)) {
return false;
}
switch (read_token(p, true)) {
case APFL_PARSE_OK:
break;
case APFL_PARSE_EOF:
p->error = apfl_error_simple(APFL_ERR_UNEXPECTED_EOF);
return false;
case APFL_PARSE_ERROR:
return false;
}
if (p->token.type != APFL_TOK_RBRACKET) {
p->error = ERR_UNEXPECTED_TOKEN(p->token);
return false;
}
out->type = FRAG_EXPR;
out->expr = (struct apfl_expr) {
.type = APFL_EXPR_DICT,
.dict.items = NULL,
.dict.len = 0,
.dict.cap = 0,
.position = position,
};
out->position = position;
return true;
}
// Must only be called after PF_CANT_HANDLE
static bool
parse_empty_list_or_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_position position)
{
read_token_after_cant_handle(p);
switch (p->token.type) {
case APFL_TOK_RBRACKET:
out->type = FRAG_LIST;
out->list = (struct fragment_list) {
.children = NULL,
.len = 0,
};
out->position = position;
return true;
case APFL_TOK_MAPSTO:
return parse_empty_dict(p, out, position);
default:
p->error = ERR_UNEXPECTED_TOKEN(p->token);
return false;
}
}
static bool fragment_to_list_item(
apfl_parser_ptr,
struct fragment,
struct apfl_expr_list_item *
);
static struct apfl_expr *fragment_to_expr_allocated(apfl_parser_ptr, struct fragment);
static bool
fragment_list_to_expr_list(apfl_parser_ptr p, struct fragment_list *frag_list, size_t off, struct apfl_expr_list *out)
{
out->len = 0;
out->items = NULL;
size_t total = frag_list->len - off;
if (total == 0) {
out->items = NULL;
return true;
}
if ((out->items = ALLOC_LIST(
p->allocator,
struct apfl_expr_list_item,
total
)) == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return false;
}
for (size_t i = off; i < frag_list->len; i++) {
if (!fragment_to_list_item(p, fragment_move(&frag_list->children[i]), &out->items[i - off])) {
DEINIT_CAP_LIST(p->allocator, out->items, out->len, total, apfl_expr_list_item_deinit);
return false;
}
out->len++;
}
return true;
}
static bool
fragment_to_expr_inner(apfl_parser_ptr p, struct fragment *fragment, struct apfl_expr *out)
{
switch (fragment->type) {
case FRAG_EXPAND:
p->error = err_unexpected_token(APFL_TOK_EXPAND, fragment->position);
return false;
case FRAG_CONSTANT:
out->type = APFL_EXPR_CONSTANT;
out->constant = apfl_expr_const_move(&fragment->constant);
out->position = fragment->position;
return true;
case FRAG_NAME:
out->type = APFL_EXPR_VAR;
out->var = apfl_string_move(&fragment->name);
out->position = fragment->position;
return true;
case FRAG_DOT:
out->type = APFL_EXPR_DOT;
if ((out->dot.lhs = fragment_to_expr_allocated(p, fragment_move(fragment->dot.lhs))) == NULL) {
return false;
}
out->dot.rhs = apfl_string_move(&fragment->dot.rhs);
out->position = fragment->position;
return true;
case FRAG_AT:
out->type = APFL_EXPR_AT;
if ((out->at.lhs = fragment_to_expr_allocated(p, fragment_move(fragment->at.lhs))) == NULL) {
return false;
}
if ((out->at.rhs = fragment_to_expr_allocated(p, fragment_move(fragment->at.rhs))) == NULL) {
return false;
}
out->position = fragment->position;
return true;
case FRAG_PREDICATE:
p->error = err_unexpected_token(APFL_TOK_QUESTION_MARK, fragment->position);
return false;
case FRAG_EXPR:
*out = apfl_expr_move(&fragment->expr);
return true;
case FRAG_LIST:
out->type = APFL_EXPR_LIST;
out->position = fragment->position;
return fragment_list_to_expr_list(p, &fragment->list, 0, &out->list);
case FRAG_BLANK:
out->type = APFL_EXPR_BLANK;
out->position = fragment->position;
return true;
}
assert(false); // All switch cases return.
return false;
}
static bool
fragment_to_expr(apfl_parser_ptr p, struct fragment fragment, struct apfl_expr *out)
{
bool result = fragment_to_expr_inner(p, &fragment, out);
fragment_deinit(p->allocator, &fragment);
return result;
}
static struct apfl_expr *
fragment_to_expr_allocated(apfl_parser_ptr p, struct fragment fragment)
{
struct apfl_expr *out = ALLOC_OBJ(p->allocator, struct apfl_expr);
if (out == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return NULL;
}
if (!fragment_to_expr(p, fragment, out)) {
FREE_OBJ(p->allocator, out);
return NULL;
}
return out;
}
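// Parses the rest of a dict literal. The caller has already parsed the first
// key and the `->` token; the initial `goto after_mapsto` enters the loop at
// the point where that key's value is read, after which complete
// `key -> value` pairs are parsed until the closing bracket.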
static bool
parse_dict(
apfl_parser_ptr p,
struct fragment *out,
struct fragment key,
struct apfl_position mapsto_pos,
struct apfl_position start
) {
struct fragment value;
bool cleanup_key = true;
bool cleanup_value = false;
struct apfl_expr_dict dict = {
.items = NULL,
.len = 0,
.cap = 0,
};
goto after_mapsto;
for (;;) {
if (!skip_inner_bracket_separators(p)) {
goto error;
}
switch (parse_fragment(p, &key, true, FFLAG_NO_EXPAND)) {
case PF_OK:
cleanup_key = true;
break;
case PF_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start);
goto error;
case PF_CANT_HANDLE:
goto maybe_end;
case PF_ERROR:
goto error;
}
if (!skip_inner_bracket_separators(p)) {
goto error;
}
switch (read_token(p, true)) {
case APFL_PARSE_OK:
break;
case APFL_PARSE_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start);
goto error;
case APFL_PARSE_ERROR:
goto error;
}
if (p->token.type != APFL_TOK_MAPSTO) {
p->error = ERR_UNEXPECTED_TOKEN(p->token);
goto error;
}
mapsto_pos = p->token.position;
after_mapsto:
if (!skip_inner_bracket_separators(p)) {
goto error;
}
switch (parse_fragment(p, &value, true, FFLAG_NO_EXPAND)) {
case PF_OK:
cleanup_value = true;
break;
case PF_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_MAPSTO, mapsto_pos);
goto error;
case PF_CANT_HANDLE:
read_token_after_cant_handle(p);
p->error = ERR_UNEXPECTED_TOKEN(p->token);
goto error;
case PF_ERROR:
goto error;
}
struct apfl_expr_dict_pair pair;
if (
(pair.k = fragment_to_expr_allocated(p, fragment_move(&key))) == NULL
|| (pair.v = fragment_to_expr_allocated(p, fragment_move(&value))) == NULL
) {
goto error;
}
fragment_deinit(p->allocator, &key);
cleanup_key = false;
fragment_deinit(p->allocator, &value);
cleanup_value = false;
if (!apfl_resizable_append(
p->allocator,
2021-12-10 20:22:16 +00:00
sizeof(struct apfl_expr_dict_pair),
(void **)&dict.items,
&dict.len,
&dict.cap,
&pair,
1
)) {
apfl_expr_dict_pair_deinit(p->allocator, &pair);
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
goto error;
}
}
maybe_end:
assert(!cleanup_key && !cleanup_value);
read_token_after_cant_handle(p);
if (p->token.type != APFL_TOK_RBRACKET) {
p->error = ERR_UNEXPECTED_TOKEN(p->token);
goto error;
}
out->type = FRAG_EXPR;
out->expr = (struct apfl_expr) {
.type = APFL_EXPR_DICT,
.dict = dict,
.position = start,
};
out->position = start;
return true;
error:
if (cleanup_key) {
fragment_deinit(p->allocator, &key);
}
if (cleanup_value) {
fragment_deinit(p->allocator, &value);
}
apfl_expr_dict_deinit(p->allocator, &dict);
return false;
}
static bool
parse_list(
apfl_parser_ptr p,
struct fragment *out,
struct fragment first,
struct apfl_position start
) {
struct fragment_list list;
apfl_resizable_init(APFL_RESIZABLE_ARGS(list, children));
if (!append_fragment(p->allocator, &list, first)) {
fragment_deinit(p->allocator, &first);
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
fragment_list_deinit(p->allocator, &list);
return false;
}
for (;;) {
switch (parse_fragment_into_list(p, &list, true, 0)) {
case PF_OK:
break;
case PF_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start);
goto error;
case PF_CANT_HANDLE:
goto maybe_end;
case PF_ERROR:
goto error;
}
if (!skip_inner_bracket_separators(p)) {
goto error;
}
}
maybe_end:
read_token_after_cant_handle(p);
if (p->token.type != APFL_TOK_RBRACKET) {
p->error = ERR_UNEXPECTED_TOKEN(p->token);
goto error;
}
out->type = FRAG_LIST;
out->list = list;
out->position = start;
return true;
error:
fragment_list_deinit(p->allocator, &list);
return false;
}
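// Parses a bracketed construct after the opening bracket: an immediately
// closing bracket yields an empty list, an immediate `->` an empty dict, a
// `->` after the first item switches to dict parsing, and anything else is
// parsed as a list.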
static bool
parse_brackets(apfl_parser_ptr p, struct fragment *out, struct apfl_position start)
{
if (!skip_inner_bracket_separators(p)) {
return false;
}
struct fragment first;
switch (parse_fragment(p, &first, true, 0)) {
case PF_OK:
break;
case PF_CANT_HANDLE:
return parse_empty_list_or_dict(p, out, start);
case PF_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start);
return false;
case PF_ERROR:
return false;
}
if (!skip_inner_bracket_separators(p)) {
goto error;
}
switch (read_token(p, true)) {
case APFL_PARSE_OK:
break;
case APFL_PARSE_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start);
goto error;
case APFL_PARSE_ERROR:
goto error;
}
if (p->token.type == APFL_TOK_MAPSTO) {
struct apfl_position mapsto_pos = p->token.position;
return parse_dict(p, out, first, mapsto_pos, start);
} else {
unread_token(p);
return parse_list(p, out, first, start);
}
error:
fragment_deinit(p->allocator, &first);
return false;
}
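// Parses the fragment following an expand token and wraps it in a
// FRAG_EXPAND node. The operand is parsed with FFLAG_NO_EXPAND, so expands
// cannot nest directly.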
static bool
parse_expand(apfl_parser_ptr p, struct fragment *fragment, struct apfl_position position)
{
struct fragment *inner = ALLOC_OBJ(p->allocator, struct fragment);
if (inner == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return false;
}
enum parse_fragment_result result = parse_fragment(p, inner, true, FFLAG_NO_EXPAND);
if (result == PF_OK) {
fragment->type = FRAG_EXPAND;
fragment->expand = inner;
fragment->position = position;
return true;
}
FREE_OBJ(p->allocator, inner);
switch (result) {
case PF_OK:
assert(false); // Already handled above
break;
case PF_CANT_HANDLE:
p->error = ERR_UNEXPECTED_TOKEN(p->token);
return false;
case PF_EOF:
p->error = err_unexpected_eof_after(APFL_TOK_EXPAND, position);
return false;
case PF_ERROR:
return false;
}
assert(false); // All switch cases return.
return false;
}
static bool
must_read_token_after(apfl_parser_ptr p, enum apfl_token_type want_type)
{
enum apfl_token_type cur_type = p->token.type;
struct apfl_position cur_pos = p->token.position;
switch (read_token(p, true)) {
case APFL_PARSE_OK:
break;
case APFL_PARSE_EOF:
p->error = err_unexpected_eof_after(cur_type, cur_pos);
return false;
case APFL_PARSE_ERROR:
return false;
}
if (p->token.type != want_type) {
p->error = ERR_UNEXPECTED_TOKEN(p->token);
return false;
}
return true;
}
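// Parses a stringify form: the next token must be a name, whose text becomes
// a string constant fragment.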
static bool
parse_stringify(apfl_parser_ptr p, struct fragment *fragment, struct apfl_position position)
{
if (!must_read_token_after(p, APFL_TOK_NAME)) {
return false;
}
fragment->type = FRAG_CONSTANT;
fragment->constant = (struct apfl_expr_const) {
.type = APFL_EXPR_CONST_STRING,
.string = apfl_string_move(&p->token.text),
};
fragment->position = position;
return true;
}
static struct fragment
fragment_unwrap_expand(struct apfl_allocator allocator, struct fragment fragment)
{
assert(fragment.type == FRAG_EXPAND);
struct fragment tmp = fragment_move(fragment.expand);
FREE_OBJ(allocator, fragment.expand);
fragment.expand = NULL;
fragment_deinit(allocator, &fragment);
return tmp;
}
static bool fragment_to_param(apfl_parser_ptr, struct fragment, struct apfl_expr_param *);
static bool fragments_to_params(apfl_parser_ptr, struct fragment_list, struct apfl_expr_params *);
static bool
predicate_fragment_to_param(
apfl_parser_ptr p,
struct fragment_lhs_rhs *lhs_rhs,
struct apfl_expr_param *out
) {
out->type = APFL_EXPR_PARAM_PREDICATE;
out->predicate.lhs = NULL;
out->predicate.rhs = NULL;
if ((out->predicate.lhs = ALLOC_OBJ(p->allocator, struct apfl_expr_param)) == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
goto error;
}
if (!fragment_to_param(p, fragment_move(lhs_rhs->lhs), out->predicate.lhs)) {
FREE_OBJ(p->allocator, out->predicate.lhs);
out->predicate.lhs = NULL;
goto error;
}
out->predicate.rhs = fragment_to_expr_allocated(p, fragment_move(lhs_rhs->rhs));
if (out->predicate.rhs == NULL) {
goto error;
}
return true;
error:
apfl_expr_param_deinit(p->allocator, out);
return false;
}
static bool
fragment_to_param_inner(
apfl_parser_ptr p,
struct fragment *fragment,
struct apfl_expr_param *out
) {
switch (fragment->type) {
case FRAG_EXPAND:
p->error = err_unexpected_token(APFL_TOK_EXPAND, fragment->position);
return false;
case FRAG_CONSTANT:
out->type = APFL_EXPR_PARAM_CONSTANT;
out->constant = apfl_expr_const_move(&fragment->constant);
return true;
case FRAG_NAME:
out->type = APFL_EXPR_PARAM_VAR;
out->var = apfl_string_move(&fragment->name);
return true;
case FRAG_DOT:
p->error = err_unexpected_token(APFL_TOK_DOT, fragment->position);
return false;
case FRAG_AT:
p->error = err_unexpected_token(APFL_TOK_AT, fragment->position);
return false;
case FRAG_PREDICATE:
return predicate_fragment_to_param(p, &fragment->predicate, out);
case FRAG_EXPR:
p->error = (struct apfl_error) {
.type = APFL_ERR_UNEXPECTED_EXPRESSION,
.position = fragment->position,
};
return false;
case FRAG_LIST:
out->type = APFL_EXPR_PARAM_LIST;
return fragments_to_params(p, fragment_list_move(&fragment->list), &out->list);
case FRAG_BLANK:
out->type = APFL_EXPR_PARAM_BLANK;
return true;
}
assert(false); // All switch cases return.
return false;
}
static bool
fragment_to_param(
apfl_parser_ptr p,
struct fragment fragment,
struct apfl_expr_param *out
) {
bool ok = fragment_to_param_inner(p, &fragment, out);
fragment_deinit(p->allocator, &fragment);
return ok;
}
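// Converts a fragment list into a parameter list. At most one item may carry
// an expand marker; the marker is recorded on the item and the wrapped
// fragment is converted like any other parameter.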
static bool
fragments_to_params_inner(
apfl_parser_ptr p,
/*borrowed*/ struct fragment_list fragments,
struct apfl_expr_params *out
) {
out->len = 0;
out->cap = 0;
out->params = NULL;
if (fragments.len == 0) {
return true;
}
out->params = ALLOC_LIST(p->allocator, struct apfl_expr_params_item, fragments.len);
if (out->params == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
goto error;
}
out->cap = fragments.len;
bool seen_expand = false;
for (size_t i = 0; i < fragments.len; i++) {
struct apfl_expr_params_item *out_item = &out->params[i];
struct fragment item_fragment = fragment_move(&fragments.children[i]);
if (item_fragment.type == FRAG_EXPAND) {
if (seen_expand) {
p->error = (struct apfl_error) {
.type = APFL_ERR_ONLY_ONE_EXPAND_ALLOWED,
.position = item_fragment.position,
};
fragment_deinit(p->allocator, &item_fragment);
goto error;
}
out_item->expand = true;
seen_expand = true;
item_fragment = fragment_unwrap_expand(p->allocator, item_fragment);
} else {
out_item->expand = false;
}
if (!fragment_to_param(
p,
fragment_move(&item_fragment),
&out_item->param
)) {
goto error;
}
out->len++;
}
return true;
error:
apfl_expr_params_deinit(p->allocator, out);
return false;
}
static bool
fragments_to_params(
apfl_parser_ptr p,
struct fragment_list fragments,
struct apfl_expr_params *out
) {
bool ok = fragments_to_params_inner(p, fragments, out);
fragment_list_deinit(p->allocator, &fragments);
return ok;
}
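// Converts a fragment into the variable-or-member part of an assignment
// target: a plain name, a `.` member access or an `@` access. All other
// fragment kinds are rejected with a descriptive error.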
static bool fragment_to_assignable_var_or_member(
apfl_parser_ptr p,
struct fragment *fragment,
struct apfl_expr_assignable_var_or_member *out
) {
struct apfl_expr_assignable_var_or_member *lhs;
struct apfl_expr *rhs;
switch (fragment->type) {
case FRAG_EXPAND:
p->error = err_unexpected_token(APFL_TOK_EXPAND, fragment->position);
return false;
case FRAG_CONSTANT:
p->error = (struct apfl_error) {
.type = APFL_ERR_UNEXPECTED_CONSTANT_IN_MEMBER_ACCESS,
.position = fragment->position,
};
return false;
case FRAG_NAME:
out->type = APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_VAR;
out->var = apfl_string_move(&fragment->name);
return true;
case FRAG_DOT:
lhs = ALLOC_OBJ(p->allocator, struct apfl_expr_assignable_var_or_member);
if (lhs == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return false;
}
if (!fragment_to_assignable_var_or_member(p, fragment->dot.lhs, lhs)) {
FREE_OBJ(p->allocator, lhs);
return false;
}
out->type = APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_DOT;
out->dot = (struct apfl_expr_assignable_var_or_member_dot) {
.lhs = lhs,
.rhs = apfl_string_move(&fragment->dot.rhs),
};
return true;
case FRAG_AT:
lhs = ALLOC_OBJ(p->allocator, struct apfl_expr_assignable_var_or_member);
rhs = ALLOC_OBJ(p->allocator, struct apfl_expr);
if (lhs == NULL || rhs == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return false;
}
if (!fragment_to_assignable_var_or_member(p, fragment->at.lhs, lhs)) {
FREE_OBJ(p->allocator, lhs);
FREE_OBJ(p->allocator, rhs);
return false;
}
if (!fragment_to_expr(p, fragment_move(fragment->at.rhs), rhs)) {
DESTROY(p->allocator, lhs, apfl_expr_assignable_var_or_member_deinit);
FREE_OBJ(p->allocator, rhs);
return false;
}
out->type = APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_AT;
out->at = (struct apfl_expr_assignable_var_or_member_at) {
.lhs = lhs,
.rhs = rhs,
};
return true;
case FRAG_PREDICATE:
p->error = err_unexpected_token(APFL_TOK_QUESTION_MARK, fragment->position);
return false;
case FRAG_EXPR:
p->error = (struct apfl_error) {
.type = APFL_ERR_UNEXPECTED_EXPR_IN_MEMBER_ACCESS,
.position = fragment->position,
};
return false;
case FRAG_LIST:
p->error = err_unexpected_token(APFL_TOK_LBRACKET, fragment->position);
return false;
case FRAG_BLANK:
p->error = (struct apfl_error) {
.type = APFL_ERR_UNEXPECTED_BLANK_IN_MEMBER_ACCESS,
.position = fragment->position,
};
return false;
}
assert(false);
return false;
}
static bool
fragment_to_assignable(
apfl_parser_ptr p,
struct fragment fragment,
struct apfl_expr_assignable *out
);
static bool
fragment_list_to_assignable_list_inner(
apfl_parser_ptr p,
/*borrowed*/ struct fragment_list fragment_list,
struct apfl_expr_assignable_list *out
) {
*out = (struct apfl_expr_assignable_list) {
.items = NULL,
.len = 0,
};
if (fragment_list.len == 0) {
return true;
}
if ((out->items = ALLOC_LIST(
p->allocator,
struct apfl_expr_assignable_list_item,
fragment_list.len
)) == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return false;
}
bool expand_ok = true;
for (size_t i = 0; i < fragment_list.len; i++) {
struct apfl_expr_assignable_list_item *out_item = &out->items[i];
struct fragment item_fragment = fragment_move(&fragment_list.children[i]);
if (item_fragment.type == FRAG_EXPAND) {
if (!expand_ok) {
p->error = (struct apfl_error) {
.type = APFL_ERR_ONLY_ONE_EXPAND_ALLOWED,
.position = item_fragment.position,
};
fragment_deinit(p->allocator, &item_fragment);
goto error;
}
out_item->expand = true;
expand_ok = false;
item_fragment = fragment_unwrap_expand(p->allocator, item_fragment);
} else {
out_item->expand = false;
}
if (!fragment_to_assignable(
p,
fragment_move(&item_fragment),
&out_item->assignable
)) {
goto error;
}
out->len++;
}
return true;
error:
DEINIT_CAP_LIST(
p->allocator,
out->items,
out->len,
fragment_list.len,
apfl_expr_assignable_list_item_deinit
);
return false;
}
static bool
fragment_list_to_assignable_list(
apfl_parser_ptr p,
struct fragment_list fragment_list,
struct apfl_expr_assignable_list *out
) {
bool ok = fragment_list_to_assignable_list_inner(p, fragment_list, out);
fragment_list_deinit(p->allocator, &fragment_list);
return ok;
}
static bool
fragment_to_assignable_inner(
apfl_parser_ptr p,
struct fragment *fragment,
struct apfl_expr_assignable *out
) {
struct apfl_expr_assignable_var_or_member var_or_member;
switch (fragment->type) {
case FRAG_EXPAND:
p->error = err_unexpected_token(APFL_TOK_EXPAND, fragment->position);
goto error;
case FRAG_CONSTANT:
out->type = APFL_EXPR_ASSIGNABLE_CONSTANT;
out->constant = apfl_expr_const_move(&fragment->constant);
return true;
case FRAG_NAME:
case FRAG_DOT:
case FRAG_AT:
if (!fragment_to_assignable_var_or_member(p, fragment, &var_or_member)) {
goto error;
}
out->type = APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER;
out->var_or_member = var_or_member;
return true;
case FRAG_PREDICATE:
out->type = APFL_EXPR_ASSIGNABLE_PREDICATE;
out->predicate.lhs = ALLOC_OBJ(p->allocator, struct apfl_expr_assignable);
out->predicate.rhs = ALLOC_OBJ(p->allocator, struct apfl_expr);
if (out->predicate.lhs == NULL || out->predicate.rhs == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
goto error;
}
if (!fragment_to_assignable(
p,
fragment_move(fragment->predicate.lhs),
out->predicate.lhs
)) {
FREE_OBJ(p->allocator, out->predicate.rhs);
out->predicate.rhs = NULL;
goto error;
}
if (!fragment_to_expr(
p,
fragment_move(fragment->predicate.rhs),
out->predicate.rhs
)) {
goto error;
}
return true;
case FRAG_EXPR:
p->error = (struct apfl_error) {
.type = APFL_ERR_INVALID_ASSIGNMENT_LHS,
.position = fragment->position,
};
goto error;
case FRAG_LIST:
out->type = APFL_EXPR_ASSIGNABLE_LIST;
if (!fragment_list_to_assignable_list(
p,
fragment_list_move(&fragment->list),
&out->list
)) {
goto error;
}
return true;
case FRAG_BLANK:
out->type = APFL_EXPR_ASSIGNABLE_BLANK;
return true;
}
assert(false); // Should not be reached
error:
return false;
}
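// Owning variant: consumes the fragment and writes to *outptr only on
// success. On failure the partially built assignable is deinitialized here.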
static bool
fragment_to_assignable(
apfl_parser_ptr p,
struct fragment fragment,
struct apfl_expr_assignable *outptr
) {
struct apfl_expr_assignable out = { // Just a value that can be safely deinited
.type = APFL_EXPR_ASSIGNABLE_CONSTANT,
.constant = {
.type = APFL_EXPR_CONST_NIL,
},
};
bool result = fragment_to_assignable_inner(p, &fragment, &out);
fragment_deinit(p->allocator, &fragment);
if (result) {
*outptr = out;
} else {
apfl_expr_assignable_deinit(p->allocator, &out);
}
return result;
}
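// An assignment whose right-hand side has not been parsed yet: the assignable
// left-hand side and the (possibly local) assignment operator have been seen,
// but the value is still outstanding. parse_body_or_toplevel collects one of
// these per assignment operator and parse_body_or_toplevel_finalize resolves
// the chain into nested assignment expressions.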
struct partial_assignment {
struct apfl_expr_assignable lhs;
bool local;
struct apfl_position position;
};
static void
partial_assignment_deinit(struct apfl_allocator allocator, struct partial_assignment *pa)
{
apfl_expr_assignable_deinit(allocator, &pa->lhs);
}
struct partial_assignment_list {
APFL_RESIZABLE_TRAIT(struct partial_assignment, items)
};
static void
partial_assignment_list_deinit(struct apfl_allocator allocator, struct partial_assignment_list *assignments)
{
DEINIT_CAP_LIST(
allocator,
assignments->items,
assignments->len,
assignments->cap,
partial_assignment_deinit
);
}
enum parse_body_or_toplevel_finalize_result {
BODY_FINALIZE_ERROR,
BODY_FINALIZE_OK,
BODY_FINALIZE_EMPTY,
};
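// Turn a fragment into a list item expression, unwrapping expand fragments
// and flagging them via out->expand. Consumes the fragment.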
static bool
fragment_to_list_item(
apfl_parser_ptr p,
struct fragment fragment,
struct apfl_expr_list_item *out
) {
if (fragment.type == FRAG_EXPAND) {
out->expand = true;
out->expr = fragment_to_expr_allocated(p, fragment_move(fragment.expand));
fragment_deinit(p->allocator, &fragment);
return out->expr != NULL;
} else {
out->expand = false;
out->expr = fragment_to_expr_allocated(p, fragment_move(&fragment));
return out->expr != NULL;
}
}
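// Build a call expression from a run of fragments: the first fragment becomes
// the callee, any remaining fragments become the argument list. Consumes the
// fragment list.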
static bool
fragments_to_call(
apfl_parser_ptr p,
struct fragment_list fragments,
struct apfl_position position,
struct apfl_expr *out
) {
assert(fragments.len > 0);
out->type = APFL_EXPR_CALL;
out->position = position;
out->call.arguments = (struct apfl_expr_list) {
.items = NULL,
.len = 0,
};
out->call.callee = fragment_to_expr_allocated(p, fragment_move(&fragments.children[0]));
if (out->call.callee == NULL) {
goto error;
}
if (fragments.len == 1) {
fragment_list_deinit(p->allocator, &fragments);
return true;
}
if (!fragment_list_to_expr_list(
p,
&fragments,
1,
&out->call.arguments
)) {
goto error;
}
fragment_list_deinit(p->allocator, &fragments);
return true;
error:
fragment_list_deinit(p->allocator, &fragments);
apfl_expr_deinit(p->allocator, out);
return false;
}
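// Finish one statement: collapse the collected fragments into a single
// expression (the lone fragment itself, or a call when there are several) and
// nest it as the right-hand side of any pending assignments, outermost first.
// Consumes both the fragment list and the partial assignment list.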
static enum parse_body_or_toplevel_finalize_result
parse_body_or_toplevel_finalize(
apfl_parser_ptr p,
struct fragment_list fragments,
struct partial_assignment_list partial_assignments,
struct apfl_expr *out
) {
*out = (struct apfl_expr) {
.type = APFL_EXPR_CONSTANT,
.constant = {
.type = APFL_EXPR_CONST_NIL,
}
};
if (fragments.len == 0) {
if (partial_assignments.len > 0) {
p->error = (struct apfl_error) {
.type = APFL_ERR_EMPTY_ASSIGNMENT,
.position = partial_assignments.items[partial_assignments.len-1].position,
};
goto error;
}
fragment_list_deinit(p->allocator, &fragments);
partial_assignment_list_deinit(p->allocator, &partial_assignments);
return BODY_FINALIZE_EMPTY;
}
struct apfl_expr *dest = out;
for (size_t i = 0; i < partial_assignments.len; i++) {
struct partial_assignment *cur = &partial_assignments.items[i];
dest->type = APFL_EXPR_ASSIGNMENT;
dest->position = cur->position;
dest->assignment.local = cur->local;
dest->assignment.lhs = apfl_expr_assignable_move(&cur->lhs);
if ((dest->assignment.rhs = ALLOC_OBJ(p->allocator, struct apfl_expr)) == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
goto error;
}
dest = dest->assignment.rhs;
}
partial_assignment_list_deinit(p->allocator, &partial_assignments);
if (fragments.len == 1) {
if (!fragment_to_expr(p, fragment_move(&fragments.children[0]), dest)) {
goto error;
}
} else {
struct apfl_position position = fragments.children[0].position;
if (!fragments_to_call(p, fragment_list_move(&fragments), position, dest)) {
goto error;
}
}
fragment_list_deinit(p->allocator, &fragments);
return BODY_FINALIZE_OK;
error:
fragment_list_deinit(p->allocator, &fragments);
partial_assignment_list_deinit(p->allocator, &partial_assignments);
apfl_expr_deinit(p->allocator, out);
return BODY_FINALIZE_ERROR;
}
static struct partial_assignment_list
partial_assignment_list_move(struct partial_assignment_list *in)
{
struct partial_assignment_list out = *in;
apfl_resizable_init(APFL_RESIZABLE_ARGS(*in, items));
return out;
}
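// Parse one statement of a function body or of the top level. Fragments are
// accumulated until a line break, semicolon, closing brace or (if handle_eof
// is set) the end of input finalizes them into *out; an assignment operator
// instead pushes the single collected fragment onto the partial assignment
// chain and parsing continues with the right-hand side.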
static enum parse_fragment_result
parse_body_or_toplevel(
apfl_parser_ptr p,
bool handle_eof,
struct fragment_list *fragments,
struct apfl_expr *out,
bool need
) {
struct partial_assignment_list partial_assignments;
apfl_resizable_init(APFL_RESIZABLE_ARGS(partial_assignments, items));
bool first;
first = true;
for (;;) {
for (;;) {
switch (parse_fragment_into_list(p, fragments, need || !first, 0)) {
case PF_OK:
break;
case PF_CANT_HANDLE:
goto break_inner;
case PF_EOF:
if (handle_eof) {
switch (parse_body_or_toplevel_finalize(
p,
fragment_list_move(fragments),
partial_assignment_list_move(&partial_assignments),
out
)) {
case BODY_FINALIZE_OK:
return PF_OK;
case BODY_FINALIZE_ERROR:
goto error;
case BODY_FINALIZE_EMPTY:
return PF_EOF;
}
                } else {
                    // Not finalizing here, so drop any pending assignment
                    // left-hand sides before handing EOF back to the caller.
                    partial_assignment_list_deinit(p->allocator, &partial_assignments);
                    return PF_EOF;
                }
case PF_ERROR:
goto error;
}
first = false;
}
break_inner:
read_token_after_cant_handle(p);
bool is_rbrace = false;
switch (p->token.type) {
case APFL_TOK_ASSIGN:
        case APFL_TOK_LOCAL_ASSIGN: {
bool local = p->token.type == APFL_TOK_LOCAL_ASSIGN;
struct apfl_position position = p->token.position;
if (fragments->len == 0) {
p->error = (struct apfl_error) {
.type = APFL_ERR_EMPTY_ASSIGNMENT,
.position = position,
};
goto error;
}
if (fragments->len > 1) {
p->error = err_unexpected_token(
local ? APFL_TOK_LOCAL_ASSIGN : APFL_TOK_ASSIGN,
position
);
goto error;
}
if (!apfl_resizable_ensure_cap_for_more_elements(
p->allocator,
sizeof(struct partial_assignment),
(void **)&partial_assignments.items,
partial_assignments.len,
&partial_assignments.cap,
1
)) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
goto error;
}
struct partial_assignment *cur_partial = &partial_assignments.items[partial_assignments.len];
cur_partial->local = local;
cur_partial->position = position;
struct fragment fragment = fragment_move(&fragments->children[0]);
fragment_list_deinit(p->allocator, fragments); // Reset fragment list
if (!fragment_to_assignable(p, fragment, &cur_partial->lhs)) {
goto error;
}
partial_assignments.len++;
            break;
        }
case APFL_TOK_RBRACE:
is_rbrace = true;
unread_token(p);
// fallthrough
case APFL_TOK_LINEBREAK:
case APFL_TOK_SEMICOLON:
switch (parse_body_or_toplevel_finalize(
p,
fragment_list_move(fragments),
partial_assignment_list_move(&partial_assignments),
out
)) {
case BODY_FINALIZE_OK:
return PF_OK;
case BODY_FINALIZE_ERROR:
goto error;
case BODY_FINALIZE_EMPTY:
if (is_rbrace) {
return PF_CANT_HANDLE;
}
// If there was nothing to finalize, we have an empty expression
// that doesn't need to end up in the AST. So let's just
// continue with the outermost loop and try again.
break;
default:
assert(false);
}
break;
default:
if (partial_assignments.len > 0) {
p->error = ERR_UNEXPECTED_TOKEN(p->token);
goto error;
}
unread_token(p);
return PF_CANT_HANDLE;
}
}
error:
partial_assignment_list_deinit(p->allocator, &partial_assignments);
return PF_ERROR;
}
static struct apfl_expr_body
init_body(void)
{
return (struct apfl_expr_body) {
.items = NULL,
.len = 0,
.cap = 0,
};
}
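// Parse the contents of a braced block as a function literal. Without any
// parameter list the statements form a simple function; a "maps to" token
// turns the fragments collected before it into a parameter list, and each
// further parameter list closes the previous subfunction, yielding a complex
// function.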
static bool
parse_braces(
apfl_parser_ptr p,
struct fragment *out,
struct apfl_position start
) {
struct apfl_expr_complex_func complex_func = {
.subfuncs = NULL,
.len = 0,
.cap = 0,
};
bool has_params = false;
struct apfl_expr_params params = { .params = NULL, .len = 0, .cap = 0 };
struct fragment_list fragments;
apfl_resizable_init(APFL_RESIZABLE_ARGS(fragments, children));
struct apfl_expr_body body = init_body();
for (;;) {
struct apfl_expr expr;
switch (parse_body_or_toplevel(
p,
false,
&fragments,
&expr,
true
)) {
case PF_OK:
if (!apfl_resizable_append(
p->allocator,
sizeof(struct apfl_expr),
(void **)&body.items,
&body.len,
&body.cap,
&expr,
1
)) {
apfl_expr_deinit(p->allocator, &expr);
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
goto error;
}
break;
case PF_EOF:
p->error = apfl_error_simple(APFL_ERR_UNEXPECTED_EOF);
goto error;
case PF_ERROR:
goto error;
case PF_CANT_HANDLE:
read_token_after_cant_handle(p);
switch (p->token.type) {
case APFL_TOK_RBRACE:
fragment_list_deinit(p->allocator, &fragments);
// TODO: Rather fugly duplication
                if (has_params) {
                    // Finalize previous subfunc and append
                    struct apfl_expr_subfunc subfunc = {
                        .params = apfl_expr_params_move(&params),
                        .body = apfl_expr_body_move(&body),
                    };
                    if (!apfl_resizable_append(
                        p->allocator,
                        sizeof(struct apfl_expr_subfunc),
                        (void **)&complex_func.subfuncs,
                        &complex_func.len,
                        &complex_func.cap,
                        &subfunc,
                        1
                    )) {
                        // params/body were already moved into subfunc; free
                        // them from there, since the error path below only
                        // sees the now-empty originals.
                        apfl_expr_params_deinit(p->allocator, &subfunc.params);
                        apfl_expr_body_deinit(p->allocator, &subfunc.body);
                        p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
                        goto error;
                    }
                    params = (struct apfl_expr_params) { .params = NULL, .len = 0, .cap = 0 };
                    body = init_body();
                }
out->type = FRAG_EXPR;
if (has_params) {
out->expr = (struct apfl_expr) {
.type = APFL_EXPR_COMPLEX_FUNC,
.complex_func = apfl_expr_complex_func_move(&complex_func),
.position = start,
};
} else {
out->expr = (struct apfl_expr) {
.type = APFL_EXPR_SIMPLE_FUNC,
.simple_func = apfl_expr_body_move(&body),
.position = start,
};
}
return true;
case APFL_TOK_MAPSTO:
if (body.len > 0 && !has_params) {
p->error = (struct apfl_error) {
.type = APFL_ERR_STATEMENTS_BEFORE_PARAMETERS,
.position = p->token.position,
};
goto error;
}
            if (has_params) {
                // Finalize previous subfunc and append
                struct apfl_expr_subfunc subfunc = {
                    .params = apfl_expr_params_move(&params),
                    .body = apfl_expr_body_move(&body),
                };
                if (!apfl_resizable_append(
                    p->allocator,
                    sizeof(struct apfl_expr_subfunc),
                    (void **)&complex_func.subfuncs,
                    &complex_func.len,
                    &complex_func.cap,
                    &subfunc,
                    1
                )) {
                    // params/body were already moved into subfunc; free them
                    // from there, since the error path below only sees the
                    // now-empty originals.
                    apfl_expr_params_deinit(p->allocator, &subfunc.params);
                    apfl_expr_body_deinit(p->allocator, &subfunc.body);
                    p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
                    goto error;
                }
                params = (struct apfl_expr_params) { .params = NULL, .len = 0, .cap = 0 };
                body = init_body();
            }
if (!fragments_to_params(p, fragment_list_move(&fragments), &params)) {
goto error;
}
has_params = true;
break;
default:
p->error = ERR_UNEXPECTED_TOKEN(p->token);
goto error;
}
break;
}
}
error:
apfl_expr_body_deinit(p->allocator, &body);
apfl_expr_params_deinit(p->allocator, &params);
fragment_list_deinit(p->allocator, &fragments);
apfl_expr_complex_func_deinit(p->allocator, &complex_func);
return false;
}
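// Parse a single fragment: a parenthesized, bracketed or braced group, an
// expand or stringify form, a number, string, name or one of the builtin
// constants, optionally followed by postfix "." member access, "@" and "?"
// forms (suppressed by FFLAG_NO_POSTFIXS). With FFLAG_NO_EXPAND a leading
// expand token is reported as PF_CANT_HANDLE instead of being parsed.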
static enum parse_fragment_result
parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum parse_fragment_flags flags)
{
*fragment = (struct fragment) { // Something we can safely deinit
.type = FRAG_EXPAND,
.expand = NULL,
};
struct fragment *lhs = NULL;
struct fragment *rhs = NULL;
switch (read_token(p, need)) {
case APFL_PARSE_OK:
break;
case APFL_PARSE_EOF:
return PF_EOF;
case APFL_PARSE_ERROR:
return PF_ERROR;
}
switch (p->token.type) {
case APFL_TOK_LPAREN:
if (!parse_parens(p, fragment, p->token.position)) {
goto error;
}
break;
case APFL_TOK_LBRACKET:
if (!parse_brackets(p, fragment, p->token.position)) {
goto error;
}
2021-12-10 20:22:16 +00:00
break;
case APFL_TOK_LBRACE:
if (!parse_braces(p, fragment, p->token.position)) {
goto error;
}
break;
case APFL_TOK_EXPAND:
if (flags & FFLAG_NO_EXPAND) {
unread_token(p);
return PF_CANT_HANDLE;
}
if (!parse_expand(p, fragment, p->token.position)) {
goto error;
}
break;
case APFL_TOK_STRINGIFY:
if (!parse_stringify(p, fragment, p->token.position)) {
goto error;
}
break;
case APFL_TOK_NUMBER:
fragment->type = FRAG_CONSTANT;
fragment->constant = (struct apfl_expr_const) {
.type = APFL_EXPR_CONST_NUMBER,
.number = p->token.number,
};
fragment->position = p->token.position;
break;
case APFL_TOK_NAME:
if (apfl_string_eq(p->token.text, "nil")) {
fragment->type = FRAG_CONSTANT;
fragment->constant = (struct apfl_expr_const) {
.type = APFL_EXPR_CONST_NIL,
};
} else if (apfl_string_eq(p->token.text, "true")) {
fragment->type = FRAG_CONSTANT;
fragment->constant = (struct apfl_expr_const) {
.type = APFL_EXPR_CONST_BOOLEAN,
.boolean = true,
};
} else if (apfl_string_eq(p->token.text, "false")) {
fragment->type = FRAG_CONSTANT;
fragment->constant = (struct apfl_expr_const) {
.type = APFL_EXPR_CONST_BOOLEAN,
.boolean = false,
};
} else if (apfl_string_eq(p->token.text, "_")) {
fragment->type = FRAG_BLANK;
} else {
fragment->type = FRAG_NAME;
fragment->name = apfl_string_move(&p->token.text);
}
fragment->position = p->token.position;
break;
case APFL_TOK_STRING:
fragment->type = FRAG_CONSTANT;
fragment->constant = (struct apfl_expr_const) {
.type = APFL_EXPR_CONST_STRING,
.string = apfl_string_move(&p->token.text),
};
fragment->position = p->token.position;
break;
default:
unread_token(p);
return PF_CANT_HANDLE;
}
for (; !(flags & FFLAG_NO_POSTFIXS); ) {
switch (read_token(p, need)) {
case APFL_PARSE_OK:
break;
case APFL_PARSE_EOF:
return PF_OK;
case APFL_PARSE_ERROR:
fragment_deinit(p->allocator, fragment);
return PF_ERROR;
}
enum apfl_token_type token_type = p->token.type;
struct apfl_position token_pos = p->token.position;
switch (p->token.type) {
case APFL_TOK_DOT:
if (!must_read_token_after(p, APFL_TOK_NAME)) {
return PF_ERROR;
}
if ((lhs = ALLOC_OBJ(p->allocator, struct fragment)) == NULL) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return PF_ERROR;
}
*lhs = *fragment;
fragment->type = FRAG_DOT;
fragment->position = token_pos;
MOVEPTR(fragment->dot.lhs, lhs);
fragment->dot.rhs = apfl_string_move(&p->token.text);
break;
case APFL_TOK_AT:
case APFL_TOK_QUESTION_MARK:
if (
((lhs = ALLOC_OBJ(p->allocator, struct fragment)) == NULL)
|| ((rhs = ALLOC_OBJ(p->allocator, struct fragment)) == NULL)
) {
p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
return PF_ERROR;
}
            // Give *lhs a value that is safe to deinit (in case ALLOC_OBJ does
            // not zero the allocation); the error path below may deinit it
            // before it has been assigned from *fragment.
            *lhs = (struct fragment) {
                .type = FRAG_EXPAND,
                .expand = NULL,
            };
            switch (parse_fragment(p, rhs, true, FFLAG_NO_POSTFIXS | FFLAG_NO_EXPAND)) {
case PF_OK:
break;
case PF_ERROR:
goto error;
case PF_EOF:
p->error = err_unexpected_eof_after(token_type, token_pos);
goto error;
case PF_CANT_HANDLE:
read_token_after_cant_handle(p);
p->error = ERR_UNEXPECTED_TOKEN(p->token);
goto error;
}
*lhs = *fragment;
if (token_type == APFL_TOK_AT) {
fragment->type = FRAG_AT;
fragment->position = token_pos;
fragment->at.lhs = lhs;
fragment->at.rhs = rhs;
} else {
assert(token_type == APFL_TOK_QUESTION_MARK);
fragment->type = FRAG_PREDICATE;
fragment->position = token_pos;
fragment->at.lhs = lhs;
fragment->at.rhs = rhs;
}
break;
default:
unread_token(p);
return PF_OK;
}
}
return PF_OK;
error:
DESTROY(p->allocator, lhs, fragment_deinit);
DESTROY(p->allocator, rhs, fragment_deinit);
fragment_deinit(p->allocator, fragment);
return PF_ERROR;
}
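// Parse the next top-level expression from the token source. On APFL_PARSE_OK
// the expression can be retrieved with apfl_parser_get_expr; on
// APFL_PARSE_ERROR the reason is available via apfl_parser_get_error.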
enum apfl_parse_result
apfl_parser_next(apfl_parser_ptr p)
{
if (p->has_expr) {
apfl_expr_deinit(p->allocator, &p->expr);
}
struct fragment_list fragments;
apfl_resizable_init(APFL_RESIZABLE_ARGS(fragments, children));
switch (parse_body_or_toplevel(
p,
true,
&fragments,
&p->expr,
false
)) {
case PF_OK:
p->has_expr = true;
fragment_list_deinit(p->allocator, &fragments);
return APFL_PARSE_OK;
case PF_ERROR:
goto error;
case PF_EOF:
fragment_list_deinit(p->allocator, &fragments);
return APFL_PARSE_EOF;
case PF_CANT_HANDLE:
read_token_after_cant_handle(p);
p->error = ERR_UNEXPECTED_TOKEN(p->token);
goto error;
}
assert(false);
error:
fragment_list_deinit(p->allocator, &fragments);
return APFL_PARSE_ERROR;
}
struct apfl_error
apfl_parser_get_error(apfl_parser_ptr p)
{
return p->error;
}
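// Hand the most recently parsed expression over to the caller, who takes
// ownership and is responsible for deinitializing it.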
struct apfl_expr
apfl_parser_get_expr(apfl_parser_ptr p)
{
p->has_expr = false;
return p->expr;
}
void
apfl_parser_destroy(apfl_parser_ptr p)
{
if (p == NULL) {
return;
}
if (p->has_expr) {
apfl_expr_deinit(p->allocator, &p->expr);
}
if (p->has_token) {
apfl_token_deinit(p->allocator, &p->token);
}
FREE_OBJ(p->allocator, p);
}