From d81bef91845a0e4b31f9299f79a125644b38d753 Mon Sep 17 00:00:00 2001 From: Laria Carolin Chabowski Date: Tue, 18 Jan 2022 21:18:27 +0100 Subject: [PATCH] parser/tokenizer: Save textual data as refcounted strings This avoids creating refcounted strings during evaluation and makes it easier to use the same parsed string in multiple places (should be useful once we implement functions). --- src/apfl.h | 19 ++++++++----- src/eval.c | 68 ++++++++++++++------------------------------ src/expr.c | 61 ++++++++++++++++++++++++++------------- src/parser.c | 30 +++++++++---------- src/parser_test.c | 6 ++-- src/strings.c | 7 +++++ src/token.c | 4 +-- src/tokenizer.c | 44 +++++++++++++++++++++++++--- src/tokenizer_test.c | 6 ++-- 9 files changed, 147 insertions(+), 98 deletions(-) diff --git a/src/apfl.h b/src/apfl.h index c64e64d..2f2e9ff 100644 --- a/src/apfl.h +++ b/src/apfl.h @@ -95,6 +95,11 @@ apfl_refcounted_string apfl_refcounted_string_incref(apfl_refcounted_string); */ void apfl_refcounted_string_unref(apfl_refcounted_string ); +/* Like apfl_refcounted_string_unref, but accepts a pointer to a refcounted_string. + * The pointed to value will be set to NULL. + */ +void apfl_refcounted_string_unref_ptr(apfl_refcounted_string *); + // Tokens enum apfl_token_type { @@ -126,7 +131,7 @@ struct apfl_token { enum apfl_token_type type; struct apfl_position position; union { - struct apfl_string text; + apfl_refcounted_string text; apfl_number number; }; }; @@ -241,7 +246,7 @@ struct apfl_expr_const { union { // variant nil is without data bool boolean; - struct apfl_string string; + apfl_refcounted_string string; apfl_number number; }; }; @@ -273,7 +278,7 @@ struct apfl_expr_param { enum apfl_expr_param_type type; union { - struct apfl_string var; + apfl_refcounted_string var; struct apfl_expr_const constant; struct apfl_expr_param_predicate predicate; struct apfl_expr_params list; @@ -312,7 +317,7 @@ enum apfl_expr_assignable_var_or_member_type { struct apfl_expr_assignable_var_or_member_dot { struct apfl_expr_assignable_var_or_member *lhs; - struct apfl_string rhs; + apfl_refcounted_string rhs; }; struct apfl_expr_assignable_var_or_member_at { struct apfl_expr_assignable_var_or_member *lhs; @@ -323,7 +328,7 @@ struct apfl_expr_assignable_var_or_member { enum apfl_expr_assignable_var_or_member_type type; union { - struct apfl_string var; + apfl_refcounted_string var; struct apfl_expr_assignable_var_or_member_dot dot; struct apfl_expr_assignable_var_or_member_at at; }; @@ -362,7 +367,7 @@ struct apfl_expr_assignment { struct apfl_expr_dot { struct apfl_expr *lhs; - struct apfl_string rhs; + apfl_refcounted_string rhs; }; struct apfl_expr_at { @@ -383,7 +388,7 @@ struct apfl_expr { struct apfl_expr_dot dot; struct apfl_expr_at at; struct apfl_expr_const constant; - struct apfl_string var; + apfl_refcounted_string var; // blank has no further data }; diff --git a/src/eval.c b/src/eval.c index ed5dcda..c142042 100644 --- a/src/eval.c +++ b/src/eval.c @@ -254,37 +254,31 @@ ctx_get_var(apfl_ctx ctx, apfl_refcounted_string name) return ok ? var : NULL; } -static bool -constant_to_value(struct apfl_expr_const *constant, struct apfl_value *value) +static struct apfl_value +constant_to_value(struct apfl_expr_const *constant) { - apfl_refcounted_string rcstring; - switch (constant->type) { case APFL_EXPR_CONST_NIL: - value->type = APFL_VALUE_NIL; - return true; + return (struct apfl_value) { .type = APFL_VALUE_NIL }; case APFL_EXPR_CONST_BOOLEAN: - value->type = APFL_VALUE_BOOLEAN; - value->boolean = constant->boolean; - return true; + return (struct apfl_value) { + .type = APFL_VALUE_BOOLEAN, + .boolean = constant->boolean, + }; case APFL_EXPR_CONST_STRING: - // TODO: Moving the string will become a problem when we're evaluating the same AST node twice. - // The parser probably should already return rcstrings. - rcstring = apfl_string_move_into_new_refcounted(&constant->string); - if (rcstring == NULL) { - return false; - } - value->type = APFL_VALUE_STRING; - value->string = rcstring; - return true; + return (struct apfl_value) { + .type = APFL_VALUE_STRING, + .string = apfl_refcounted_string_incref(constant->string), + }; case APFL_EXPR_CONST_NUMBER: - value->type = APFL_VALUE_NUMBER; - value->number = constant->number; - return true; + return (struct apfl_value) { + .type = APFL_VALUE_NUMBER, + .number = constant->number, + }; } assert(false); - return false; + return (struct apfl_value) { .type = APFL_VALUE_NIL }; } static bool @@ -468,7 +462,6 @@ match_pattern_from_assignable_inner( struct apfl_expr_assignable *assignable, struct match_pattern *pattern ) { - struct apfl_value value; struct apfl_result result; pattern->type = MPATTERN_BLANK; @@ -482,14 +475,11 @@ next: case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER: return match_pattern_from_var_or_member(ctx, &assignable->var_or_member, pattern); case APFL_EXPR_ASSIGNABLE_CONSTANT: - if (!constant_to_value(&assignable->constant, &value)) { - return MATCH_FATAL_ERROR; - } if (!match_pattern_add_constraint( pattern, &constraints_cap, MPATTERN_CONSTRAINT_EQUALS, - value + constant_to_value(&assignable->constant) )) { return MATCH_FATAL_ERROR; } @@ -935,14 +925,9 @@ fatal(void) static struct apfl_result evaluate_constant(struct apfl_expr_const *constant) { - struct apfl_value value; - if (!constant_to_value(constant, &value)) { - return fatal(); - } - return (struct apfl_result) { .type = APFL_RESULT_OK, - .value = value, + .value = constant_to_value(constant), }; } @@ -1086,13 +1071,9 @@ evaluate_dot(apfl_ctx ctx, struct apfl_expr_dot *dot) } struct apfl_value lhs = result.value; - apfl_refcounted_string rcstring = apfl_string_move_into_new_refcounted(&dot->rhs); - if (rcstring == NULL) { - return fatal(); - } struct apfl_value key = (struct apfl_value) { .type = APFL_VALUE_STRING, - .string = rcstring, + .string = apfl_refcounted_string_incref(dot->rhs), }; struct apfl_value out; @@ -1221,14 +1202,9 @@ evaluate_assignment(apfl_ctx ctx, struct apfl_expr_assignment *assignment) } static struct apfl_result -evaluate_var(apfl_ctx ctx, struct apfl_string *varname) +evaluate_var(apfl_ctx ctx, apfl_refcounted_string varname) { - apfl_refcounted_string rcstring = apfl_string_move_into_new_refcounted(varname); - if (rcstring == NULL) { - return fatal(); - } - - variable var = ctx_get_var(ctx, rcstring); + variable var = ctx_get_var(ctx, varname); if (var == NULL) { return (struct apfl_result) { .type = APFL_RESULT_ERR }; } @@ -1258,7 +1234,7 @@ evaluate(apfl_ctx ctx, struct apfl_expr *expr) case APFL_EXPR_ASSIGNMENT: return evaluate_assignment(ctx, &expr->assignment); case APFL_EXPR_VAR: - return evaluate_var(ctx, &expr->var); + return evaluate_var(ctx, apfl_refcounted_string_incref(expr->var)); case APFL_EXPR_BLANK: return (struct apfl_result) { .type = APFL_RESULT_OK, diff --git a/src/expr.c b/src/expr.c index 0313cf9..c7c71d3 100644 --- a/src/expr.c +++ b/src/expr.c @@ -39,7 +39,8 @@ apfl_expr_deinit(struct apfl_expr *expr) apfl_expr_const_deinit(&expr->constant); break; case APFL_EXPR_VAR: - apfl_string_deinit(&expr->var); + apfl_refcounted_string_unref(expr->var); + expr->var = NULL; break; case APFL_EXPR_BLANK: // nop @@ -95,7 +96,8 @@ apfl_expr_const_deinit(struct apfl_expr_const *constant) // nop break; case APFL_EXPR_CONST_STRING: - apfl_string_deinit(&constant->string); + apfl_refcounted_string_unref(constant->string); + constant->string = NULL; break; } } @@ -135,7 +137,8 @@ apfl_expr_param_deinit(struct apfl_expr_param *param) { switch (param->type) { case APFL_EXPR_PARAM_VAR: - apfl_string_deinit(¶m->var); + apfl_refcounted_string_unref(param->var); + param->var = NULL; break; case APFL_EXPR_PARAM_CONSTANT: apfl_expr_const_deinit(¶m->constant); @@ -187,7 +190,8 @@ void apfl_expr_assignable_var_or_member_dot_deinit(struct apfl_expr_assignable_var_or_member_dot *dot) { DESTROY(dot->lhs, apfl_expr_assignable_var_or_member_deinit); - apfl_string_deinit(&dot->rhs); + apfl_refcounted_string_unref(dot->rhs); + dot->rhs = NULL; } void @@ -203,7 +207,8 @@ apfl_expr_assignable_var_or_member_deinit(struct apfl_expr_assignable_var_or_mem { switch (var_or_member->type) { case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_VAR: - apfl_string_deinit(&var_or_member->var); + apfl_refcounted_string_unref(var_or_member->var); + var_or_member->var = NULL; break; case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_DOT: apfl_expr_assignable_var_or_member_dot_deinit(&var_or_member->dot); @@ -247,7 +252,8 @@ void apfl_expr_dot_deinit(struct apfl_expr_dot *dot) { DESTROY(dot->lhs, apfl_expr_deinit); - apfl_string_deinit(&dot->rhs); + apfl_refcounted_string_unref(dot->rhs); + dot->rhs = NULL; } void @@ -291,7 +297,7 @@ apfl_expr_move(struct apfl_expr *in) out.constant = apfl_expr_const_move(&in->constant); break; case APFL_EXPR_VAR: - out.var = apfl_string_move(&in->var); + MOVEPTR(out.var, in->var); break; case APFL_EXPR_BLANK: // nop @@ -371,7 +377,7 @@ apfl_expr_const_move(struct apfl_expr_const *in) // nop break; case APFL_EXPR_CONST_STRING: - out.string = apfl_string_move(&in->string); + MOVEPTR(out.string,in->string); } return out; @@ -413,7 +419,7 @@ apfl_expr_param_move(struct apfl_expr_param *in) struct apfl_expr_param out = *in; switch (in->type) { case APFL_EXPR_PARAM_VAR: - out.var = apfl_string_move(&in->var); + MOVEPTR(out.var, in->var); break; case APFL_EXPR_PARAM_CONSTANT: out.constant = apfl_expr_const_move(&in->constant); @@ -454,11 +460,11 @@ apfl_expr_assignable_var_or_member_move(struct apfl_expr_assignable_var_or_membe struct apfl_expr_assignable_var_or_member out = *in; switch (in->type) { case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_VAR: - out.var = apfl_string_move(&in->var); + MOVEPTR(out.var, in->var); break; case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_DOT: MOVEPTR(out.dot.lhs, in->dot.lhs); - out.dot.rhs = apfl_string_move(&in->dot.rhs); + MOVEPTR(out.dot.rhs, in->dot.rhs); break; case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_AT: MOVEPTR(out.at.lhs, in->at.lhs); @@ -523,7 +529,7 @@ apfl_expr_dot_move(struct apfl_expr_dot *in) { struct apfl_expr_dot out; MOVEPTR(out.lhs, in->lhs); - out.rhs = apfl_string_move(&in->rhs); + MOVEPTR(out.rhs, in->rhs); return out; } @@ -564,6 +570,8 @@ print_body(struct apfl_expr_body *body, unsigned indent, FILE *f) static void print_constant_with_pos(struct apfl_expr_const constant, struct apfl_position pos, unsigned indent, FILE *f) { + struct apfl_string_view sv; + switch (constant.type) { case APFL_EXPR_CONST_NIL: apfl_print_indented(indent, f, "Const (nil) @ " POSFMT "\n", POSARGS(pos)); @@ -572,7 +580,8 @@ print_constant_with_pos(struct apfl_expr_const constant, struct apfl_position po apfl_print_indented(indent, f, "Const (%s) @ " POSFMT "\n", constant.boolean ? "true" : "false", POSARGS(pos)); break; case APFL_EXPR_CONST_STRING: - apfl_print_indented(indent, f, "Const (" APFL_STR_FMT ") @ " POSFMT "\n", APFL_STR_FMT_ARGS(constant.string), POSARGS(pos)); + sv = apfl_string_view_from(constant.string); + apfl_print_indented(indent, f, "Const (" APFL_STR_FMT ") @ " POSFMT "\n", APFL_STR_FMT_ARGS(sv), POSARGS(pos)); break; case APFL_EXPR_CONST_NUMBER: apfl_print_indented(indent, f, "Const (%f) @ " POSFMT "\n", constant.number, POSARGS(pos)); @@ -583,6 +592,8 @@ print_constant_with_pos(struct apfl_expr_const constant, struct apfl_position po static void print_constant(struct apfl_expr_const constant, unsigned indent, FILE *f) { + struct apfl_string_view sv; + switch (constant.type) { case APFL_EXPR_CONST_NIL: apfl_print_indented(indent, f, "Const (nil)\n"); @@ -591,7 +602,8 @@ print_constant(struct apfl_expr_const constant, unsigned indent, FILE *f) apfl_print_indented(indent, f, "Const (%s)\n", constant.boolean ? "true" : "false"); break; case APFL_EXPR_CONST_STRING: - apfl_print_indented(indent, f, "Const (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(constant.string)); + sv = apfl_string_view_from(constant.string); + apfl_print_indented(indent, f, "Const (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(sv)); break; case APFL_EXPR_CONST_NUMBER: apfl_print_indented(indent, f, "Const (%f)\n", constant.number); @@ -614,9 +626,12 @@ print_params_item(struct apfl_expr_params_item *item, unsigned indent, FILE *f) static void print_param(struct apfl_expr_param *param, unsigned indent, FILE *f) { + struct apfl_string_view sv; + switch (param->type) { case APFL_EXPR_PARAM_VAR: - apfl_print_indented(indent, f, "Var (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(param->var)); + sv = apfl_string_view_from(param->var); + apfl_print_indented(indent, f, "Var (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(sv)); break; case APFL_EXPR_PARAM_CONSTANT: print_constant(param->constant, indent, f); @@ -643,12 +658,16 @@ print_param(struct apfl_expr_param *param, unsigned indent, FILE *f) static void print_assignable_var_or_member(struct apfl_expr_assignable_var_or_member var_or_member, unsigned indent, FILE *f) { + struct apfl_string_view sv; + switch (var_or_member.type) { case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_VAR: - apfl_print_indented(indent, f, "Variable (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(var_or_member.var)); + sv = apfl_string_view_from(var_or_member.var); + apfl_print_indented(indent, f, "Variable (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(sv)); break; case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_DOT: - apfl_print_indented(indent, f, "Dot (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(var_or_member.dot.rhs)); + sv = apfl_string_view_from(var_or_member.dot.rhs); + apfl_print_indented(indent, f, "Dot (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(sv)); print_assignable_var_or_member(*var_or_member.dot.lhs, indent+1, f); break; case APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_AT: @@ -699,6 +718,8 @@ print_assignable(struct apfl_expr_assignable assignable, unsigned indent, FILE * static void print_expr(struct apfl_expr *expr, unsigned indent, FILE *f) { + struct apfl_string_view sv; + switch (expr->type) { case APFL_EXPR_LIST: apfl_print_indented(indent, f, "List @ " POSFMT "\n", POSARGS(expr->position)); @@ -746,7 +767,8 @@ print_expr(struct apfl_expr *expr, unsigned indent, FILE *f) print_expr(expr->assignment.rhs, indent+2, f); break; case APFL_EXPR_DOT: - apfl_print_indented(indent, f, "Dot (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(expr->dot.rhs)); + sv = apfl_string_view_from(expr->dot.rhs); + apfl_print_indented(indent, f, "Dot (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(sv)); print_expr(expr->dot.lhs, indent+1, f); break; case APFL_EXPR_AT: @@ -760,7 +782,8 @@ print_expr(struct apfl_expr *expr, unsigned indent, FILE *f) print_constant_with_pos(expr->constant, expr->position, indent, f); break; case APFL_EXPR_VAR: - apfl_print_indented(indent, f, "Var (" APFL_STR_FMT ") @ " POSFMT "\n", APFL_STR_FMT_ARGS(expr->var), POSARGS(expr->position)); + sv = apfl_string_view_from(expr->var); + apfl_print_indented(indent, f, "Var (" APFL_STR_FMT ") @ " POSFMT "\n", APFL_STR_FMT_ARGS(sv), POSARGS(expr->position)); break; case APFL_EXPR_BLANK: apfl_print_indented(indent, f, "Blank (_)\n"); diff --git a/src/parser.c b/src/parser.c index 7f5230b..f817d9c 100644 --- a/src/parser.c +++ b/src/parser.c @@ -46,7 +46,7 @@ enum fragment_type { struct fragment_dot { struct fragment *lhs; - struct apfl_string rhs; + apfl_refcounted_string rhs; }; struct fragment_lhs_rhs { @@ -64,7 +64,7 @@ struct fragment { union { struct fragment *expand; struct apfl_expr_const constant; - struct apfl_string name; + apfl_refcounted_string name; struct fragment_dot dot; struct fragment_lhs_rhs at; struct fragment_lhs_rhs predicate; @@ -131,11 +131,11 @@ fragment_deinit(struct fragment *fragment) apfl_expr_const_deinit(&fragment->constant); break; case FRAG_NAME: - apfl_string_deinit(&fragment->name); + apfl_refcounted_string_unref_ptr(&fragment->name); break; case FRAG_DOT: DESTROY(fragment->dot.lhs, fragment_deinit); - apfl_string_deinit(&fragment->dot.rhs); + apfl_refcounted_string_unref_ptr(&fragment->dot.rhs); break; case FRAG_AT: deinit_fragment_lhs_rhs(&fragment->at); @@ -160,7 +160,7 @@ fragment_dot_move(struct fragment_dot *in) { struct fragment_dot out; MOVEPTR(out.lhs, in->lhs); - out.rhs = apfl_string_move(&in->rhs); + MOVEPTR(out.rhs, in->rhs); return out; } @@ -196,7 +196,7 @@ fragment_move(struct fragment *in) out.constant = apfl_expr_const_move(&in->constant); break; case FRAG_NAME: - out.name = apfl_string_move(&in->name); + MOVEPTR(out.name, in->name); break; case FRAG_DOT: out.dot = fragment_dot_move(&in->dot); @@ -591,7 +591,7 @@ fragment_to_expr_inner(apfl_parser_ptr p, struct fragment *fragment, struct apfl return true; case FRAG_NAME: out->type = APFL_EXPR_VAR; - out->var = apfl_string_move(&fragment->name); + out->var = apfl_refcounted_string_incref(fragment->name); out->position = fragment->position; return true; case FRAG_DOT: @@ -599,7 +599,7 @@ fragment_to_expr_inner(apfl_parser_ptr p, struct fragment *fragment, struct apfl if ((out->dot.lhs = fragment_to_expr_allocated(p, fragment_move(fragment->dot.lhs))) == NULL) { return false; } - out->dot.rhs = apfl_string_move(&fragment->dot.rhs); + out->dot.rhs = apfl_refcounted_string_incref(fragment->dot.rhs); out->position = fragment->position; return true; case FRAG_AT: @@ -977,7 +977,7 @@ parse_stringify(apfl_parser_ptr p, struct fragment *fragment, struct apfl_positi fragment->type = FRAG_CONSTANT; fragment->constant = (struct apfl_expr_const) { .type = APFL_EXPR_CONST_STRING, - .string = apfl_string_move(&p->token.text), + .string = apfl_refcounted_string_incref(p->token.text), }; fragment->position = position; return true; @@ -1046,7 +1046,7 @@ fragment_to_param_inner( return true; case FRAG_NAME: out->type = APFL_EXPR_PARAM_VAR; - out->var = apfl_string_move(&fragment->name); + out->var = apfl_refcounted_string_incref(fragment->name); return true; case FRAG_DOT: p->error = err_unexpected_token(APFL_TOK_DOT, fragment->position); @@ -1174,7 +1174,7 @@ static bool fragment_to_assignable_var_or_member( return false; case FRAG_NAME: out->type = APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_VAR, - out->var = apfl_string_move(&fragment->name); + out->var = apfl_refcounted_string_incref(fragment->name); return true; case FRAG_DOT: lhs = ALLOC(struct apfl_expr_assignable_var_or_member); @@ -1189,7 +1189,7 @@ static bool fragment_to_assignable_var_or_member( out->type = APFL_EXPR_ASSIGNABLE_VAR_OR_MEMBER_DOT; out->dot = (struct apfl_expr_assignable_var_or_member_dot) { .lhs = lhs, - .rhs = apfl_string_move(&fragment->dot.rhs), + .rhs = apfl_refcounted_string_incref(fragment->dot.rhs), }; return true; case FRAG_AT: @@ -1939,7 +1939,7 @@ parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum par fragment->type = FRAG_BLANK; } else { fragment->type = FRAG_NAME; - fragment->name = apfl_string_move(&p->token.text); + fragment->name = apfl_refcounted_string_incref(p->token.text); } fragment->position = p->token.position; break; @@ -1947,7 +1947,7 @@ parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum par fragment->type = FRAG_CONSTANT; fragment->constant = (struct apfl_expr_const) { .type = APFL_EXPR_CONST_STRING, - .string = apfl_string_move(&p->token.text), + .string = apfl_refcounted_string_incref(p->token.text), }; fragment->position = p->token.position; break; @@ -1984,7 +1984,7 @@ parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum par fragment->type = FRAG_DOT; fragment->position = token_pos; MOVEPTR(fragment->dot.lhs, lhs); - fragment->dot.rhs = apfl_string_move(&p->token.text); + fragment->dot.rhs = apfl_refcounted_string_incref(p->token.text); break; case APFL_TOK_AT: diff --git a/src/parser_test.c b/src/parser_test.c index 6176422..a678519 100644 --- a/src/parser_test.c +++ b/src/parser_test.c @@ -108,11 +108,11 @@ expect_error_of_type(struct parser_test *pt, enum apfl_error_type want) } } -static struct apfl_string +static apfl_refcounted_string new_string(struct parser_test *pt, const char *in) { - struct apfl_string out = apfl_string_blank(); - if (!apfl_string_copy(&out, apfl_string_view_from(in))) { + apfl_refcounted_string out = apfl_string_copy_into_new_refcounted(apfl_string_view_from(in)); + if (out == NULL) { test_fatalf(pt->t, "Failed copying string in new_string"); } return out; diff --git a/src/strings.c b/src/strings.c index 2e524af..9bbb288 100644 --- a/src/strings.c +++ b/src/strings.c @@ -208,3 +208,10 @@ apfl_refcounted_string_unref(apfl_refcounted_string rcstring) free(rcstring); } } + +void +apfl_refcounted_string_unref_ptr(apfl_refcounted_string *rcstring_ptr) +{ + apfl_refcounted_string_unref(*rcstring_ptr); + *rcstring_ptr = NULL; +} diff --git a/src/token.c b/src/token.c index 641def2..e95aeb8 100644 --- a/src/token.c +++ b/src/token.c @@ -32,7 +32,7 @@ void apfl_token_deinit(struct apfl_token *token) { if (has_text_data(token->type)) { - apfl_string_deinit(&token->text); + apfl_refcounted_string_unref_ptr(&token->text); } } @@ -97,7 +97,7 @@ apfl_token_print(struct apfl_token token, FILE *file) file, "%s (" APFL_STR_FMT ") @ (%d:%d)\n", apfl_token_type_name(token.type), - APFL_STR_FMT_ARGS(token.text), + APFL_STR_FMT_ARGS(apfl_string_view_from(token.text)), token.position.line, token.position.col ); diff --git a/src/tokenizer.c b/src/tokenizer.c index 4e503db..5639596 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -256,6 +256,18 @@ apfl_tokenizer_next(apfl_tokenizer_ptr tokenizer, bool need) } } +static apfl_refcounted_string +rcstring_from_string_builder(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *builder) +{ + struct apfl_string string = apfl_string_builder_move_string(builder); + apfl_refcounted_string rcstring = apfl_string_move_into_new_refcounted(&string); + if (rcstring == NULL) { + apfl_string_deinit(&string); + tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + } + return rcstring; +} + static enum apfl_parse_result comment(apfl_tokenizer_ptr tokenizer) { @@ -268,6 +280,8 @@ comment(apfl_tokenizer_ptr tokenizer) apfl_string_builder_init(&text); for (;;) { + apfl_refcounted_string rcstring; + last_pos = tokenizer->position; switch (read_byte(tokenizer, &byte, true)) { @@ -276,11 +290,16 @@ comment(apfl_tokenizer_ptr tokenizer) case RR_ERR: return APFL_PARSE_ERROR; case RR_EOF: + rcstring = rcstring_from_string_builder(tokenizer, &text); + if (rcstring == NULL) { + return APFL_PARSE_ERROR; + } + tokenizer->next_mode = NM_EOF; tokenizer->token = (struct apfl_token) { .type = APFL_TOK_COMMENT, .position = pos, - .text = apfl_string_builder_move_string(&text), + .text = rcstring, }; return APFL_PARSE_OK; } @@ -288,10 +307,15 @@ comment(apfl_tokenizer_ptr tokenizer) if (byte == '\n') { unread_byte(tokenizer, last_pos); + rcstring = rcstring_from_string_builder(tokenizer, &text); + if (rcstring == NULL) { + return APFL_PARSE_ERROR; + } + tokenizer->token = (struct apfl_token) { .type = APFL_TOK_COMMENT, .position = pos, - .text = apfl_string_builder_move_string(&text), + .text = rcstring, }; return APFL_PARSE_OK; } @@ -564,12 +588,19 @@ inner_string(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *text) return APFL_PARSE_ERROR; } + apfl_refcounted_string rcstring; + switch (byte) { case '"': + rcstring = rcstring_from_string_builder(tokenizer, text); + if (rcstring == NULL) { + return APFL_PARSE_ERROR; + } + tokenizer->token = (struct apfl_token) { .type = APFL_TOK_STRING, .position = pos, - .text = apfl_string_builder_move_string(text), + .text = rcstring, }; return APFL_PARSE_OK; case '\\': @@ -613,10 +644,15 @@ finalize_maybe_name( .position = pos, }; } else { + apfl_refcounted_string rcstring = rcstring_from_string_builder(tokenizer, text); + if (rcstring == NULL) { + return APFL_PARSE_ERROR; + } + tokenizer->token = (struct apfl_token) { .type = APFL_TOK_NAME, .position = pos, - .text = apfl_string_builder_move_string(text), + .text = rcstring, }; } diff --git a/src/tokenizer_test.c b/src/tokenizer_test.c index 0dab30f..7fb9786 100644 --- a/src/tokenizer_test.c +++ b/src/tokenizer_test.c @@ -117,7 +117,8 @@ expect_text_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_ struct apfl_token tok; if (expect_token(tt, line, col, type, &tok)) { if (!apfl_string_eq(text, tok.text)) { - test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(tok.text), text); + struct apfl_string_view sv = apfl_string_view_from(tok.text); + test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text); } apfl_token_deinit(&tok); } @@ -129,7 +130,8 @@ expect_text_token_sv(struct tokenizer_test *tt, int line, int col, enum apfl_tok struct apfl_token tok; if (expect_token(tt, line, col, type, &tok)) { if (!apfl_string_eq(text, tok.text)) { - test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(tok.text), text); + struct apfl_string_view sv = apfl_string_view_from(tok.text); + test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text); } apfl_token_deinit(&tok); }