From 09d51b90804295f265197a830806446ba016599c Mon Sep 17 00:00:00 2001 From: Laria Carolin Chabowski Date: Fri, 10 Dec 2021 22:13:39 +0100 Subject: [PATCH] Parser: Make the parser object own the token This simplifies the code a bit, since the methods now don't have to deinit the token themselves any more. --- src/parser.c | 211 ++++++++++++++++++++++----------------------------- 1 file changed, 89 insertions(+), 122 deletions(-) diff --git a/src/parser.c b/src/parser.c index dd4e104..d0686dc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -13,8 +13,9 @@ struct apfl_parser { struct apfl_error error; bool eof; + bool has_token; bool has_unread; - struct apfl_token unread_buf; + struct apfl_token token; }; enum parse_fragment_result { @@ -217,6 +218,7 @@ apfl_parser_new(struct apfl_parser_token_source token_source) p->token_source = token_source; p->eof = false; + p->has_token = false; p->has_unread = false; return p; @@ -303,45 +305,46 @@ get_preprocessed_token(apfl_parser_ptr p, struct apfl_token *token, bool need) } static enum apfl_parse_result -read_token(apfl_parser_ptr p, struct apfl_token *token, bool need) +read_token(apfl_parser_ptr p, bool need) { if (p->eof) { return APFL_PARSE_EOF; } if (p->has_unread) { - *token = p->unread_buf; p->has_unread = false; return APFL_PARSE_OK; } - enum apfl_parse_result result = get_preprocessed_token(p, token, need); + + if (p->has_token) { + apfl_token_deinit(&p->token); + } + + enum apfl_parse_result result = get_preprocessed_token(p, &p->token, need); p->eof = result == APFL_PARSE_EOF; + p->has_token = result == APFL_PARSE_OK; return result; } static void -unread_token(apfl_parser_ptr p, struct apfl_token token) +unread_token(apfl_parser_ptr p) { - if (p->has_unread) { - assert(false); // TODO: Or should we return a regular error? - } - - p->unread_buf = token; + assert(!p->eof); + assert(p->has_token); + assert(!p->has_unread); p->has_unread = true; } // Must only be called after an PF_CANT_HANDLE! -static struct apfl_token +static void read_token_after_cant_handle(apfl_parser_ptr p) { - struct apfl_token token; - // A function that returns PF_CANT_HANDLE always unreads a token, so we are // guaranteed to have at least one token. - assert(read_token(p, &token, true) == APFL_PARSE_OK); + assert(read_token(p, true) == APFL_PARSE_OK); - return token; + return &p->token; } static struct apfl_error @@ -360,10 +363,7 @@ err_unexpected_token(enum apfl_token_type token_type, struct apfl_position pos) static enum parse_fragment_result unexpected_cant_handle(apfl_parser_ptr p) { - struct apfl_token token = read_token_after_cant_handle(p); - - p->error = ERR_UNEXPECTED_TOKEN(token); - apfl_token_deinit(&token); + p->error = ERR_UNEXPECTED_TOKEN(p->token); return PF_ERROR; } @@ -415,9 +415,9 @@ parse_parens_tail(apfl_parser_ptr p, struct fragment_list *children, struct apfl } assert(result == PF_CANT_HANDLE); - struct apfl_token token = read_token_after_cant_handle(p); + read_token_after_cant_handle(p); - if (token->type == APFL_TOK_RPAREN) { + if (p->token.type == APFL_TOK_RPAREN) { result = PF_OK; // \mystuff\TODO:finalize list somehow? } else { @@ -427,7 +427,6 @@ parse_parens_tail(apfl_parser_ptr p, struct fragment_list *children, struct apfl result = PF_ERROR; } - apfl_token_deinit(&token); return result; } @@ -464,17 +463,18 @@ fail: static enum parse_fragment_result skip_inner_bracket_separators(apfl_parser_ptr p) { - struct apfl_token token; - for (;;) { - switch (read_token(t, &token, true)) { + switch (read_token(p, true)) { case APFL_PARSE_OK: - if (token.type == APFL_TOK_COMMA || token.type == APFL_TOK_LINEBREAK || token.type == APFL_TOK_SEMICOLON) { - apfl_token_deinit(&token); + if ( + p->token.type == APFL_TOK_COMMA + || p->token.type == APFL_TOK_LINEBREAK + || p->token.type == APFL_TOK_SEMICOLON + ) { break; // Note: breaks switch, continues loop } - unread_token(p, token); + unread_token(p); return PF_OK; case APFL_PARSE_EOF: return PF_OK; @@ -496,8 +496,7 @@ parse_empty_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_position p return result; } - struct apfl_token token; - switch (read_token(p, &token, true)) { + switch (read_token(p, true)) { case APFL_PARSE_OK: break; case APFL_PARSE_EOF: @@ -508,26 +507,23 @@ parse_empty_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_position p return PF_ERROR; } - if (token.type != APFL_TOK_RBRACKET) { - p->error = ERR_UNEXPECTED_TOKEN(token); - apfl_token_deinit(&token); + if (p->token.type != APFL_TOK_RBRACKET) { + p->error = ERR_UNEXPECTED_TOKEN(p->token); return PF_ERROR; } - apfl_token_deinit(&token); out->type = FRAG_EMPTY_DICT; out->position = position; return PF_OK; } +// Must only be called after PF_CANT_HANDLE static enum parse_fragment_result -parse_empty_list_or_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_token token, struct apfl_position position) +parse_empty_list_or_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_position position) { - enum apfl_token_type token_type = token.type; - struct apfl_position token_position = token.position; - apfl_token_deinit(&token); + assert(p->has_token); - switch (token_type) { + switch (p->token.type) { case APFL_TOK_RBRACKET: out->type = FRAG_EMPTY_LIST; out->position = position; @@ -535,11 +531,9 @@ parse_empty_list_or_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_to case APFL_TOK_MAPSTO: return parse_empty_dict(p, out, position); default: - p->error = err_unexpected_token(token_type, token_position); + p->error = ERR_UNEXPECTED_TOKEN(p->token); return PF_ERROR; } - - apfl_token_deinit(&token); } static struct apfl_error @@ -582,8 +576,6 @@ parse_dict( struct apfl_position mapsto_pos, struct apfl_position start ) { - struct apfl_token token; - struct fragment value; bool cleanup_key = true; @@ -624,7 +616,7 @@ parse_dict( goto error; } - switch (read_token(p, &token, true)) { + switch (read_token(p, true)) { case APFL_PARSE_OK: break; case APFL_PARSE_EOF: @@ -636,13 +628,12 @@ parse_dict( goto error; } - if (token.type != APFL_TOK_MAPSTO) { - unread_token(p, token); + if (p->token.type != APFL_TOK_MAPSTO) { + unread_token(p); goto error; } - mapsto_pos = token.position; - apfl_token_deinit(&token); + mapsto_pos = p->token.position; after_mapsto: result = skip_inner_bracket_separators(p); @@ -697,15 +688,12 @@ after_mapsto: maybe_end: assert(!cleanup_key && !cleanup_value); - token = read_token_after_cant_handle(p); - if (token.type != APFL_TOK_RBRACKET) { - p->error = ERR_UNEXPECTED_TOKEN(token); - apfl_token_deinit(&token); + read_token_after_cant_handle(p); + if (p->token.type != APFL_TOK_RBRACKET) { + p->error = ERR_UNEXPECTED_TOKEN(p->token); goto error; } - apfl_token_deinit(&token); - out->type = FRAG_EXPR, out->expr.type = (struct apfl_expr) { .type = APFL_EXPR_DICT, @@ -757,13 +745,11 @@ parse_list( } maybe_end: - struct apfl_token token = read_token_after_cant_handle(p); - if (token.type != APFL_TOK_RBRACKET) { - p->error = ERR_UNEXPECTED_TOKEN(token); - apfl_token_deinit(&token); + read_token_after_cant_handle(p); + if (p->token.type != APFL_TOK_RBRACKET) { + p->error = ERR_UNEXPECTED_TOKEN(p->token); goto error; } - apfl_token_deinit(&token); out->type = FRAG_LIST; out->list = list; @@ -790,7 +776,7 @@ parse_brackets(apfl_parser_ptr p, struct fragment *out, struct apfl_position sta case PF_OK: break; case PF_CANT_HANDLE: - return parse_empty_list_or_dict(p, out, read_token_after_cant_handle(p), start); + return parse_empty_list_or_dict(p, out, start); case PF_EOF: p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start); return PF_ERROR; @@ -803,8 +789,7 @@ parse_brackets(apfl_parser_ptr p, struct fragment *out, struct apfl_position sta goto error; } - struct apfl_token token; - switch (read_token(p, &token, true)) { + switch (read_token(p, true)) { case APFL_PARSE_OK: break; case APFL_PARSE_EOF: @@ -816,12 +801,11 @@ parse_brackets(apfl_parser_ptr p, struct fragment *out, struct apfl_position sta goto error; } - if (token.type == APFL_TOK_MAPSTO) { - struct apfl_position mapsto_pos = token.position; - apfl_token_deinit(&token); + if (p->token.type == APFL_TOK_MAPSTO) { + struct apfl_position mapsto_pos = p->token.position; return parse_dict(p, out, first, mapsto_pos, start); } else { - unread_token(p, token); + unread_token(p); return parse_list(p, out, first, start); } @@ -866,8 +850,7 @@ parse_expand(apfl_parser_ptr p, struct fragment *fragment, struct apfl_position static enum parse_fragment_result parse_stringify(apfl_parser_ptr p, struct fragment *fragment, struct apfl_position position) { - struct apfl_token token; - switch (read_token(p, &token, true)) { + switch (read_token(p, true)) { case APFL_PARSE_OK: break; case APFL_PARSE_EOF: @@ -878,16 +861,15 @@ parse_stringify(apfl_parser_ptr p, struct fragment *fragment, struct apfl_positi return PF_ERROR; } - if (token.type != APFL_TOK_NAME) { - p->error = ERR_UNEXPECTED_TOKEN(token); - apfl_token_deinit(&token); + if (p->token.type != APFL_TOK_NAME) { + p->error = ERR_UNEXPECTED_TOKEN(p->token); return PF_ERROR; } fragment->type = FRAG_CONSTANT; fragment->constant = (struct apfl_expr_const) { .type = APFL_EXPR_CONST_STRING, - .string = apfl_string_move(&token.text), + .string = apfl_string_move(&p->token.text), }; fragment->position = position; return PF_OK; @@ -1371,14 +1353,13 @@ parse_body_or_toplevel( } break_inner: - struct apfl_token token = read_token_after_cant_handle(p); + read_token_after_cant_handle(p); - switch (token.type) { + switch (p->token.type) { case APFL_TOK_ASSIGN: case APFL_TOK_LOCAL_ASSIGN: - bool local = token.type == APFL_TOK_LOCAL_ASSIGN; - struct apfl_position position = token.position; - apfl_token_deinit(&token); + bool local = p->token.type == APFL_TOK_LOCAL_ASSIGN; + struct apfl_position position = p->token.position; if (fragments->len == 0) { p->error = (struct apfl_error) { @@ -1449,12 +1430,11 @@ break_inner: default: if (leftmost_assignment_expr != NULL) { - p->error = ERR_UNEXPECTED_TOKEN(token); - apfl_token_deinit(&token); + p->error = ERR_UNEXPECTED_TOKEN(p->token); goto error; } - unread_token(p, token); + unread_token(p); return PF_CANT_HANDLE; } } @@ -1520,17 +1500,9 @@ parse_braces( case PF_ERROR: goto error; case PF_CANT_HANDLE: - struct apfl_token token = read_token_after_cant_handle(p); + read_token_after_cant_handle(p); - // TODO: Something similar to this is done all around in this file. - // It would probably be better, if the parser object owns the - // current token. Then we wouldn't have to clean up the - // current token all the time. - enum apfl_token_type token_type = token.type; - struct apfl_position position = token.position; - apfl_token_deinit(&token); - - switch (token_type) { + switch (p->token.type) { case APFL_TOK_RBRACE: // \mystuff\TODO: break; @@ -1538,7 +1510,7 @@ parse_braces( if (body.len > 0 && !has_params) { p->error = (struct apfl_error) { .type = APFL_ERR_STATEMENTS_BEFORE_PARAMETERS, - .position = position, + .position = p->token.position, }; goto error; } @@ -1574,7 +1546,7 @@ parse_braces( break; default: - p->error = err_unexpected_token(token_type, position); + p->error = ERR_UNEXPECTED_TOKEN(p->token); goto error; } @@ -1590,9 +1562,7 @@ error: static enum parse_fragment_result parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum parse_fragment_flags flags) { - struct apfl_token token; - - switch (read_token(p, &token, need)) { + switch (read_token(p, need)) { case APFL_PARSE_OK: break; case APFL_PARSE_EOF: @@ -1603,49 +1573,49 @@ parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum par enum parse_fragment_result result; - switch (token.type) { + switch (p->token.type) { case APFL_TOK_LPAREN: - result = parse_parens(p, fragment, token.position); + result = parse_parens(p, fragment, p->token.position); break; case APFL_TOK_LBRACKET: - result = parse_brackets(p, fragment, token.position); + result = parse_brackets(p, fragment, p->token.position); break; case APFL_TOK_LBRACE: - result = parse_braces(p, fragment, token.position); + result = parse_braces(p, fragment, p->token.position); break; case APFL_TOK_EXPAND: if (flags & FFLAG_NO_EXPAND) { - unread_token(p, token); + unread_token(p); return PF_CANT_HANDLE; } - result = parse_expand(p, fragment, token.position); + result = parse_expand(p, fragment, p->token.position); break; case APFL_TOK_STRINGIFY: - result = parse_stringify(p, fragment, token.position); + result = parse_stringify(p, fragment, p->token.position); break; case APFL_TOK_NUMBER: fragment->type = FRAG_CONSTANT; fragment->constant = (struct apfl_expr_const) { .type = APFL_EXPR_CONST_NUMBER, - .number = token.number, + .number = p->token.number, }; - fragment->position = token.position; + fragment->position = p->token.position; result = PF_OK; break; case APFL_TOK_NAME: - if (apfl_string_cmp(token.text, "nil") == 0) { + if (apfl_string_cmp(p->token.text, "nil") == 0) { fragment->type = FRAG_CONSTANT; fragment->constant = (struct apfl_expr_const) { .type = APFL_EXPR_CONST_NIL, }; - } else if (apfl_string_cmp(token.text, "true") == 0) { + } else if (apfl_string_cmp(p->token.text, "true") == 0) { fragment->type = FRAG_CONSTANT; fragment->constant = (struct apfl_expr_const) { .type = APFL_EXPR_CONST_BOOLEAN, .boolean = true, }; - } else if (apfl_string_cmp(token.text, "false") == 0) { + } else if (apfl_string_cmp(p->token.text, "false") == 0) { fragment->type = FRAG_CONSTANT; fragment->constant = (struct apfl_expr_const) { .type = APFL_EXPR_CONST_BOOLEAN, @@ -1653,29 +1623,27 @@ parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum par }; } else { fragment->type = FRAG_NAME; - fragment->name = apfl_string_move(&token.text); + fragment->name = apfl_string_move(&p->token.text); } - fragment->position = token.position; + fragment->position = p->token.position; result = PF_OK; break; case APFL_TOK_STRING: fragment->type = FRAG_CONSTANT; fragment->constant = (struct apfl_expr_const) { .type = APFL_EXPR_CONST_STRING, - .string = apfl_string_move(&token.text), + .string = apfl_string_move(&p->token.text), }; - fragment->position = token.position; + fragment->position = p->token.position; result = PF_OK; break; default: - unread_token(p, token); + unread_token(p); return PF_CANT_HANDLE; } if (result == PF_OK) { - apfl_token_deinit(&token); - - switch (read_token(p, &token, need)) { + switch (read_token(p, need)) { case APFL_PARSE_OK: break; case APFL_PARSE_EOF: @@ -1684,23 +1652,22 @@ parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum par return PF_ERROR; // \mystuff\TODO:destroy fragment in case of errors } - switch (token.type) { + switch (p->token.type) { case APFL_TOK_DOT: - result = parse_dot(p, fragment, token.position); + result = parse_dot(p, fragment, p->token.position); break; case APFL_TOK_AT: - result = parse_at(p, fragment, token.position); + result = parse_at(p, fragment, p->token.position); break; case APFL_TOK_QUESTION_MARK: - result = parse_predicate(p, fragment, token.position); + result = parse_predicate(p, fragment, p->token.position); break; default: - unread_token(p, token); + unread_token(p); return result; } } - apfl_token_deinit(&token); return result; }