tokenizer: Manage last position automatically when unreading byte

This commit is contained in:
Laria 2023-09-03 16:24:15 +02:00
parent 1866963738
commit 52ea737975

View file

@ -33,6 +33,7 @@ struct apfl_tokenizer {
char first_digit_for_negative_number;
struct apfl_position position;
struct apfl_position last_position;
bool last_byte_was_linebreak;
union {
@ -115,6 +116,8 @@ read_byte(apfl_tokenizer_ptr tokenizer, char *byte, bool need)
}
}
tokenizer->last_position = tokenizer->position;
if (tokenizer->last_byte_was_linebreak) {
tokenizer->position.line++;
tokenizer->position.col = 0;
@ -131,9 +134,9 @@ read_byte(apfl_tokenizer_ptr tokenizer, char *byte, bool need)
// At most one unread_byte() call is allowed after each read_byte() call!
static void
unread_byte(apfl_tokenizer_ptr tokenizer, struct apfl_position pos)
unread_byte(apfl_tokenizer_ptr tokenizer)
{
tokenizer->position = pos;
tokenizer->position = tokenizer->last_position;
tokenizer->buf_pos--;
tokenizer->last_byte_was_linebreak = false;
}
@ -259,13 +262,10 @@ comment(apfl_tokenizer_ptr tokenizer)
char byte;
struct apfl_position pos = tokenizer->position;
struct apfl_position last_pos;
struct apfl_string_builder text = apfl_string_builder_init(tokenizer->allocator);
for (;;) {
last_pos = tokenizer->position;
switch (read_byte(tokenizer, &byte, true)) {
case RR_OK:
break;
@ -282,7 +282,7 @@ comment(apfl_tokenizer_ptr tokenizer)
}
if (byte == '\n') {
unread_byte(tokenizer, last_pos);
unread_byte(tokenizer);
tokenizer->token = (struct apfl_token) {
.type = APFL_TOK_COMMENT,
@ -320,7 +320,7 @@ colon(apfl_tokenizer_ptr tokenizer)
case ':':
return yield_simple_token(tokenizer, APFL_TOK_DOUBLE_COLON, pos);
default:
unread_byte(tokenizer, pos);
unread_byte(tokenizer);
return yield_simple_token(tokenizer, APFL_TOK_COLON, pos);
}
}
@ -674,7 +674,7 @@ maybe_name_inner(
case ' ':
case '\r':
case '\t':
unread_byte(tokenizer, last_pos);
unread_byte(tokenizer);
return finalize_maybe_name(tokenizer, text, pos);
case '=':
if (is_word_byte(last_byte)) {
@ -699,7 +699,7 @@ maybe_name_inner(
default:
if (is_control_byte(byte)) {
// Disallow ASCII control characters in names
unread_byte(tokenizer, last_pos);
unread_byte(tokenizer);
return finalize_maybe_name(tokenizer, text, pos);
}
@ -760,14 +760,12 @@ non_decimal_number(
int shift,
int (*byte_to_digit)(char))
{
struct apfl_position last_pos;
bool no_digits_yet = true;
char byte;
uint64_t num = 0;
for (;;) {
last_pos = tokenizer->position;
switch (read_byte(tokenizer, &byte, no_digits_yet || need)) {
case RR_OK:
break;
@ -812,7 +810,7 @@ non_decimal_number(
return APFL_PARSE_ERROR;
}
unread_byte(tokenizer, last_pos);
unread_byte(tokenizer);
tokenizer->token = build_number_token((double)num, position, negative);
return APFL_PARSE_OK;
}
@ -852,12 +850,10 @@ number(
double divider = 1;
bool first_iteration = true;
bool seen_dot = false;
struct apfl_position last_pos;
for (;; first_iteration = false) {
char byte;
last_pos = tokenizer->position;
switch (read_byte(tokenizer, &byte, need)) {
case RR_OK:
break;
@ -897,7 +893,7 @@ number(
if (byte == '.') {
if (seen_dot) {
unread_byte(tokenizer, last_pos);
unread_byte(tokenizer);
tokenizer->token = build_number_token(num / divider, position, negative);
return APFL_PARSE_OK;
} else {
@ -915,7 +911,7 @@ number(
return APFL_PARSE_ERROR;
}
unread_byte(tokenizer, last_pos);
unread_byte(tokenizer);
tokenizer->token = build_number_token(num / divider, position, negative);
return APFL_PARSE_OK;
}