tokenizer: Manage last position automatically when unreading byte
This commit is contained in:
parent
1866963738
commit
52ea737975
1 changed file with 12 additions and 16 deletions
|
|
@ -33,6 +33,7 @@ struct apfl_tokenizer {
|
|||
char first_digit_for_negative_number;
|
||||
|
||||
struct apfl_position position;
|
||||
struct apfl_position last_position;
|
||||
bool last_byte_was_linebreak;
|
||||
|
||||
union {
|
||||
|
|
@ -115,6 +116,8 @@ read_byte(apfl_tokenizer_ptr tokenizer, char *byte, bool need)
|
|||
}
|
||||
}
|
||||
|
||||
tokenizer->last_position = tokenizer->position;
|
||||
|
||||
if (tokenizer->last_byte_was_linebreak) {
|
||||
tokenizer->position.line++;
|
||||
tokenizer->position.col = 0;
|
||||
|
|
@ -131,9 +134,9 @@ read_byte(apfl_tokenizer_ptr tokenizer, char *byte, bool need)
|
|||
|
||||
// Only at most 1 unread_byte() call is allowed after a read_byte() call!
|
||||
static void
|
||||
unread_byte(apfl_tokenizer_ptr tokenizer, struct apfl_position pos)
|
||||
unread_byte(apfl_tokenizer_ptr tokenizer)
|
||||
{
|
||||
tokenizer->position = pos;
|
||||
tokenizer->position = tokenizer->last_position;
|
||||
tokenizer->buf_pos--;
|
||||
tokenizer->last_byte_was_linebreak = false;
|
||||
}
|
||||
|
|
@ -259,13 +262,10 @@ comment(apfl_tokenizer_ptr tokenizer)
|
|||
char byte;
|
||||
|
||||
struct apfl_position pos = tokenizer->position;
|
||||
struct apfl_position last_pos;
|
||||
|
||||
struct apfl_string_builder text = apfl_string_builder_init(tokenizer->allocator);
|
||||
|
||||
for (;;) {
|
||||
last_pos = tokenizer->position;
|
||||
|
||||
switch (read_byte(tokenizer, &byte, true)) {
|
||||
case RR_OK:
|
||||
break;
|
||||
|
|
@ -282,7 +282,7 @@ comment(apfl_tokenizer_ptr tokenizer)
|
|||
}
|
||||
|
||||
if (byte == '\n') {
|
||||
unread_byte(tokenizer, last_pos);
|
||||
unread_byte(tokenizer);
|
||||
|
||||
tokenizer->token = (struct apfl_token) {
|
||||
.type = APFL_TOK_COMMENT,
|
||||
|
|
@ -320,7 +320,7 @@ colon(apfl_tokenizer_ptr tokenizer)
|
|||
case ':':
|
||||
return yield_simple_token(tokenizer, APFL_TOK_DOUBLE_COLON, pos);
|
||||
default:
|
||||
unread_byte(tokenizer, pos);
|
||||
unread_byte(tokenizer);
|
||||
return yield_simple_token(tokenizer, APFL_TOK_COLON, pos);
|
||||
}
|
||||
}
|
||||
|
|
@ -674,7 +674,7 @@ maybe_name_inner(
|
|||
case ' ':
|
||||
case '\r':
|
||||
case '\t':
|
||||
unread_byte(tokenizer, last_pos);
|
||||
unread_byte(tokenizer);
|
||||
return finalize_maybe_name(tokenizer, text, pos);
|
||||
case '=':
|
||||
if (is_word_byte(last_byte)) {
|
||||
|
|
@ -699,7 +699,7 @@ maybe_name_inner(
|
|||
default:
|
||||
if (is_control_byte(byte)) {
|
||||
// Disallow ASCII control characters in names
|
||||
unread_byte(tokenizer, last_pos);
|
||||
unread_byte(tokenizer);
|
||||
return finalize_maybe_name(tokenizer, text, pos);
|
||||
}
|
||||
|
||||
|
|
@ -760,14 +760,12 @@ non_decimal_number(
|
|||
int shift,
|
||||
int (*byte_to_digit)(char))
|
||||
{
|
||||
struct apfl_position last_pos;
|
||||
bool no_digits_yet = true;
|
||||
char byte;
|
||||
|
||||
uint64_t num = 0;
|
||||
|
||||
for (;;) {
|
||||
last_pos = tokenizer->position;
|
||||
switch (read_byte(tokenizer, &byte, no_digits_yet || need)) {
|
||||
case RR_OK:
|
||||
break;
|
||||
|
|
@ -812,7 +810,7 @@ non_decimal_number(
|
|||
return APFL_PARSE_ERROR;
|
||||
}
|
||||
|
||||
unread_byte(tokenizer, last_pos);
|
||||
unread_byte(tokenizer);
|
||||
tokenizer->token = build_number_token((double)num, position, negative);
|
||||
return APFL_PARSE_OK;
|
||||
}
|
||||
|
|
@ -852,12 +850,10 @@ number(
|
|||
double divider = 1;
|
||||
bool first_iteration = true;
|
||||
bool seen_dot = false;
|
||||
struct apfl_position last_pos;
|
||||
|
||||
for (;; first_iteration = false) {
|
||||
char byte;
|
||||
|
||||
last_pos = tokenizer->position;
|
||||
switch (read_byte(tokenizer, &byte, need)) {
|
||||
case RR_OK:
|
||||
break;
|
||||
|
|
@ -897,7 +893,7 @@ number(
|
|||
|
||||
if (byte == '.') {
|
||||
if (seen_dot) {
|
||||
unread_byte(tokenizer, last_pos);
|
||||
unread_byte(tokenizer);
|
||||
tokenizer->token = build_number_token(num / divider, position, negative);
|
||||
return APFL_PARSE_OK;
|
||||
} else {
|
||||
|
|
@ -915,7 +911,7 @@ number(
|
|||
return APFL_PARSE_ERROR;
|
||||
}
|
||||
|
||||
unread_byte(tokenizer, last_pos);
|
||||
unread_byte(tokenizer);
|
||||
tokenizer->token = build_number_token(num / divider, position, negative);
|
||||
return APFL_PARSE_OK;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue