#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include "apfl.h"

#include "alloc.h"
#include "parsing.h"

#define BUFSIZE 4096
typedef int buf_offset;
static_assert(INT_MAX >= BUFSIZE, "BUFSIZE is too large for type buf_offset");
static_assert(BUFSIZE >= 2, "BUFSIZE must be at least 2");

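// Tokenizer state: the wrapped input reader, a fixed-size read buffer,
// line/column tracking for error positions, and a one-entry queue
// (next_mode) for a token that is discovered while still scanning the
// previous one (e.g. the "->" at the end of "foo->"). The result of the
// last apfl_tokenizer_next() call lives in the token/error union; which
// member is valid depends on whether that call reported an error.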
struct apfl_tokenizer {
    struct apfl_allocator allocator;
    struct apfl_io_reader source_reader;
    unsigned char *buf;
    buf_offset buf_pos;
    buf_offset buf_len;

    enum {
        NM_REGULAR,
        NM_MAPSTO,
        NM_ASSIGN,
        NM_EOF,
    } next_mode;
    struct apfl_position pos_for_mapsto;

    struct apfl_position position;
    struct apfl_position last_position;
    bool last_byte_was_linebreak;
    bool prev_last_byte_was_linebreak;

    union {
        struct apfl_token token;
        struct apfl_error error;
    };
};

apfl_tokenizer_ptr
apfl_tokenizer_new(struct apfl_allocator allocator, struct apfl_io_reader source_reader)
{
    apfl_tokenizer_ptr tokenizer = ALLOC_OBJ(allocator, struct apfl_tokenizer);
    if (tokenizer == NULL) {
        return NULL;
    }

    tokenizer->allocator = allocator;
    tokenizer->source_reader = source_reader;

    if ((tokenizer->buf = ALLOC_BYTES(allocator, BUFSIZE)) == NULL) {
        FREE_OBJ(allocator, tokenizer);
        return NULL;
    }

    tokenizer->buf_pos = 0;
    tokenizer->buf_len = 0;

    tokenizer->position = (struct apfl_position) {
        .line = 1,
        .col = 0, // The first character has not been read yet
    };
    tokenizer->last_byte_was_linebreak = false;
    tokenizer->prev_last_byte_was_linebreak = false;

    tokenizer->next_mode = NM_REGULAR;

    return tokenizer;
}

void
apfl_tokenizer_destroy(apfl_tokenizer_ptr tokenizer)
{
    if (tokenizer == NULL) {
        return;
    }

    FREE_BYTES(tokenizer->allocator, tokenizer->buf, BUFSIZE);
    FREE_OBJ(tokenizer->allocator, tokenizer);
}

struct apfl_token
apfl_tokenizer_get_token(apfl_tokenizer_ptr tokenizer)
{
    return tokenizer->token;
}

struct apfl_error
apfl_tokenizer_get_error(apfl_tokenizer_ptr tokenizer)
{
    return tokenizer->error;
}

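// Reads the next byte from the source, refilling the buffer as needed.
// When refilling, the last byte of the previous buffer content is kept
// at buf[0] so that a single unread_byte() remains possible across the
// refill boundary (this is why BUFSIZE must be at least 2). Line
// accounting is deferred: the line counter only advances when the byte
// *after* a '\n' is read, so the linebreak itself is still reported at
// the position it occupies.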
static enum read_result
read_byte(apfl_tokenizer_ptr tokenizer, unsigned char *byte)
{
    if (tokenizer->buf_pos >= tokenizer->buf_len) {
        size_t off = 0;
        if (tokenizer->buf_len > 0) {
            off = 1;
            tokenizer->buf[0] = tokenizer->buf[tokenizer->buf_len - 1];
        }

        size_t len = BUFSIZE - off;

        tokenizer->buf_pos = off;
        tokenizer->buf_len = off;

        if (!apfl_io_read_bytes(tokenizer->source_reader, tokenizer->buf+off, &len)) {
            tokenizer->error.type = APFL_ERR_INPUT_ERROR;
            return RR_ERR;
        }

        tokenizer->buf_len = len + off;

        if (len == 0) {
            return RR_EOF;
        }
    }

    tokenizer->prev_last_byte_was_linebreak = tokenizer->last_byte_was_linebreak;
    tokenizer->last_position = tokenizer->position;

    if (tokenizer->last_byte_was_linebreak) {
        tokenizer->position.line++;
        tokenizer->position.col = 0;
    }

    *byte = tokenizer->buf[tokenizer->buf_pos];
    tokenizer->buf_pos++;

    tokenizer->last_byte_was_linebreak = (*byte == '\n');
    tokenizer->position.col++;

    return RR_OK;
}

// At most one unread_byte() call is allowed after each read_byte() call!
static void
unread_byte(apfl_tokenizer_ptr tokenizer)
{
    tokenizer->position = tokenizer->last_position;
    tokenizer->last_byte_was_linebreak = tokenizer->prev_last_byte_was_linebreak;

    assert(tokenizer->buf_pos > 0);
    tokenizer->buf_pos--;
}

static enum apfl_parse_result
yield_simple_token(
    apfl_tokenizer_ptr tokenizer,
    enum apfl_token_type type,
    struct apfl_position pos
) {
    tokenizer->token.type = type;
    tokenizer->token.position = pos;
    return APFL_PARSE_OK;
}

static enum apfl_parse_result comment(apfl_tokenizer_ptr);
static enum apfl_parse_result colon(apfl_tokenizer_ptr);
static enum apfl_parse_result string(apfl_tokenizer_ptr);
static enum apfl_parse_result backtick_string(apfl_tokenizer_ptr);
static enum apfl_parse_result maybe_name(apfl_tokenizer_ptr, unsigned char);
static enum apfl_parse_result number(apfl_tokenizer_ptr, unsigned, struct apfl_position, bool);
static enum apfl_parse_result zero(apfl_tokenizer_ptr, struct apfl_position, bool);

static bool
is_control_byte(unsigned char byte)
{
    return byte < 0x20 || byte == 0x7F;
}

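// A '-' is ambiguous: it can start a negative number ("-5", "-0x10"),
// form the "->" mapsto token, be the name "-" on its own (at EOF), or
// begin a regular name like "-foo". Decide by looking at the next byte.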
static enum apfl_parse_result
minus(apfl_tokenizer_ptr tokenizer)
{
    struct apfl_position pos = tokenizer->position;

    unsigned char byte;
    switch (read_byte(tokenizer, &byte)) {
    case RR_OK:
        break;
    case RR_ERR:
        return APFL_PARSE_ERROR;
    case RR_EOF:
        tokenizer->next_mode = NM_EOF;
        struct apfl_string str = apfl_string_blank();
        if (!apfl_string_copy(tokenizer->allocator, &str, apfl_string_view_from("-"))) {
            tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
            return APFL_PARSE_ERROR;
        }
        tokenizer->token = (struct apfl_token) {
            .type = APFL_TOK_NAME,
            .position = pos,
            .text = str,
        };
        return APFL_PARSE_OK;
    }

    switch (byte) {
    case '0':
        return zero(tokenizer, pos, true);
    case '>':
        return yield_simple_token(tokenizer, APFL_TOK_MAPSTO, pos);
    default:
        unread_byte(tokenizer);
        if (isdigit(byte)) {
            return number(tokenizer, 10, pos, true);
        } else {
            return maybe_name(tokenizer, '-');
        }
    }
}

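// Produces the next token. First drains any token queued in next_mode,
// then skips whitespace and dispatches on the first significant byte.
// Single-byte tokens are yielded directly; everything else is handled
// by the helper for the respective token class.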
enum apfl_parse_result
apfl_tokenizer_next(apfl_tokenizer_ptr tokenizer)
{
    switch (tokenizer->next_mode) {
    case NM_REGULAR:
        break;
    case NM_MAPSTO:
        tokenizer->next_mode = NM_REGULAR;
        return yield_simple_token(tokenizer, APFL_TOK_MAPSTO, tokenizer->pos_for_mapsto);
    case NM_ASSIGN:
        tokenizer->next_mode = NM_REGULAR;
        return yield_simple_token(tokenizer, APFL_TOK_ASSIGN, tokenizer->position);
    case NM_EOF:
        return APFL_PARSE_EOF;
    }

    unsigned char byte;

    for (;;) {
        switch (read_byte(tokenizer, &byte)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            return APFL_PARSE_EOF;
        }

        switch (byte) {
        case '(':
            return yield_simple_token(tokenizer, APFL_TOK_LPAREN, tokenizer->position);
        case ')':
            return yield_simple_token(tokenizer, APFL_TOK_RPAREN, tokenizer->position);
        case '[':
            return yield_simple_token(tokenizer, APFL_TOK_LBRACKET, tokenizer->position);
        case ']':
            return yield_simple_token(tokenizer, APFL_TOK_RBRACKET, tokenizer->position);
        case '{':
            return yield_simple_token(tokenizer, APFL_TOK_LBRACE, tokenizer->position);
        case '}':
            return yield_simple_token(tokenizer, APFL_TOK_RBRACE, tokenizer->position);
        case '~':
            return yield_simple_token(tokenizer, APFL_TOK_EXPAND, tokenizer->position);
        case '.':
            return yield_simple_token(tokenizer, APFL_TOK_DOT, tokenizer->position);
        case '@':
            return yield_simple_token(tokenizer, APFL_TOK_AT, tokenizer->position);
        case ';':
            return yield_simple_token(tokenizer, APFL_TOK_SEMICOLON, tokenizer->position);
        case '\n':
            return yield_simple_token(tokenizer, APFL_TOK_LINEBREAK, tokenizer->position);
        case '\\':
            return yield_simple_token(tokenizer, APFL_TOK_CONTINUE_LINE, tokenizer->position);
        case ',':
            return yield_simple_token(tokenizer, APFL_TOK_COMMA, tokenizer->position);
        case '?':
            return yield_simple_token(tokenizer, APFL_TOK_QUESTION_MARK, tokenizer->position);
        case '\'':
            return yield_simple_token(tokenizer, APFL_TOK_STRINGIFY, tokenizer->position);
        case '`':
            return backtick_string(tokenizer);
        case '#':
            return comment(tokenizer);
        case ':':
            return colon(tokenizer);
        case '"':
            return string(tokenizer);
        case '-':
            return minus(tokenizer);
        case ' ':
        case '\r':
        case '\t':
            // Skip whitespace
            break;
        case '0':
            return zero(tokenizer, tokenizer->position, false);
        default:
            if (is_control_byte(byte)) {
                // Disallow ASCII control characters here
                tokenizer->error = (struct apfl_error) {
                    .type = APFL_ERR_UNEXPECTED_BYTE,
                    .position = tokenizer->position,
                    .byte = byte,
                };
                return APFL_PARSE_ERROR;
            } else if (isdigit(byte)) {
                struct apfl_position position = tokenizer->position;
                unread_byte(tokenizer);
                return number(tokenizer, 10, position, false);
            } else {
                return maybe_name(tokenizer, byte);
            }
        }
    }
}

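// '#' starts a comment that runs to the end of the line. The '\n' is
// pushed back so the caller still sees a separate linebreak token; at
// EOF the comment token is finalized without one.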
static enum apfl_parse_result
comment(apfl_tokenizer_ptr tokenizer)
{
    unsigned char byte;

    struct apfl_position pos = tokenizer->position;

    struct apfl_string_builder text = apfl_string_builder_init(tokenizer->allocator);

    for (;;) {
        switch (read_byte(tokenizer, &byte)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            tokenizer->token = (struct apfl_token) {
                .type = APFL_TOK_COMMENT,
                .position = pos,
                .text = apfl_string_builder_move_string(&text),
            };
            return APFL_PARSE_OK;
        }

        if (byte == '\n') {
            unread_byte(tokenizer);

            tokenizer->token = (struct apfl_token) {
                .type = APFL_TOK_COMMENT,
                .position = pos,
                .text = apfl_string_builder_move_string(&text),
            };
            return APFL_PARSE_OK;
        }

        if (!apfl_string_builder_append_byte(&text, byte)) {
            tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
            return APFL_PARSE_ERROR;
        }
    }
}

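// ':' may be a plain colon, or the first byte of ":=" (local assign)
// or "::" (double colon).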
static enum apfl_parse_result
colon(apfl_tokenizer_ptr tokenizer)
{
    unsigned char byte;
    struct apfl_position pos = tokenizer->position;

    switch (read_byte(tokenizer, &byte)) {
    case RR_OK:
        break;
    case RR_ERR:
        return APFL_PARSE_ERROR;
    case RR_EOF:
        return yield_simple_token(tokenizer, APFL_TOK_COLON, pos);
    }

    switch (byte) {
    case '=':
        return yield_simple_token(tokenizer, APFL_TOK_LOCAL_ASSIGN, pos);
    case ':':
        return yield_simple_token(tokenizer, APFL_TOK_DOUBLE_COLON, pos);
    default:
        unread_byte(tokenizer);
        return yield_simple_token(tokenizer, APFL_TOK_COLON, pos);
    }
}

static enum apfl_parse_result
append_single_byte(
    apfl_tokenizer_ptr tokenizer,
    struct apfl_string_builder *text,
    char byte
) {
    if (!apfl_string_builder_append_byte(text, byte)) {
        tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
        return APFL_PARSE_ERROR;
    }

    return APFL_PARSE_OK;
}

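// Handles the two hex digits of a "\xNN" escape, folding them into a
// single byte that is appended to the string under construction.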
static enum apfl_parse_result
hex_escape(
    apfl_tokenizer_ptr tokenizer,
    struct apfl_string_builder *text
) {
    unsigned char escaped_byte = 0;

    for (int i = 0; i < 2; i++) {
        unsigned char byte;
        switch (read_byte(tokenizer, &byte)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
            return APFL_PARSE_ERROR;
        }

        int nibble = apfl_parse_digit(byte);
        if (nibble < 0 || nibble > 0xF) {
            tokenizer->error = (struct apfl_error) {
                .type = APFL_ERR_EXPECTED_HEX_IN_HEX_ESCAPE,
                .position = tokenizer->position,
            };
            return APFL_PARSE_ERROR;
        }

        escaped_byte <<= 4;
        escaped_byte |= 0xF & nibble;
    }

    return append_single_byte(tokenizer, text, escaped_byte);
}

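// Called after a '\\' inside a double-quoted string. Supported escapes:
// \x/\X (hex byte), \\, \n, \r, \t, \" and \0; anything else is an
// invalid-escape error. (Unicode escapes are sketched in the
// commented-out cases but not implemented yet.)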
static enum apfl_parse_result
escape_sequence(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *text)
{
    struct apfl_position pos = tokenizer->position;

    unsigned char byte;
    switch (read_byte(tokenizer, &byte)) {
    case RR_OK:
        break;
    case RR_ERR:
        return APFL_PARSE_ERROR;
    case RR_EOF:
        tokenizer->next_mode = NM_EOF;
        tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
        return APFL_PARSE_ERROR;
    }

    switch (byte) {
    case 'x':
    case 'X':
        return hex_escape(tokenizer, text);
    // case 'u':
    // case 'U':
    //     return unicode_escape(tokenizer, pos, text);
    case '\\':
        return append_single_byte(tokenizer, text, '\\');
    case 'n':
        return append_single_byte(tokenizer, text, '\n');
    case 'r':
        return append_single_byte(tokenizer, text, '\r');
    case 't':
        return append_single_byte(tokenizer, text, '\t');
    case '"':
        return append_single_byte(tokenizer, text, '"');
    case '0':
        return append_single_byte(tokenizer, text, 0);
    default:
        tokenizer->error = (struct apfl_error) {
            .type = APFL_ERR_INVALID_ESCAPE_SEQUENCE,
            .position = pos,
            .byte = byte,
        };
        return APFL_PARSE_ERROR;
    }
}

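// Scans a double-quoted string. Bytes are accumulated in *text until
// the closing '"', with '\\' deferring to escape_sequence(); on success
// the accumulated text is moved into the token. Hitting EOF inside the
// string is an error.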
static enum apfl_parse_result
inner_string(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *text)
{
    struct apfl_position pos = tokenizer->position;

    unsigned char byte;

    enum apfl_parse_result subresult;

    for (;;) {
        switch (read_byte(tokenizer, &byte)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
            return APFL_PARSE_ERROR;
        }

        switch (byte) {
        case '"':
            tokenizer->token = (struct apfl_token) {
                .type = APFL_TOK_STRING,
                .position = pos,
                .text = apfl_string_builder_move_string(text),
            };
            return APFL_PARSE_OK;
        case '\\':
            if ((subresult = escape_sequence(tokenizer, text)) != APFL_PARSE_OK) {
                return subresult;
            }
            break;
        default:
            if (!apfl_string_builder_append_byte(text, byte)) {
                tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
                return APFL_PARSE_ERROR;
            }
        }
    }
}

static enum apfl_parse_result
string(apfl_tokenizer_ptr tokenizer)
{
    struct apfl_string_builder text = apfl_string_builder_init(tokenizer->allocator);

    enum apfl_parse_result out = inner_string(tokenizer, &text);

    apfl_string_builder_deinit(&text);

    return out;
}

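// Scans a backtick string. There are no escape sequences; the only
// special case is a doubled backtick, which produces a literal '`'.
// A single backtick (or EOF right after one) terminates the string.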
static enum apfl_parse_result
inner_backtick_string(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *text)
{
    struct apfl_position pos = tokenizer->position;

    unsigned char byte;

    for (;;) {
        switch (read_byte(tokenizer, &byte)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
            return APFL_PARSE_ERROR;
        }

        if (byte != '`') {
            if (!apfl_string_builder_append_byte(text, byte)) {
                tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
                return APFL_PARSE_ERROR;
            }
            continue;
        }

        switch (read_byte(tokenizer, &byte)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            goto finalize;
        }

        if (byte == '`') {
            if (!apfl_string_builder_append_byte(text, '`')) {
                tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
                return APFL_PARSE_ERROR;
            }
            continue;
        }

        unread_byte(tokenizer);

        goto finalize;
    }

finalize:
    tokenizer->token = (struct apfl_token) {
        .type = APFL_TOK_STRING,
        .position = pos,
        .text = apfl_string_builder_move_string(text),
    };
    return APFL_PARSE_OK;
}

static enum apfl_parse_result
backtick_string(apfl_tokenizer_ptr tokenizer)
{
    struct apfl_string_builder text = apfl_string_builder_init(tokenizer->allocator);

    enum apfl_parse_result out = inner_backtick_string(tokenizer, &text);

    apfl_string_builder_deinit(&text);

    return out;
}

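// Turns the accumulated text into a token: a lone "=" is the assign
// token, everything else is a name.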
static enum apfl_parse_result
finalize_maybe_name(
    apfl_tokenizer_ptr tokenizer,
    struct apfl_string_builder *text,
    struct apfl_position pos
) {
    assert(text->len > 0);

    if (text->len == 1 && text->bytes[0] == '=') {
        tokenizer->token = (struct apfl_token) {
            .type = APFL_TOK_ASSIGN,
            .position = pos,
        };
    } else {
        tokenizer->token = (struct apfl_token) {
            .type = APFL_TOK_NAME,
            .position = pos,
            .text = apfl_string_builder_move_string(text),
        };
    }

    return APFL_PARSE_OK;
}

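// Bytes that can make up a word: ASCII alphanumerics plus everything
// above 0x7F, so multi-byte UTF-8 sequences count as word bytes.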
static bool
is_word_byte(unsigned char byte)
{
    return isalnum(byte) || byte > 0x7F;
}

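// Accumulates a name token byte by byte. Delimiters (brackets,
// punctuation, whitespace, control bytes) end the name and are pushed
// back. Two lookahead cases use the one-slot queue instead: "name="
// yields the name now and an assign token on the next call (NM_ASSIGN),
// and "name->" strips the trailing '-' from the name and queues the
// mapsto token (NM_MAPSTO).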
static enum apfl_parse_result
maybe_name_inner(
    apfl_tokenizer_ptr tokenizer,
    unsigned char byte,
    struct apfl_string_builder *text
) {
    struct apfl_position pos = tokenizer->position;
    struct apfl_position last_pos;
    unsigned char last_byte;

    if (!apfl_string_builder_append_byte(text, byte)) {
        tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
        return APFL_PARSE_ERROR;
    }

    for (;;) {
        last_byte = byte;
        last_pos = tokenizer->position;

        switch (read_byte(tokenizer, &byte)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            return finalize_maybe_name(tokenizer, text, pos);
        }

        switch (byte) {
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '~':
        case '.':
        case '@':
        case ';':
        case '\n':
        case '\\':
        case ',':
        case '?':
        case '\'':
        case '#':
        case ':':
        case '"':
        case '`':
        case ' ':
        case '\r':
        case '\t':
            unread_byte(tokenizer);
            return finalize_maybe_name(tokenizer, text, pos);
        case '=':
            if (is_word_byte(last_byte)) {
                tokenizer->next_mode = NM_ASSIGN;
                return finalize_maybe_name(tokenizer, text, pos);
            }

            break;
        case '>':
            if (last_byte == '-') {
                text->len--; // This removes the '-' from the end of text
                if (text->len == 0) {
                    return yield_simple_token(tokenizer, APFL_TOK_MAPSTO, last_pos);
                }

                tokenizer->next_mode = NM_MAPSTO;
                tokenizer->pos_for_mapsto = last_pos;
                return finalize_maybe_name(tokenizer, text, pos);
            }

            break;
        default:
            if (is_control_byte(byte)) {
                // Disallow ASCII control characters in names
                unread_byte(tokenizer);
                return finalize_maybe_name(tokenizer, text, pos);
            }

            break;
        }

        if (!apfl_string_builder_append_byte(text, byte)) {
            tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
            return APFL_PARSE_ERROR;
        }
    }
}

static enum apfl_parse_result
maybe_name(apfl_tokenizer_ptr tokenizer, unsigned char first_byte)
{
    struct apfl_string_builder text = apfl_string_builder_init(tokenizer->allocator);

    enum apfl_parse_result out = maybe_name_inner(tokenizer, first_byte, &text);

    apfl_string_builder_deinit(&text);

    return out;
}

static struct apfl_token
build_number_token(double number, struct apfl_position position, bool negative)
{
    if (negative) {
        number *= -1;
    }

    return (struct apfl_token) {
        .type = APFL_TOK_NUMBER,
        .position = position,
        .number = (apfl_number)number,
    };
}

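// A leading '0' may introduce a base prefix: 0x/0X (hex), 0o/0O (octal)
// or 0b/0B (binary). Anything else is pushed back and parsed as a
// decimal number.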
static enum apfl_parse_result
zero(apfl_tokenizer_ptr tokenizer, struct apfl_position position, bool negative)
{
    unsigned char byte;
    switch (read_byte(tokenizer, &byte)) {
    case RR_OK:
        break;
    case RR_ERR:
        return APFL_PARSE_ERROR;
    case RR_EOF:
        tokenizer->next_mode = NM_EOF;
        tokenizer->token = build_number_token(0, position, negative);
        return APFL_PARSE_OK;
    }

    switch (byte) {
    case 'x':
    case 'X':
        return number(tokenizer, 16, position, negative);
    case 'o':
    case 'O':
        return number(tokenizer, 8, position, negative);
    case 'b':
    case 'B':
        return number(tokenizer, 2, position, negative);
    default:
        unread_byte(tokenizer);
        return number(tokenizer, 10, position, negative);
    }
}

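// Adapters that let apfl_parse_number() pull bytes through the
// tokenizer's buffered reader via opaque callbacks.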
static enum read_result
read_for_parse_number(void *opaque, unsigned char *byte)
{
    apfl_tokenizer_ptr tokenizer = opaque;
    return read_byte(tokenizer, byte);
}

static void
unread_for_parse_number(void *opaque)
{
    apfl_tokenizer_ptr tokenizer = opaque;
    unread_byte(tokenizer);
}

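// Parses the digits of a number in the given base and applies the sign.
// A word byte directly after the digits (as in "12ab") is rejected so
// that numbers and names cannot run together.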
static enum apfl_parse_result
number(apfl_tokenizer_ptr tokenizer, unsigned base, struct apfl_position pos, bool negative)
{
    apfl_number num;
    if (!apfl_parse_number(
        base,
        read_for_parse_number,
        unread_for_parse_number,
        tokenizer,
        &num
    )) {
        return APFL_PARSE_ERROR;
    }

    unsigned char byte;
    switch (read_byte(tokenizer, &byte)) {
    case RR_OK:
        break;
    case RR_ERR:
        return APFL_PARSE_ERROR;
    case RR_EOF:
        tokenizer->next_mode = NM_EOF;
        tokenizer->token = build_number_token(num, pos, negative);
        return APFL_PARSE_OK;
    }

    if (is_word_byte(byte)) {
        tokenizer->error = (struct apfl_error) {
            .type = APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER,
            .position = tokenizer->position,
            .byte = byte,
        };
        return APFL_PARSE_ERROR;
    }

    unread_byte(tokenizer);
    tokenizer->token = build_number_token(num, pos, negative);
    return APFL_PARSE_OK;
}