2021-12-10 20:22:16 +00:00
|
|
|
#include <assert.h>
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
#include <limits.h>
|
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
#include <stddef.h>
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
|
|
#include "apfl.h"
|
|
|
|
|
|
2022-02-08 21:53:13 +00:00
|
|
|
#include "alloc.h"
|
|
|
|
|
|
2021-12-10 20:22:16 +00:00
|
|
|
/* Size in bytes of the tokenizer's read buffer. */
#define BUFSIZE 4096

/* Index type used for positions inside the read buffer. */
typedef int buf_offset;

/* buf_offset is an int, so every valid buffer index must fit into one. */
static_assert(BUFSIZE <= INT_MAX, "BUFSIZE is too large for type buf_offset");
|
|
|
|
|
|
|
|
|
|
struct apfl_tokenizer {
|
2022-02-08 21:53:13 +00:00
|
|
|
struct apfl_allocator allocator;
|
2022-04-15 20:35:36 +00:00
|
|
|
struct apfl_source_reader source_reader;
|
2021-12-10 20:22:16 +00:00
|
|
|
char *buf;
|
|
|
|
|
buf_offset buf_pos;
|
|
|
|
|
buf_offset buf_len;
|
|
|
|
|
|
|
|
|
|
enum {
|
|
|
|
|
NM_REGULAR,
|
|
|
|
|
NM_NEGATIVE_NUMBER,
|
|
|
|
|
NM_MAPSTO,
|
|
|
|
|
NM_ASSIGN,
|
|
|
|
|
NM_EOF,
|
|
|
|
|
} next_mode;
|
|
|
|
|
struct apfl_position pos_for_mapsto;
|
|
|
|
|
char first_digit_for_negative_number;
|
|
|
|
|
|
|
|
|
|
struct apfl_position position;
|
|
|
|
|
bool last_byte_was_linebreak;
|
|
|
|
|
|
|
|
|
|
union {
|
|
|
|
|
struct apfl_token token;
|
|
|
|
|
struct apfl_error error;
|
|
|
|
|
};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
apfl_tokenizer_ptr
|
2022-04-15 20:35:36 +00:00
|
|
|
apfl_tokenizer_new(struct apfl_allocator allocator, struct apfl_source_reader source_reader)
|
2021-12-10 20:22:16 +00:00
|
|
|
{
|
2022-02-08 21:53:13 +00:00
|
|
|
apfl_tokenizer_ptr tokenizer = ALLOC_OBJ(allocator, struct apfl_tokenizer);
|
2021-12-10 20:22:16 +00:00
|
|
|
if (tokenizer == NULL) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-08 21:53:13 +00:00
|
|
|
tokenizer->allocator = allocator;
|
2021-12-10 20:22:16 +00:00
|
|
|
tokenizer->source_reader = source_reader;
|
|
|
|
|
|
2022-02-08 21:53:13 +00:00
|
|
|
if ((tokenizer->buf = ALLOC_BYTES(allocator, BUFSIZE)) == NULL) {
|
|
|
|
|
FREE_OBJ(allocator, tokenizer);
|
2021-12-10 20:22:16 +00:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tokenizer->buf_pos = 0;
|
|
|
|
|
tokenizer->buf_len = 0;
|
|
|
|
|
|
|
|
|
|
tokenizer->position = (struct apfl_position) {
|
|
|
|
|
.line = 1,
|
|
|
|
|
.col = 0, // The first character was not yet read
|
|
|
|
|
};
|
|
|
|
|
tokenizer->last_byte_was_linebreak = false;
|
|
|
|
|
|
|
|
|
|
tokenizer->next_mode = NM_REGULAR;
|
|
|
|
|
|
|
|
|
|
return tokenizer;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
apfl_tokenizer_destroy(apfl_tokenizer_ptr tokenizer)
|
|
|
|
|
{
|
|
|
|
|
if (tokenizer == NULL) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-08 21:53:13 +00:00
|
|
|
FREE_BYTES(tokenizer->allocator, tokenizer->buf, BUFSIZE);
|
|
|
|
|
FREE_OBJ(tokenizer->allocator, tokenizer);
|
2021-12-10 20:22:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct apfl_token
|
|
|
|
|
apfl_tokenizer_get_token(apfl_tokenizer_ptr tokenizer)
|
|
|
|
|
{
|
|
|
|
|
return tokenizer->token;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct apfl_error
|
|
|
|
|
apfl_tokenizer_get_error(apfl_tokenizer_ptr tokenizer)
|
|
|
|
|
{
|
|
|
|
|
return tokenizer->error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Outcome of a single read_byte() call. */
enum read_result {
    RR_OK,  /* a byte was stored in the output parameter */
    RR_ERR, /* the source reader callback reported a failure */
    RR_EOF, /* the source reader delivered zero bytes */
};
|
|
|
|
|
|
|
|
|
|
static enum read_result
|
|
|
|
|
read_byte(apfl_tokenizer_ptr tokenizer, char *byte, bool need)
|
|
|
|
|
{
|
|
|
|
|
if (tokenizer->buf_pos >= tokenizer->buf_len) {
|
|
|
|
|
size_t len = BUFSIZE;
|
|
|
|
|
|
|
|
|
|
tokenizer->buf_pos = 0;
|
|
|
|
|
tokenizer->buf_len = 0;
|
|
|
|
|
|
2022-04-15 20:35:36 +00:00
|
|
|
if (!tokenizer->source_reader.callback(tokenizer->source_reader.opaque, tokenizer->buf, &len, need)) {
|
2021-12-10 20:22:16 +00:00
|
|
|
tokenizer->error.type = APFL_ERR_INPUT_ERROR;
|
|
|
|
|
return RR_ERR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tokenizer->buf_len = len;
|
|
|
|
|
|
|
|
|
|
if (len == 0) {
|
|
|
|
|
return RR_EOF;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tokenizer->last_byte_was_linebreak) {
|
|
|
|
|
tokenizer->position.line++;
|
|
|
|
|
tokenizer->position.col = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*byte = tokenizer->buf[tokenizer->buf_pos];
|
|
|
|
|
tokenizer->buf_pos++;
|
|
|
|
|
|
|
|
|
|
tokenizer->last_byte_was_linebreak = (*byte == '\n');
|
|
|
|
|
tokenizer->position.col++;
|
|
|
|
|
|
|
|
|
|
return RR_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Only at most 1 unread_byte() call is allowed after a read_byte() call!
|
|
|
|
|
static void
|
|
|
|
|
unread_byte(apfl_tokenizer_ptr tokenizer, struct apfl_position pos)
|
|
|
|
|
{
|
|
|
|
|
tokenizer->position = pos;
|
|
|
|
|
tokenizer->buf_pos--;
|
|
|
|
|
tokenizer->last_byte_was_linebreak = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
yield_simple_token(
|
|
|
|
|
apfl_tokenizer_ptr tokenizer,
|
|
|
|
|
enum apfl_token_type type,
|
|
|
|
|
struct apfl_position pos
|
|
|
|
|
) {
|
|
|
|
|
tokenizer->token.type = type;
|
|
|
|
|
tokenizer->token.position = pos;
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result comment(apfl_tokenizer_ptr);
|
|
|
|
|
static enum apfl_parse_result colon(apfl_tokenizer_ptr);
|
|
|
|
|
static enum apfl_parse_result string(apfl_tokenizer_ptr);
|
|
|
|
|
static enum apfl_parse_result maybe_name(apfl_tokenizer_ptr, bool, char);
|
|
|
|
|
static enum apfl_parse_result number(apfl_tokenizer_ptr, bool, struct apfl_position, char, bool);
|
|
|
|
|
|
2022-01-07 22:39:06 +00:00
|
|
|
/* Reports whether byte is an ASCII control character (0x00-0x1F or 0x7F). */
static bool
is_control_byte(unsigned char byte)
{
    if (byte == 0x7F) {
        return true;
    }

    return byte <= 0x1F;
}
|
|
|
|
|
|
2021-12-10 20:22:16 +00:00
|
|
|
enum apfl_parse_result
|
|
|
|
|
apfl_tokenizer_next(apfl_tokenizer_ptr tokenizer, bool need)
|
|
|
|
|
{
|
|
|
|
|
switch (tokenizer->next_mode) {
|
|
|
|
|
case NM_REGULAR:
|
|
|
|
|
break;
|
|
|
|
|
case NM_MAPSTO:
|
|
|
|
|
tokenizer->next_mode = NM_REGULAR;
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_MAPSTO, tokenizer->pos_for_mapsto);
|
|
|
|
|
case NM_NEGATIVE_NUMBER:
|
|
|
|
|
tokenizer->next_mode = NM_REGULAR;
|
|
|
|
|
return number(tokenizer, need, tokenizer->position, tokenizer->first_digit_for_negative_number, true);
|
|
|
|
|
case NM_ASSIGN:
|
|
|
|
|
tokenizer->next_mode = NM_REGULAR;
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_ASSIGN, tokenizer->position);
|
|
|
|
|
case NM_EOF:
|
|
|
|
|
return APFL_PARSE_EOF;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char byte;
|
|
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
|
switch (read_byte(tokenizer, &byte, need)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
return APFL_PARSE_EOF;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (byte) {
|
|
|
|
|
case '(':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_LPAREN, tokenizer->position);
|
|
|
|
|
case ')':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_RPAREN, tokenizer->position);
|
|
|
|
|
case '[':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_LBRACKET, tokenizer->position);
|
|
|
|
|
case ']':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_RBRACKET, tokenizer->position);
|
|
|
|
|
case '{':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_LBRACE, tokenizer->position);
|
|
|
|
|
case '}':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_RBRACE, tokenizer->position);
|
|
|
|
|
case '~':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_EXPAND, tokenizer->position);
|
|
|
|
|
case '.':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_DOT, tokenizer->position);
|
|
|
|
|
case '@':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_AT, tokenizer->position);
|
|
|
|
|
case ';':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_SEMICOLON, tokenizer->position);
|
|
|
|
|
case '\n':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_LINEBREAK, tokenizer->position);
|
|
|
|
|
case '\\':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_CONTINUE_LINE, tokenizer->position);
|
|
|
|
|
case ',':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_COMMA, tokenizer->position);
|
|
|
|
|
case '?':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_QUESTION_MARK, tokenizer->position);
|
|
|
|
|
case '\'':
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_STRINGIFY, tokenizer->position);
|
|
|
|
|
case '#':
|
|
|
|
|
return comment(tokenizer);
|
|
|
|
|
case ':':
|
|
|
|
|
return colon(tokenizer);
|
|
|
|
|
case '"':
|
|
|
|
|
return string(tokenizer);
|
|
|
|
|
case ' ':
|
|
|
|
|
case '\r':
|
|
|
|
|
case '\t':
|
|
|
|
|
// Skip whitespace
|
|
|
|
|
break;
|
|
|
|
|
default:
|
2022-01-07 22:39:06 +00:00
|
|
|
if (is_control_byte(byte)) {
|
|
|
|
|
// Disallow ASCII control characters here
|
|
|
|
|
tokenizer->error = (struct apfl_error) {
|
|
|
|
|
.type = APFL_ERR_UNEXPECTED_BYTE,
|
|
|
|
|
.position = tokenizer->position,
|
|
|
|
|
.byte = byte,
|
|
|
|
|
};
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
} else if (isdigit(byte)) {
|
2021-12-10 20:22:16 +00:00
|
|
|
return number(tokenizer, need, tokenizer->position, byte, false);
|
2022-01-07 22:39:06 +00:00
|
|
|
} else {
|
2021-12-10 20:22:16 +00:00
|
|
|
return maybe_name(tokenizer, need, byte);
|
2022-01-07 22:39:06 +00:00
|
|
|
}
|
2021-12-10 20:22:16 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
comment(apfl_tokenizer_ptr tokenizer)
|
|
|
|
|
{
|
|
|
|
|
char byte;
|
|
|
|
|
|
|
|
|
|
struct apfl_position pos = tokenizer->position;
|
|
|
|
|
struct apfl_position last_pos;
|
|
|
|
|
|
|
|
|
|
struct apfl_string_builder text;
|
2022-02-08 21:53:13 +00:00
|
|
|
apfl_string_builder_init(tokenizer->allocator, &text);
|
2021-12-10 20:22:16 +00:00
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
|
last_pos = tokenizer->position;
|
|
|
|
|
|
|
|
|
|
switch (read_byte(tokenizer, &byte, true)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
tokenizer->token = (struct apfl_token) {
|
|
|
|
|
.type = APFL_TOK_COMMENT,
|
|
|
|
|
.position = pos,
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
.text = apfl_string_builder_move_string(&text),
|
2021-12-10 20:22:16 +00:00
|
|
|
};
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (byte == '\n') {
|
|
|
|
|
unread_byte(tokenizer, last_pos);
|
|
|
|
|
|
|
|
|
|
tokenizer->token = (struct apfl_token) {
|
|
|
|
|
.type = APFL_TOK_COMMENT,
|
|
|
|
|
.position = pos,
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
.text = apfl_string_builder_move_string(&text),
|
2021-12-10 20:22:16 +00:00
|
|
|
};
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!apfl_string_builder_append_byte(&text, byte)) {
|
|
|
|
|
tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
colon(apfl_tokenizer_ptr tokenizer)
|
|
|
|
|
{
|
|
|
|
|
char byte;
|
|
|
|
|
struct apfl_position pos = tokenizer->position;
|
|
|
|
|
|
|
|
|
|
switch (read_byte(tokenizer, &byte, true)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (byte != '=') {
|
|
|
|
|
tokenizer->error = (struct apfl_error) {
|
|
|
|
|
.type = APFL_ERR_EXPECTED_EQ_AFTER_COLON,
|
|
|
|
|
.position = tokenizer->position,
|
|
|
|
|
};
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_LOCAL_ASSIGN, pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
append_single_byte(
|
|
|
|
|
apfl_tokenizer_ptr tokenizer,
|
|
|
|
|
struct apfl_string_builder *text,
|
|
|
|
|
char byte
|
|
|
|
|
) {
|
|
|
|
|
if (!apfl_string_builder_append_byte(text, byte)) {
|
|
|
|
|
tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Converts a hexadecimal digit (0-9, a-f, A-F) into its value 0..15.
 * Returns -1 for any other byte.
 */
static int
unhex(char byte)
{
    if (byte >= '0' && byte <= '9') {
        return byte - '0';
    }
    // Relies on a-f / A-F being contiguous, as in ASCII.
    if (byte >= 'a' && byte <= 'f') {
        return 0xA + (byte - 'a');
    }
    if (byte >= 'A' && byte <= 'F') {
        return 0xA + (byte - 'A');
    }

    return -1;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Converts a decimal digit into its value 0..9.
 * Returns -1 for any other byte.
 */
static int
undec(char byte)
{
    bool is_digit = byte >= '0' && byte <= '9';

    return is_digit ? byte - '0' : -1;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Converts an octal digit into its value 0..7.
 * Returns -1 for any other byte (including '8' and '9').
 */
static int
unoct(char byte)
{
    bool is_octal_digit = byte >= '0' && byte <= '7';

    return is_octal_digit ? byte - '0' : -1;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Converts a binary digit into its value 0 or 1.
 * Returns -1 for any other byte.
 */
static int
unbin(char byte)
{
    if (byte == '0') {
        return 0;
    }
    if (byte == '1') {
        return 1;
    }

    return -1;
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
hex_escape(
|
|
|
|
|
apfl_tokenizer_ptr tokenizer,
|
|
|
|
|
struct apfl_string_builder *text
|
|
|
|
|
) {
|
|
|
|
|
char escaped_byte = 0;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
|
|
|
|
char byte;
|
|
|
|
|
|
|
|
|
|
switch (read_byte(tokenizer, &byte, true)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int nibble = unhex(byte);
|
|
|
|
|
if (nibble < 0) {
|
|
|
|
|
tokenizer->error = (struct apfl_error) {
|
|
|
|
|
.type = APFL_ERR_EXPECTED_HEX_IN_HEX_ESCAPE,
|
|
|
|
|
.position = tokenizer->position,
|
|
|
|
|
};
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
escaped_byte <<= 4;
|
|
|
|
|
escaped_byte |= 0xF & nibble;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return append_single_byte(tokenizer, text, escaped_byte);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
escape_sequence(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *text)
|
|
|
|
|
{
|
|
|
|
|
struct apfl_position pos = tokenizer->position;
|
|
|
|
|
|
|
|
|
|
char byte;
|
|
|
|
|
|
|
|
|
|
switch (read_byte(tokenizer, &byte, true)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (byte) {
|
|
|
|
|
case 'x':
|
|
|
|
|
case 'X':
|
|
|
|
|
return hex_escape(tokenizer, text);
|
|
|
|
|
// case 'u':
|
|
|
|
|
// case 'U':
|
|
|
|
|
// return unicode_escape(tokenizer, pos, text);
|
|
|
|
|
case '\\':
|
|
|
|
|
return append_single_byte(tokenizer, text, '\\');
|
|
|
|
|
case 'n':
|
|
|
|
|
return append_single_byte(tokenizer, text, '\n');
|
|
|
|
|
case 'r':
|
|
|
|
|
return append_single_byte(tokenizer, text, '\r');
|
|
|
|
|
case 't':
|
|
|
|
|
return append_single_byte(tokenizer, text, '\t');
|
|
|
|
|
case '"':
|
|
|
|
|
return append_single_byte(tokenizer, text, '"');
|
|
|
|
|
case '0':
|
|
|
|
|
return append_single_byte(tokenizer, text, 0);
|
|
|
|
|
default:
|
|
|
|
|
tokenizer->error = (struct apfl_error) {
|
|
|
|
|
.type = APFL_ERR_INVALID_ESCAPE_SEQUENCE,
|
|
|
|
|
.position = pos,
|
|
|
|
|
.byte = byte,
|
|
|
|
|
};
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
inner_string(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *text)
|
|
|
|
|
{
|
|
|
|
|
struct apfl_position pos = tokenizer->position;
|
|
|
|
|
|
|
|
|
|
char byte;
|
|
|
|
|
|
|
|
|
|
enum apfl_parse_result subresult;
|
|
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
|
switch (read_byte(tokenizer, &byte, true)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (byte) {
|
|
|
|
|
case '"':
|
|
|
|
|
tokenizer->token = (struct apfl_token) {
|
|
|
|
|
.type = APFL_TOK_STRING,
|
|
|
|
|
.position = pos,
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
.text = apfl_string_builder_move_string(text),
|
2021-12-10 20:22:16 +00:00
|
|
|
};
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
case '\\':
|
|
|
|
|
if ((subresult = escape_sequence(tokenizer, text)) != APFL_PARSE_OK) {
|
|
|
|
|
return subresult;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
if (!apfl_string_builder_append_byte(text, byte)) {
|
|
|
|
|
tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
string(apfl_tokenizer_ptr tokenizer)
|
|
|
|
|
{
|
|
|
|
|
struct apfl_string_builder text;
|
2022-02-08 21:53:13 +00:00
|
|
|
apfl_string_builder_init(tokenizer->allocator, &text);
|
2021-12-10 20:22:16 +00:00
|
|
|
|
|
|
|
|
enum apfl_parse_result out = inner_string(tokenizer, &text);
|
|
|
|
|
|
|
|
|
|
apfl_string_builder_deinit(&text);
|
|
|
|
|
|
|
|
|
|
return out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
finalize_maybe_name(
|
|
|
|
|
apfl_tokenizer_ptr tokenizer,
|
|
|
|
|
struct apfl_string_builder *text,
|
|
|
|
|
struct apfl_position pos
|
|
|
|
|
) {
|
|
|
|
|
assert(text->len > 0);
|
|
|
|
|
|
|
|
|
|
if (text->len == 1 && text->bytes[0] == '=') {
|
|
|
|
|
tokenizer->token = (struct apfl_token) {
|
|
|
|
|
.type = APFL_TOK_ASSIGN,
|
|
|
|
|
.position = pos,
|
|
|
|
|
};
|
|
|
|
|
} else {
|
|
|
|
|
tokenizer->token = (struct apfl_token) {
|
|
|
|
|
.type = APFL_TOK_NAME,
|
|
|
|
|
.position = pos,
|
Implement mark&sweep garbage collection and bytecode compilation
Instead of the previous refcount base garbage collection, we're now using
a basic tri-color mark&sweep collector. This is done to support cyclical
value relationships in the future (functions can form cycles, all values
implemented up to this point can not).
The collector maintains a set of roots and a set of objects (grouped into
blocks). The GC enabled objects are no longer allocated manually, but will
be allocated by the GC. The GC also wraps an allocator, this way the GC
knows, if we ran out of memory and will try to get out of this situation by
performing a full collection cycle.
The tri-color abstraction was chosen for two reasons:
- We don't have to maintain a list of objects that need to be marked, we
can simply grab the next grey one.
- It should allow us to later implement incremental collection (right now
we only do a stop-the-world collection).
This also switches to a bytecode based evaluation of the code: We no longer
directly evaluate the AST, but first compile it into a series of
instructions, that are evaluated in a separate step. This was done in
preparation for inplementing functions: We only need to turn a function
body into instructions instead of evaluating the node again with each call
of the function. Also, since an instruction list is implemented as a GC
object, this then removes manual memory management of the function body and
it's child nodes. Since the GC and the bytecode go hand in hand, this was
done in one (giant) commit.
As a downside, we've now lost the ability do do list matching on
assignments. I've already started to work on implementing this in the new
architecture, but left it out of this commit, as it's already quite a large
commit :)
2022-04-11 20:24:22 +00:00
|
|
|
.text = apfl_string_builder_move_string(text),
|
2021-12-10 20:22:16 +00:00
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Reports whether byte counts as a "word" byte: alphanumeric according to
 * isalnum(), or any byte above 0x7F.
 */
static bool
is_word_byte(unsigned char byte)
{
    if (byte > 0x7F) {
        return true;
    }

    return isalnum(byte) != 0;
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
maybe_name_inner(
|
|
|
|
|
apfl_tokenizer_ptr tokenizer,
|
|
|
|
|
bool need,
|
|
|
|
|
char byte,
|
|
|
|
|
struct apfl_string_builder *text
|
|
|
|
|
) {
|
|
|
|
|
struct apfl_position pos = tokenizer->position;
|
|
|
|
|
struct apfl_position last_pos;
|
|
|
|
|
char last_byte;
|
|
|
|
|
|
|
|
|
|
if (!apfl_string_builder_append_byte(text, byte)) {
|
|
|
|
|
tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
|
last_byte = byte;
|
|
|
|
|
last_pos = tokenizer->position;
|
|
|
|
|
|
|
|
|
|
switch (read_byte(tokenizer, &byte, need)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
return finalize_maybe_name(tokenizer, text, pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (byte) {
|
|
|
|
|
case '(':
|
|
|
|
|
case ')':
|
|
|
|
|
case '[':
|
|
|
|
|
case ']':
|
|
|
|
|
case '{':
|
|
|
|
|
case '}':
|
|
|
|
|
case '~':
|
|
|
|
|
case '.':
|
|
|
|
|
case '@':
|
|
|
|
|
case ';':
|
|
|
|
|
case '\n':
|
|
|
|
|
case '\\':
|
|
|
|
|
case ',':
|
|
|
|
|
case '?':
|
|
|
|
|
case '\'':
|
|
|
|
|
case '#':
|
|
|
|
|
case ':':
|
|
|
|
|
case '"':
|
|
|
|
|
case ' ':
|
|
|
|
|
case '\r':
|
|
|
|
|
case '\t':
|
|
|
|
|
unread_byte(tokenizer, last_pos);
|
|
|
|
|
return finalize_maybe_name(tokenizer, text, pos);
|
|
|
|
|
case '=':
|
|
|
|
|
if (is_word_byte(last_byte)) {
|
|
|
|
|
tokenizer->next_mode = NM_ASSIGN;
|
|
|
|
|
return finalize_maybe_name(tokenizer, text, pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
case '>':
|
|
|
|
|
if (last_byte == '-') {
|
|
|
|
|
text->len--; // This removes the '-' from the end of text
|
|
|
|
|
if (text->len == 0) {
|
|
|
|
|
return yield_simple_token(tokenizer, APFL_TOK_MAPSTO, last_pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tokenizer->next_mode = NM_MAPSTO;
|
|
|
|
|
tokenizer->pos_for_mapsto = last_pos;
|
|
|
|
|
return finalize_maybe_name(tokenizer, text, pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
default:
|
2022-01-07 22:39:06 +00:00
|
|
|
if (is_control_byte(byte)) {
|
|
|
|
|
// Disallow ASCII control characters in names
|
|
|
|
|
unread_byte(tokenizer, last_pos);
|
|
|
|
|
return finalize_maybe_name(tokenizer, text, pos);
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-10 20:22:16 +00:00
|
|
|
if (isdigit(byte) && last_byte == '-') {
|
|
|
|
|
text->len--; // This removes the '-' from the end of text
|
|
|
|
|
|
|
|
|
|
if (text->len == 0) {
|
|
|
|
|
return number(tokenizer, need, pos, byte, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tokenizer->next_mode = NM_NEGATIVE_NUMBER;
|
|
|
|
|
tokenizer->first_digit_for_negative_number = byte;
|
|
|
|
|
return finalize_maybe_name(tokenizer, text, pos);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!apfl_string_builder_append_byte(text, byte)) {
|
|
|
|
|
tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
maybe_name(apfl_tokenizer_ptr tokenizer, bool need, char first_byte)
|
|
|
|
|
{
|
|
|
|
|
struct apfl_string_builder text;
|
2022-02-08 21:53:13 +00:00
|
|
|
apfl_string_builder_init(tokenizer->allocator, &text);
|
2021-12-10 20:22:16 +00:00
|
|
|
|
|
|
|
|
enum apfl_parse_result out = maybe_name_inner(tokenizer, need, first_byte, &text);
|
|
|
|
|
|
|
|
|
|
apfl_string_builder_deinit(&text);
|
|
|
|
|
|
|
|
|
|
return out;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct apfl_token
|
|
|
|
|
build_number_token(double number, struct apfl_position position, bool negative)
|
|
|
|
|
{
|
|
|
|
|
if (negative) {
|
|
|
|
|
number *= -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return (struct apfl_token) {
|
|
|
|
|
.type = APFL_TOK_NUMBER,
|
|
|
|
|
.position = position,
|
|
|
|
|
.number = (apfl_number)number,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
non_decimal_number(
|
|
|
|
|
apfl_tokenizer_ptr tokenizer,
|
|
|
|
|
bool need,
|
|
|
|
|
struct apfl_position position,
|
|
|
|
|
bool negative,
|
|
|
|
|
int shift,
|
|
|
|
|
int (*byte_to_digit)(char))
|
|
|
|
|
{
|
|
|
|
|
struct apfl_position last_pos;
|
|
|
|
|
bool no_digits_yet = true;
|
|
|
|
|
char byte;
|
|
|
|
|
|
|
|
|
|
uint64_t num = 0;
|
|
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
|
last_pos = tokenizer->position;
|
|
|
|
|
switch (read_byte(tokenizer, &byte, no_digits_yet || need)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
if (no_digits_yet) {
|
|
|
|
|
tokenizer->error = (struct apfl_error) {
|
|
|
|
|
.type = APFL_ERR_UNEXPECTED_EOF,
|
|
|
|
|
};
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
} else {
|
|
|
|
|
tokenizer->token = build_number_token((double)num, position, negative);
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int digit = byte_to_digit(byte);
|
|
|
|
|
if (digit >= 0) {
|
|
|
|
|
num <<= shift;
|
|
|
|
|
num |= digit;
|
|
|
|
|
|
|
|
|
|
no_digits_yet = false;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (no_digits_yet) {
|
|
|
|
|
tokenizer->error = (struct apfl_error) {
|
|
|
|
|
.type = APFL_ERR_EXPECTED_DIGIT,
|
|
|
|
|
.position = tokenizer->position,
|
|
|
|
|
};
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (is_word_byte(byte)) {
|
|
|
|
|
tokenizer->error = (struct apfl_error) {
|
|
|
|
|
.type = APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER,
|
|
|
|
|
.position = tokenizer->position,
|
|
|
|
|
.byte = byte,
|
|
|
|
|
};
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unread_byte(tokenizer, last_pos);
|
|
|
|
|
tokenizer->token = build_number_token((double)num, position, negative);
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define BUILD_NON_DECIMAL_TOKENIZER(name, shift, byte_to_digit) \
|
|
|
|
|
static enum apfl_parse_result \
|
|
|
|
|
name( \
|
|
|
|
|
apfl_tokenizer_ptr tokenizer, \
|
|
|
|
|
bool need, \
|
|
|
|
|
struct apfl_position position, \
|
|
|
|
|
bool negative \
|
|
|
|
|
) { \
|
|
|
|
|
return non_decimal_number( \
|
|
|
|
|
tokenizer, \
|
|
|
|
|
need, \
|
|
|
|
|
position, \
|
|
|
|
|
negative, \
|
|
|
|
|
shift, \
|
|
|
|
|
byte_to_digit \
|
|
|
|
|
); \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
BUILD_NON_DECIMAL_TOKENIZER(hex_number, 4, unhex)
|
|
|
|
|
BUILD_NON_DECIMAL_TOKENIZER(oct_number, 3, unoct)
|
|
|
|
|
BUILD_NON_DECIMAL_TOKENIZER(bin_number, 1, unbin)
|
|
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
number(
|
|
|
|
|
apfl_tokenizer_ptr tokenizer,
|
|
|
|
|
bool need,
|
|
|
|
|
struct apfl_position position,
|
|
|
|
|
char first_digit,
|
|
|
|
|
bool negative
|
|
|
|
|
) {
|
|
|
|
|
double num = (double)undec(first_digit);
|
|
|
|
|
double divider = 1;
|
|
|
|
|
bool first_iteration = true;
|
|
|
|
|
bool seen_dot = false;
|
|
|
|
|
struct apfl_position last_pos;
|
|
|
|
|
|
|
|
|
|
for (;; first_iteration = false) {
|
|
|
|
|
char byte;
|
|
|
|
|
|
|
|
|
|
last_pos = tokenizer->position;
|
|
|
|
|
switch (read_byte(tokenizer, &byte, need)) {
|
|
|
|
|
case RR_OK:
|
|
|
|
|
break;
|
|
|
|
|
case RR_ERR:
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
case RR_EOF:
|
|
|
|
|
tokenizer->next_mode = NM_EOF;
|
|
|
|
|
tokenizer->token = build_number_token(num / divider, position, negative);
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (first_iteration && first_digit == '0') {
|
|
|
|
|
switch (byte) {
|
|
|
|
|
case 'x':
|
|
|
|
|
case 'X':
|
|
|
|
|
return hex_number(tokenizer, need, position, negative);
|
|
|
|
|
case 'b':
|
|
|
|
|
case 'B':
|
|
|
|
|
return bin_number(tokenizer, need, position, negative);
|
|
|
|
|
case 'o':
|
|
|
|
|
case 'O':
|
|
|
|
|
return oct_number(tokenizer, need, position, negative);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int digit = undec(byte);
|
|
|
|
|
if (digit >= 0) {
|
|
|
|
|
num *= 10;
|
|
|
|
|
num += (double)digit;
|
|
|
|
|
|
|
|
|
|
if (seen_dot) {
|
|
|
|
|
divider *= 10;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (byte == '.') {
|
|
|
|
|
if (seen_dot) {
|
|
|
|
|
unread_byte(tokenizer, last_pos);
|
|
|
|
|
tokenizer->token = build_number_token(num / divider, position, negative);
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
} else {
|
|
|
|
|
seen_dot = true;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (is_word_byte(byte)) {
|
|
|
|
|
tokenizer->error = (struct apfl_error) {
|
|
|
|
|
.type = APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER,
|
|
|
|
|
.position = tokenizer->position,
|
|
|
|
|
.byte = byte,
|
|
|
|
|
};
|
|
|
|
|
return APFL_PARSE_ERROR;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unread_byte(tokenizer, last_pos);
|
|
|
|
|
tokenizer->token = build_number_token(num / divider, position, negative);
|
|
|
|
|
return APFL_PARSE_OK;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-15 20:28:44 +00:00
|
|
|
|
|
|
|
|
static enum apfl_parse_result
|
|
|
|
|
token_source_wrap_next(void *opaque, bool need)
|
|
|
|
|
{
|
|
|
|
|
return apfl_tokenizer_next(opaque, need);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct apfl_token
|
|
|
|
|
token_source_wrap_get_token(void *opaque)
|
|
|
|
|
{
|
|
|
|
|
return apfl_tokenizer_get_token(opaque);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct apfl_error
|
|
|
|
|
token_source_wrap_get_error(void *opaque)
|
|
|
|
|
{
|
|
|
|
|
return apfl_tokenizer_get_error(opaque);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct apfl_parser_token_source
|
|
|
|
|
apfl_tokenizer_as_token_source(apfl_tokenizer_ptr p)
|
|
|
|
|
{
|
|
|
|
|
return (struct apfl_parser_token_source) {
|
|
|
|
|
.next = token_source_wrap_next,
|
|
|
|
|
.get_token = token_source_wrap_get_token,
|
|
|
|
|
.get_error = token_source_wrap_get_error,
|
|
|
|
|
.opaque = p,
|
|
|
|
|
};
|
|
|
|
|
}
|