apfl/src/tokenizer_test.c
Laria Carolin Chabowski c5fbd92f75 Various cleanup tasks, mostly simplifying the REPL
Instead of passing a flag through the parser and tokenizer for telling
the input source if we need further input or not, we steal a trick from
Lua: In the REPL, we just continue to read lines and append them to the
input, until the input was loaded with no "unexpected EOF" error. After
all, an unexpected EOF is exactly the scenario in which we need
more input.

Doing things this way simplifies a bunch of places and lets us remove
the ugly source_reader and iterative_runner concepts.

Allowing the REPL to see the error that happened during loading required
some smaller refactorings, but those were honestly for the better
anyway.

I also decided to get rid of the token_source concept; the parser now
gets the tokenizer directly. This also made things a bit simpler. Also,
I want to implement string interpolation soon-ish, and for that the
parser needs to do more with the tokenizer than just reading the next
token.

One last thing: This also cleans up the web playground and makes the
playground and REPL share a bunch of code. Nice!
2025-11-30 22:23:40 +01:00

373 lines
12 KiB
C

#include <assert.h>
#include <inttypes.h>
#include "apfl.h"
#include "test.h"
// Fixture shared by all tests in this file: a tokenizer together with the
// in-memory input it reads from. Created by new_tokenizer_test_sv() and
// released by destroy_tokenizer_test().
struct tokenizer_test {
// Test context that failures are reported through.
testctx t;
// Allocator handed to apfl_tokenizer_new() and apfl_token_deinit().
struct apfl_allocator allocator;
// Tokenizer under test.
apfl_tokenizer_ptr tokenizer;
// String reader state created from the input text.
struct apfl_io_string_reader_data string_reader;
// Reader built from a pointer to string_reader and passed to the tokenizer.
struct apfl_io_reader reader;
};
// Builds a tokenizer fixture that reads its input from the string view
// `text`.
//
// The fixture is allocated with must_alloc() and owns the string reader the
// tokenizer reads from. If the tokenizer cannot be created, the failure is
// reported through test_fatalf(). Release the result with
// destroy_tokenizer_test().
static struct tokenizer_test *
new_tokenizer_test_sv(testctx t, struct apfl_string_view text)
{
    struct apfl_allocator alloc = apfl_allocator_default();
    struct tokenizer_test *fixture = must_alloc(t, sizeof(*fixture));

    // Start from a zeroed fixture that only knows its context and allocator.
    *fixture = (struct tokenizer_test) {
        .t = t,
        .allocator = alloc,
    };

    // The reader refers to the string reader stored inside the fixture
    // itself, so it is wired up only after the fixture exists.
    fixture->string_reader = apfl_io_string_reader_create(text);
    fixture->reader = apfl_io_string_reader(&fixture->string_reader);

    fixture->tokenizer = apfl_tokenizer_new(alloc, fixture->reader);
    if (fixture->tokenizer == NULL) {
        test_fatalf(t, "Failed to initialize the tokenizer");
    }

    return fixture;
}
// Convenience wrapper around new_tokenizer_test_sv() for input given as a
// C string.
static struct tokenizer_test *
new_tokenizer_test(testctx t, const char *text)
{
    struct apfl_string_view input = apfl_string_view_from(text);
    return new_tokenizer_test_sv(t, input);
}
static void
destroy_tokenizer_test(struct tokenizer_test *tt)
{
apfl_tokenizer_destroy(tt->tokenizer);
free(tt);
}
// Asserts that the tokenizer has no more tokens.
//
// Getting a token is reported through test_fatalf(). A tokenizer error is
// reported, printed to stderr and then escalated through test_fatal().
static void
expect_eof(struct tokenizer_test *tt)
{
    switch (apfl_tokenizer_next(tt->tokenizer)) {
    case APFL_PARSE_EOF:
        // Exactly what we wanted.
        return;

    case APFL_PARSE_ERROR:
        test_failf(tt->t, "Got an error instead of an EOF");
        apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), apfl_io_file_writer(stderr));
        test_fatal(tt->t);
        return;

    case APFL_PARSE_OK:
        test_fatalf(tt->t, "Expected EOF but got a token");
        return;
    }
}
static bool
expect_token(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type, struct apfl_token *tok)
{
switch (apfl_tokenizer_next(tt->tokenizer)) {
case APFL_PARSE_OK:
break;
case APFL_PARSE_EOF:
test_fatalf(tt->t, "Got an EOF instead of a token");
break;
case APFL_PARSE_ERROR:
test_failf(tt->t, "Got an error instead of a token");
apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), apfl_io_file_writer(stderr));
test_fatal(tt->t);
break;
}
*tok = apfl_tokenizer_get_token(tt->tokenizer);
if (tok->type != type) {
test_failf(
tt->t,
"Got wrong token type %s (wanted %s)",
apfl_token_type_name(tok->type),
apfl_token_type_name(type)
);
apfl_token_deinit(tt->allocator, tok);
return false;
}
if (tok->position.line != line || tok->position.col != col) {
test_failf(
tt->t,
"Got token at wrong position %" PRIuMAX ":%" PRIuMAX
" (wanted %" PRIuMAX ":%" PRIuMAX ")",
(uintmax_t)tok->position.line,
(uintmax_t)tok->position.col,
(uintmax_t)line,
(uintmax_t)col
);
}
return true;
}
static void
expect_simple_token(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type)
{
struct apfl_token tok;
if (expect_token(tt, line, col, type, &tok)) {
apfl_token_deinit(tt->allocator, &tok);
}
}
static void
expect_text_token(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type, const char *text)
{
struct apfl_token tok;
if (expect_token(tt, line, col, type, &tok)) {
if (!apfl_string_eq(text, tok.text)) {
struct apfl_string_view sv = apfl_string_view_from(tok.text);
test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text);
}
apfl_token_deinit(tt->allocator, &tok);
}
}
static void
expect_text_token_sv(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type, struct apfl_string_view text)
{
struct apfl_token tok;
if (expect_token(tt, line, col, type, &tok)) {
if (!apfl_string_eq(text, tok.text)) {
struct apfl_string_view sv = apfl_string_view_from(tok.text);
test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text);
}
apfl_token_deinit(tt->allocator, &tok);
}
}
// Expects a token of the given type at line:col whose numeric value equals
// `num` exactly. A value mismatch is reported with test_failf(); the token
// is released afterwards either way.
static void
expect_number_token(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type, apfl_number num)
{
    struct apfl_token token;

    if (!expect_token(tt, line, col, type, &token)) {
        // Already reported (and the token released) by expect_token().
        return;
    }

    const apfl_number have = token.number;
    if (have != num) {
        test_failf(tt->t, "Token has wrong content. have=%f, want=%f", have, num);
    }

    apfl_token_deinit(tt->allocator, &token);
}
static void
expect_error(struct tokenizer_test *tt, enum apfl_error_type want)
{
struct apfl_token tok;
switch (apfl_tokenizer_next(tt->tokenizer)) {
case APFL_PARSE_OK:
tok = apfl_tokenizer_get_token(tt->tokenizer);
test_failf(tt->t, "Expected error, got token of type %s instead", apfl_token_type_name(tok.type));
apfl_token_deinit(tt->allocator, &tok);
return;
case APFL_PARSE_EOF:
test_fatalf(tt->t, "Got an EOF instead of a token");
break;
case APFL_PARSE_ERROR:
break;
}
struct apfl_error have = apfl_tokenizer_get_error(tt->tokenizer);
if (have.type != want) {
test_failf(tt->t, "Expected error of type %s, got %s instead", apfl_error_type_name(want), apfl_error_type_name(have.type));
}
}
// Empty input: the very first read must already report EOF.
TEST(empty, t) {
    struct tokenizer_test *fx = new_tokenizer_test(t, "");

    expect_eof(fx);

    destroy_tokenizer_test(fx);
}
// A single bare word is tokenized as one NAME token followed by EOF.
TEST(simple_variable, t) {
    struct tokenizer_test *fx = new_tokenizer_test(t, "hello");

    expect_text_token(fx, 1, 1, APFL_TOK_NAME, "hello");
    expect_eof(fx);

    destroy_tokenizer_test(fx);
}
TEST(numbers, t) {
struct tokenizer_test *tt = new_tokenizer_test(t,
// 1 2
// 12345678901234567890123456789
"0 1 -1 1.5 -2.25 666 0xfe 0o15"
);
expect_number_token(tt, 1, 1, APFL_TOK_NUMBER, 0);
expect_number_token(tt, 1, 3, APFL_TOK_NUMBER, 1);
expect_number_token(tt, 1, 5, APFL_TOK_NUMBER, -1);
expect_number_token(tt, 1, 8, APFL_TOK_NUMBER, 1.5);
expect_number_token(tt, 1, 12, APFL_TOK_NUMBER, -2.25);
expect_number_token(tt, 1, 18, APFL_TOK_NUMBER, 666);
expect_number_token(tt, 1, 22, APFL_TOK_NUMBER, 0xfe);
expect_number_token(tt, 1, 27, APFL_TOK_NUMBER, 015);
expect_eof(tt);
destroy_tokenizer_test(tt);
}
// Names made of letters, digits, operator characters and non-ASCII text, and
// how "->" and "=" split them.
//
// The expected columns count bytes, not characters: assuming this file is
// UTF-8 encoded, the two-byte "Δ" at column 19 puts "v" at column 21 and the
// following "==" at column 23.
//
// NOTE(review): the trailing "x+=" of the input (from column 33 on) is not
// checked and no EOF is expected at the end - confirm this is intentional.
TEST(names, t) {
    struct tokenizer_test *fx = new_tokenizer_test(t, "foo bar --->-->-> Δv == a= x12=x+=");

    expect_text_token  (fx, 1,  1, APFL_TOK_NAME, "foo");
    expect_text_token  (fx, 1,  5, APFL_TOK_NAME, "bar");

    // "--->-->->" splits into: "--", "->", "-", "->", "->"
    expect_text_token  (fx, 1,  9, APFL_TOK_NAME, "--");
    expect_simple_token(fx, 1, 11, APFL_TOK_MAPSTO);
    expect_text_token  (fx, 1, 13, APFL_TOK_NAME, "-");
    expect_simple_token(fx, 1, 14, APFL_TOK_MAPSTO);
    expect_simple_token(fx, 1, 16, APFL_TOK_MAPSTO);

    expect_text_token  (fx, 1, 19, APFL_TOK_NAME, "Δv");
    expect_text_token  (fx, 1, 23, APFL_TOK_NAME, "==");

    // A "=" directly after a name is an assignment, not part of the name.
    expect_text_token  (fx, 1, 26, APFL_TOK_NAME, "a");
    expect_simple_token(fx, 1, 27, APFL_TOK_ASSIGN);
    expect_text_token  (fx, 1, 29, APFL_TOK_NAME, "x12");
    expect_simple_token(fx, 1, 32, APFL_TOK_ASSIGN);

    destroy_tokenizer_test(fx);
}
// "a=a" without any whitespace still yields NAME, ASSIGN, NAME.
TEST(assignment, t) {
    struct tokenizer_test *fx = new_tokenizer_test(t, "a=a");

    expect_text_token  (fx, 1, 1, APFL_TOK_NAME, "a");
    expect_simple_token(fx, 1, 2, APFL_TOK_ASSIGN);
    expect_text_token  (fx, 1, 3, APFL_TOK_NAME, "a");
    expect_eof(fx);

    destroy_tokenizer_test(fx);
}
// Walks through an eight-line input that exercises the different token types
// and checks type, position and (where applicable) content of each token.
//
// The ruler above each input line gives the column of every source
// character; a gap in a ruler marks the second character of an escape
// sequence, which does not occupy a column of its own.
TEST(all_tokens, t) {
    struct tokenizer_test *fx = new_tokenizer_test(t,
      // 1234567
        "# test\n"
      // 1 2345 678901234567
        "\"abc\" def g-h*=i\n"
      // 123456789012345678901234567890
        "1234.5 -10 0x2A 0b101010 0o52\n"
      // 12345678901 2
        "'foo ;; , \\\n"
      // 1234567890123456
        "@ . ? ~ -> = :=\n"
      // 1234567
        "({[]})\n"
      // 12345678
        ": :: :=\n"
      // 1234567890
        "`foo``bar`"
    );

    // Line 1: a comment; its text excludes the leading '#'.
    expect_text_token  (fx, 1,  1, APFL_TOK_COMMENT, " test");
    expect_simple_token(fx, 1,  7, APFL_TOK_LINEBREAK);

    // Line 2: a quoted string and two names.
    expect_text_token  (fx, 2,  1, APFL_TOK_STRING, "abc");
    expect_text_token  (fx, 2,  7, APFL_TOK_NAME, "def");
    expect_text_token  (fx, 2, 11, APFL_TOK_NAME, "g-h*=i");
    expect_simple_token(fx, 2, 17, APFL_TOK_LINEBREAK);

    // Line 3: numbers; 0x2A, 0b101010 and 0o52 all spell 42.
    expect_number_token(fx, 3,  1, APFL_TOK_NUMBER, 1234.5);
    expect_number_token(fx, 3,  8, APFL_TOK_NUMBER, -10);
    expect_number_token(fx, 3, 12, APFL_TOK_NUMBER, 42);
    expect_number_token(fx, 3, 17, APFL_TOK_NUMBER, 42);
    expect_number_token(fx, 3, 26, APFL_TOK_NUMBER, 42);
    expect_simple_token(fx, 3, 30, APFL_TOK_LINEBREAK);

    // Line 4: stringify, name, semicolons, comma and a line continuation.
    expect_simple_token(fx, 4,  1, APFL_TOK_STRINGIFY);
    expect_text_token  (fx, 4,  2, APFL_TOK_NAME, "foo");
    expect_simple_token(fx, 4,  6, APFL_TOK_SEMICOLON);
    expect_simple_token(fx, 4,  7, APFL_TOK_SEMICOLON);
    expect_simple_token(fx, 4,  9, APFL_TOK_COMMA);
    expect_simple_token(fx, 4, 11, APFL_TOK_CONTINUE_LINE);
    expect_simple_token(fx, 4, 12, APFL_TOK_LINEBREAK);

    // Line 5: single-purpose punctuation and the assignment operators.
    expect_simple_token(fx, 5,  1, APFL_TOK_AT);
    expect_simple_token(fx, 5,  3, APFL_TOK_DOT);
    expect_simple_token(fx, 5,  5, APFL_TOK_QUESTION_MARK);
    expect_simple_token(fx, 5,  7, APFL_TOK_EXPAND);
    expect_simple_token(fx, 5,  9, APFL_TOK_MAPSTO);
    expect_simple_token(fx, 5, 12, APFL_TOK_ASSIGN);
    expect_simple_token(fx, 5, 14, APFL_TOK_LOCAL_ASSIGN);
    expect_simple_token(fx, 5, 16, APFL_TOK_LINEBREAK);

    // Line 6: all bracket kinds, nested without separators.
    expect_simple_token(fx, 6,  1, APFL_TOK_LPAREN);
    expect_simple_token(fx, 6,  2, APFL_TOK_LBRACE);
    expect_simple_token(fx, 6,  3, APFL_TOK_LBRACKET);
    expect_simple_token(fx, 6,  4, APFL_TOK_RBRACKET);
    expect_simple_token(fx, 6,  5, APFL_TOK_RBRACE);
    expect_simple_token(fx, 6,  6, APFL_TOK_RPAREN);
    expect_simple_token(fx, 6,  7, APFL_TOK_LINEBREAK);

    // Line 7: the tokens that start with a colon.
    expect_simple_token(fx, 7,  1, APFL_TOK_COLON);
    expect_simple_token(fx, 7,  3, APFL_TOK_DOUBLE_COLON);
    expect_simple_token(fx, 7,  6, APFL_TOK_LOCAL_ASSIGN);
    expect_simple_token(fx, 7,  8, APFL_TOK_LINEBREAK);

    // Line 8: a backtick string; the doubled backtick yields one literal
    // backtick in the token text.
    expect_text_token  (fx, 8,  1, APFL_TOK_STRING, "foo`bar");

    expect_eof(fx);

    destroy_tokenizer_test(fx);
}
// A double-quoted string may contain raw NUL and 0xFF bytes as well as \xNN
// escapes. Input and expectation are given as explicit byte arrays with a
// length, because a C string would end at the first NUL byte.
TEST(strings_with_binary_data, t) {
    // Source text: "<0x00><0xFF>\x00\x2a" (quotes included), 12 bytes.
    unsigned char source[] = {
        '"', '\x00', '\xFF', '\\', 'x', '0', '0', '\\', 'x', '2', 'a', '"',
    };
    // Expected token text: the two raw bytes followed by the two decoded
    // escapes, 4 bytes.
    unsigned char wanted[] = {'\x00', '\xFF', '\x00', '\x2A'};

    struct tokenizer_test *fx = new_tokenizer_test_sv(t, (struct apfl_string_view) {
        .bytes = source,
        .len = sizeof(source),
    });

    expect_text_token_sv(fx, 1, 1, APFL_TOK_STRING, (struct apfl_string_view) {
        .bytes = wanted,
        .len = sizeof(wanted),
    });
    expect_eof(fx);

    destroy_tokenizer_test(fx);
}
// Backtick-delimited strings: adjacent names, the empty string, an escaped
// backtick (written as two backticks) and characters such as quotes and
// line breaks appearing unchanged in the token text.
//
// The ruler gives the column of every source character; a gap marks the
// second character of an escape sequence. After the "\n" the ruler restarts
// at 1 for the second input line.
TEST(backtick_strings, t) {
    struct tokenizer_test *fx = new_tokenizer_test(
        t,
      // 1234567890123456789012345 6789 12
        "`foo`bar``baz```` `ab``c\"d'e\nf`"
    );

    expect_text_token(fx, 1,  1, APFL_TOK_STRING, "foo");
    expect_text_token(fx, 1,  6, APFL_TOK_NAME, "bar");
    expect_text_token(fx, 1,  9, APFL_TOK_STRING, "");
    expect_text_token(fx, 1, 11, APFL_TOK_NAME, "baz");
    // Four backticks: open, escaped backtick, close.
    expect_text_token(fx, 1, 14, APFL_TOK_STRING, "`");
    expect_text_token(fx, 1, 19, APFL_TOK_STRING, "ab`c\"d'e\nf");
    expect_eof(fx);

    destroy_tokenizer_test(fx);
}
// Control bytes (0x05, 0x01, 0x7F) in the input each produce an
// APFL_ERR_UNEXPECTED_BYTE error, after which tokenizing continues with the
// name that follows. The offending bytes still count as one column each.
TEST(err_invalid_bytes, t) {
    // The literal is split so that "\x05" and "\x7F" are not read as longer
    // hex escapes together with the letters that follow them.
    struct tokenizer_test *fx = new_tokenizer_test(t, "\x05" "foo\x01_bar\x7F" "baz");

    expect_error(fx, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fx, 1, 2, APFL_TOK_NAME, "foo");

    expect_error(fx, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fx, 1, 6, APFL_TOK_NAME, "_bar");

    expect_error(fx, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fx, 1, 11, APFL_TOK_NAME, "baz");

    expect_eof(fx);

    destroy_tokenizer_test(fx);
}
// Test registry: lists every TEST defined in this file. A new test case must
// be added here as well.
// NOTE(review): TESTS_BEGIN / ADDTEST / TESTS_END are not defined in this
// file; presumably they come from test.h - confirm there how the list is
// consumed.
TESTS_BEGIN
ADDTEST(empty),
ADDTEST(simple_variable),
ADDTEST(numbers),
ADDTEST(names),
ADDTEST(assignment),
ADDTEST(all_tokens),
ADDTEST(strings_with_binary_data),
ADDTEST(backtick_strings),
ADDTEST(err_invalid_bytes),
TESTS_END