// apfl/src/tokenizer_test.c
// (Repository viewer metadata: 350 lines, 11 KiB, C.)

2021-12-10 20:22:16 +00:00
#include <assert.h>
2023-02-16 20:41:02 +00:00
#include <inttypes.h>
2021-12-10 20:22:16 +00:00
#include "apfl.h"
#include "test.h"
struct tokenizer_test {
testctx t;
struct apfl_allocator allocator;
2021-12-10 20:22:16 +00:00
apfl_tokenizer_ptr tokenizer;
2023-03-05 16:02:42 +00:00
struct apfl_io_string_reader_data string_reader;
struct apfl_io_reader reader;
2021-12-10 20:22:16 +00:00
};
// Builds a heap-allocated fixture whose tokenizer reads from `text`.
//
// The fixture is allocated with must_alloc() and must be released with
// destroy_tokenizer_test(). If the tokenizer cannot be created, the test is
// aborted via test_fatalf().
static struct tokenizer_test *
new_tokenizer_test_sv(testctx t, struct apfl_string_view text)
{
    struct apfl_allocator default_allocator = apfl_allocator_default();

    struct tokenizer_test *fixture = must_alloc(t, sizeof(struct tokenizer_test));
    *fixture = (struct tokenizer_test) {
        .t = t,
        .allocator = default_allocator,
    };

    // The readers are wired up in place: the IO reader is built from a
    // pointer to fixture->string_reader and the tokenizer's source reader
    // from a pointer to fixture->reader.
    fixture->string_reader = apfl_io_string_reader_create(text);
    fixture->reader = apfl_io_string_reader(&fixture->string_reader);

    fixture->tokenizer = apfl_tokenizer_new(
        default_allocator,
        apfl_io_reader_as_source_reader(&fixture->reader)
    );
    if (fixture->tokenizer == NULL) {
        test_fatalf(t, "Failed to initialize the tokenizer");
    }

    return fixture;
}
// Convenience wrapper around new_tokenizer_test_sv() for C string input.
static struct tokenizer_test *
new_tokenizer_test(testctx t, const char *text)
{
    struct apfl_string_view view = apfl_string_view_from(text);
    return new_tokenizer_test_sv(t, view);
}
2021-12-10 20:22:16 +00:00
static void
destroy_tokenizer_test(struct tokenizer_test *tt)
{
apfl_tokenizer_destroy(tt->tokenizer);
free(tt);
}
// Advances the tokenizer once and requires the result to be EOF.
//
// A token is reported as a fatal failure; a tokenizer error is reported,
// printed to stderr, and then the test is aborted.
static void
expect_eof(struct tokenizer_test *tt)
{
    switch (apfl_tokenizer_next(tt->tokenizer, false)) {
    case APFL_PARSE_EOF:
        // This is what we were hoping for.
        return;
    case APFL_PARSE_ERROR:
        test_failf(tt->t, "Got an error instead of an EOF");
        apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), stderr);
        test_fatal(tt->t);
        return;
    case APFL_PARSE_OK:
        test_fatalf(tt->t, "Expected EOF but got a token");
        return;
    }
}
static bool
2023-02-16 20:41:02 +00:00
expect_token(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type, struct apfl_token *tok)
2021-12-10 20:22:16 +00:00
{
switch (apfl_tokenizer_next(tt->tokenizer, false)) {
case APFL_PARSE_OK:
break;
case APFL_PARSE_EOF:
test_fatalf(tt->t, "Got an EOF instead of a token");
break;
case APFL_PARSE_ERROR:
test_failf(tt->t, "Got an error instead of a token");
apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), stderr);
test_fatal(tt->t);
break;
}
*tok = apfl_tokenizer_get_token(tt->tokenizer);
if (tok->type != type) {
test_failf(
tt->t,
"Got wrong token type %s (wanted %s)",
apfl_token_type_name(tok->type),
apfl_token_type_name(type)
);
apfl_token_deinit(tt->allocator, tok);
2021-12-10 20:22:16 +00:00
return false;
}
if (tok->position.line != line || tok->position.col != col) {
2023-02-16 20:41:02 +00:00
test_failf(
tt->t,
"Got token at wrong position %" PRIuMAX ":%" PRIuMAX
" (wanted %" PRIuMAX ":%" PRIuMAX ")",
(uintmax_t)tok->position.line,
(uintmax_t)tok->position.col,
(uintmax_t)line,
(uintmax_t)col
);
2021-12-10 20:22:16 +00:00
}
return true;
}
static void
2023-02-16 20:41:02 +00:00
expect_simple_token(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type)
2021-12-10 20:22:16 +00:00
{
struct apfl_token tok;
if (expect_token(tt, line, col, type, &tok)) {
apfl_token_deinit(tt->allocator, &tok);
2021-12-10 20:22:16 +00:00
}
}
static void
2023-02-16 20:41:02 +00:00
expect_text_token(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type, const char *text)
2021-12-10 20:22:16 +00:00
{
struct apfl_token tok;
if (expect_token(tt, line, col, type, &tok)) {
if (!apfl_string_eq(text, tok.text)) {
struct apfl_string_view sv = apfl_string_view_from(tok.text);
test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text);
2021-12-10 20:22:16 +00:00
}
apfl_token_deinit(tt->allocator, &tok);
2021-12-10 20:22:16 +00:00
}
}
static void
2023-02-16 20:41:02 +00:00
expect_text_token_sv(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type, struct apfl_string_view text)
{
struct apfl_token tok;
if (expect_token(tt, line, col, type, &tok)) {
if (!apfl_string_eq(text, tok.text)) {
struct apfl_string_view sv = apfl_string_view_from(tok.text);
test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text);
}
apfl_token_deinit(tt->allocator, &tok);
}
}
2021-12-10 20:22:16 +00:00
static void
2023-02-16 20:41:02 +00:00
expect_number_token(struct tokenizer_test *tt, size_t line, size_t col, enum apfl_token_type type, apfl_number num)
2021-12-10 20:22:16 +00:00
{
struct apfl_token tok;
if (expect_token(tt, line, col, type, &tok)) {
if (tok.number != num) {
test_failf(tt->t, "Token has wrong content. have=%f, want=%f", tok.number, num);
}
apfl_token_deinit(tt->allocator, &tok);
2021-12-10 20:22:16 +00:00
}
}
static void
expect_error(struct tokenizer_test *tt, enum apfl_error_type want)
{
struct apfl_token tok;
switch (apfl_tokenizer_next(tt->tokenizer, false)) {
case APFL_PARSE_OK:
tok = apfl_tokenizer_get_token(tt->tokenizer);
test_failf(tt->t, "Expected error, got token of type %s instead", apfl_token_type_name(tok.type));
apfl_token_deinit(tt->allocator, &tok);
return;
case APFL_PARSE_EOF:
test_fatalf(tt->t, "Got an EOF instead of a token");
break;
case APFL_PARSE_ERROR:
break;
}
struct apfl_error have = apfl_tokenizer_get_error(tt->tokenizer);
if (have.type != want) {
test_failf(tt->t, "Expected error of type %s, got %s instead", apfl_error_type_name(want), apfl_error_type_name(have.type));
}
}
2021-12-10 20:22:16 +00:00
// Empty input yields EOF immediately.
TEST(empty, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "");

    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}
// A single bare word is tokenized as one NAME token followed by EOF.
TEST(simple_variable, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "hello");

    expect_text_token(fixture, 1, 1, APFL_TOK_NAME, "hello");
    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}
TEST(numbers, t) {
struct tokenizer_test *tt = new_tokenizer_test(t,
// 1 2
// 12345678901234567890123456789
"0 1 -1 1.5 -2.25 666 0xfe 0o15"
);
expect_number_token(tt, 1, 1, APFL_TOK_NUMBER, 0);
expect_number_token(tt, 1, 3, APFL_TOK_NUMBER, 1);
expect_number_token(tt, 1, 5, APFL_TOK_NUMBER, -1);
expect_number_token(tt, 1, 8, APFL_TOK_NUMBER, 1.5);
expect_number_token(tt, 1, 12, APFL_TOK_NUMBER, -2.25);
expect_number_token(tt, 1, 18, APFL_TOK_NUMBER, 666);
expect_number_token(tt, 1, 22, APFL_TOK_NUMBER, 0xfe);
expect_number_token(tt, 1, 27, APFL_TOK_NUMBER, 015);
expect_eof(tt);
destroy_tokenizer_test(tt);
}
// Names mixed with "->" and "=" tokens.
//
// The expected columns advance by three for the two-character name "Δv"
// (19 -> 23 with one space in between), i.e. columns count bytes, assuming
// this source file is UTF-8 encoded.
//
// NOTE(review): the input ends in "x+=", but nothing after the second "="
// is checked and there is no expect_eof() here.
TEST(names, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "foo bar --->-->-> Δv == a= x12=x+=");

    expect_text_token(fixture, 1, 1, APFL_TOK_NAME, "foo");
    expect_text_token(fixture, 1, 5, APFL_TOK_NAME, "bar");

    // "--->-->->" splits into "--", "->", "-", "->", "->".
    expect_text_token(fixture, 1, 9, APFL_TOK_NAME, "--");
    expect_simple_token(fixture, 1, 11, APFL_TOK_MAPSTO);
    expect_text_token(fixture, 1, 13, APFL_TOK_NAME, "-");
    expect_simple_token(fixture, 1, 14, APFL_TOK_MAPSTO);
    expect_simple_token(fixture, 1, 16, APFL_TOK_MAPSTO);

    expect_text_token(fixture, 1, 19, APFL_TOK_NAME, "Δv");
    expect_text_token(fixture, 1, 23, APFL_TOK_NAME, "==");

    expect_text_token(fixture, 1, 26, APFL_TOK_NAME, "a");
    expect_simple_token(fixture, 1, 27, APFL_TOK_ASSIGN);

    expect_text_token(fixture, 1, 29, APFL_TOK_NAME, "x12");
    expect_simple_token(fixture, 1, 32, APFL_TOK_ASSIGN);

    destroy_tokenizer_test(fixture);
}
// "a=a" without whitespace is split into NAME, ASSIGN, NAME.
TEST(assignment, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "a=a");

    expect_text_token(fixture, 1, 1, APFL_TOK_NAME, "a");
    expect_simple_token(fixture, 1, 2, APFL_TOK_ASSIGN);
    expect_text_token(fixture, 1, 3, APFL_TOK_NAME, "a");
    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}
2021-12-10 20:22:16 +00:00
// Walks through a multi-line input covering the token types checked below
// and verifies type, position and payload of each token.
//
// The rulers above each input line give the column of every character; for
// escape sequences the digit sits above the backslash.
TEST(all_tokens, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t,
        // 1234567
          "# test\n"
        // 1 2345 678901234567
          "\"abc\" def g-h*=i\n"
        // 123456789012345678901234567890
          "1234.5 -10 0x2A 0b101010 0o52\n"
        // 12345678901 2
          "'foo ;; , \\\n"
        // 1234567890123456
          "@ . ? ~ -> = :=\n"
        // 1234567
          "({[]})\n"
        // 1234567
          ": :: :="
    );

    // Line 1: comment
    expect_text_token(fixture, 1, 1, APFL_TOK_COMMENT, " test");
    expect_simple_token(fixture, 1, 7, APFL_TOK_LINEBREAK);

    // Line 2: string and names
    expect_text_token(fixture, 2, 1, APFL_TOK_STRING, "abc");
    expect_text_token(fixture, 2, 7, APFL_TOK_NAME, "def");
    expect_text_token(fixture, 2, 11, APFL_TOK_NAME, "g-h*=i");
    expect_simple_token(fixture, 2, 17, APFL_TOK_LINEBREAK);

    // Line 3: numbers (the last three are 42 in hex, binary and octal)
    expect_number_token(fixture, 3, 1, APFL_TOK_NUMBER, 1234.5);
    expect_number_token(fixture, 3, 8, APFL_TOK_NUMBER, -10);
    expect_number_token(fixture, 3, 12, APFL_TOK_NUMBER, 42);
    expect_number_token(fixture, 3, 17, APFL_TOK_NUMBER, 42);
    expect_number_token(fixture, 3, 26, APFL_TOK_NUMBER, 42);
    expect_simple_token(fixture, 3, 30, APFL_TOK_LINEBREAK);

    // Line 4: stringify, separators and line continuation
    expect_simple_token(fixture, 4, 1, APFL_TOK_STRINGIFY);
    expect_text_token(fixture, 4, 2, APFL_TOK_NAME, "foo");
    expect_simple_token(fixture, 4, 6, APFL_TOK_SEMICOLON);
    expect_simple_token(fixture, 4, 7, APFL_TOK_SEMICOLON);
    expect_simple_token(fixture, 4, 9, APFL_TOK_COMMA);
    expect_simple_token(fixture, 4, 11, APFL_TOK_CONTINUE_LINE);
    expect_simple_token(fixture, 4, 12, APFL_TOK_LINEBREAK);

    // Line 5: punctuation and assignment operators
    expect_simple_token(fixture, 5, 1, APFL_TOK_AT);
    expect_simple_token(fixture, 5, 3, APFL_TOK_DOT);
    expect_simple_token(fixture, 5, 5, APFL_TOK_QUESTION_MARK);
    expect_simple_token(fixture, 5, 7, APFL_TOK_EXPAND);
    expect_simple_token(fixture, 5, 9, APFL_TOK_MAPSTO);
    expect_simple_token(fixture, 5, 12, APFL_TOK_ASSIGN);
    expect_simple_token(fixture, 5, 14, APFL_TOK_LOCAL_ASSIGN);
    expect_simple_token(fixture, 5, 16, APFL_TOK_LINEBREAK);

    // Line 6: brackets
    expect_simple_token(fixture, 6, 1, APFL_TOK_LPAREN);
    expect_simple_token(fixture, 6, 2, APFL_TOK_LBRACE);
    expect_simple_token(fixture, 6, 3, APFL_TOK_LBRACKET);
    expect_simple_token(fixture, 6, 4, APFL_TOK_RBRACKET);
    expect_simple_token(fixture, 6, 5, APFL_TOK_RBRACE);
    expect_simple_token(fixture, 6, 6, APFL_TOK_RPAREN);
    expect_simple_token(fixture, 6, 7, APFL_TOK_LINEBREAK);

    // Line 7: colon variants
    expect_simple_token(fixture, 7, 1, APFL_TOK_COLON);
    expect_simple_token(fixture, 7, 3, APFL_TOK_DOUBLE_COLON);
    expect_simple_token(fixture, 7, 6, APFL_TOK_LOCAL_ASSIGN);

    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}
TEST(strings_with_binary_data, t) {
struct tokenizer_test *tt = new_tokenizer_test_sv(t, (struct apfl_string_view) {
2023-02-13 21:31:18 +00:00
.bytes = (unsigned char []){'"', '\x00', '\xFF', '\\', 'x', '0', '0', '\\', 'x', '2', 'a', '"'},
.len = 12,
});
expect_text_token_sv(tt, 1, 1, APFL_TOK_STRING, (struct apfl_string_view) {
2023-02-13 21:31:18 +00:00
.bytes = (unsigned char []){'\x00', '\xFF', '\x00', '\x2A'},
.len = 4
});
expect_eof(tt);
destroy_tokenizer_test(tt);
}
// The bytes 0x05, 0x01 and 0x7F between names each produce an
// APFL_ERR_UNEXPECTED_BYTE error, after which tokenizing continues with the
// next name.
TEST(err_invalid_bytes, t) {
    // The literal is split so the hex escapes do not swallow the letters
    // that follow them.
    struct tokenizer_test *fixture = new_tokenizer_test(t, "\x05" "foo\x01_bar\x7F" "baz");

    expect_error(fixture, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fixture, 1, 2, APFL_TOK_NAME, "foo");

    expect_error(fixture, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fixture, 1, 6, APFL_TOK_NAME, "_bar");

    expect_error(fixture, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fixture, 1, 11, APFL_TOK_NAME, "baz");

    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}
2021-12-10 20:22:16 +00:00
TESTS_BEGIN
ADDTEST(empty),
ADDTEST(simple_variable),
ADDTEST(numbers),
ADDTEST(names),
ADDTEST(assignment),
2021-12-10 20:22:16 +00:00
ADDTEST(all_tokens),
ADDTEST(strings_with_binary_data),
ADDTEST(err_invalid_bytes),
2021-12-10 20:22:16 +00:00
TESTS_END