The callback and the opaque data are now grouped together in a struct instead of being passed individually into the tokenizer. This also exposes the string source reader struct and therefore removes the need to heap-allocate it. Neat!
331 lines
11 KiB
C
331 lines
11 KiB
C
#include <assert.h>
|
|
|
|
#include "apfl.h"
|
|
|
|
#include "test.h"
|
|
|
|
struct tokenizer_test {
|
|
testctx t;
|
|
struct apfl_allocator allocator;
|
|
apfl_tokenizer_ptr tokenizer;
|
|
struct apfl_string_source_reader_data string_source_reader;
|
|
};
|
|
|
|
static struct tokenizer_test *
|
|
new_tokenizer_test_sv(testctx t, struct apfl_string_view text)
|
|
{
|
|
struct apfl_allocator allocator = apfl_allocator_default();
|
|
|
|
struct tokenizer_test *tt = must_alloc(t, sizeof(struct tokenizer_test));
|
|
*tt = (struct tokenizer_test) {
|
|
.t = t,
|
|
.allocator = allocator,
|
|
.string_source_reader = apfl_string_source_reader_create(text),
|
|
};
|
|
|
|
if ((tt->tokenizer = apfl_tokenizer_new(
|
|
allocator,
|
|
apfl_string_source_reader(&tt->string_source_reader)
|
|
)) == NULL) {
|
|
test_fatalf(t, "Failed to initialize the tokenizer");
|
|
}
|
|
|
|
return tt;
|
|
}
|
|
|
|
static struct tokenizer_test *
|
|
new_tokenizer_test(testctx t, const char *text)
|
|
{
|
|
return new_tokenizer_test_sv(t, apfl_string_view_from(text));
|
|
}
|
|
|
|
|
|
static void
|
|
destroy_tokenizer_test(struct tokenizer_test *tt)
|
|
{
|
|
apfl_tokenizer_destroy(tt->tokenizer);
|
|
free(tt);
|
|
}
|
|
|
|
static void
|
|
expect_eof(struct tokenizer_test *tt)
|
|
{
|
|
switch (apfl_tokenizer_next(tt->tokenizer, false)) {
|
|
case APFL_PARSE_OK:
|
|
test_fatalf(tt->t, "Expected EOF but got a token");
|
|
break;
|
|
case APFL_PARSE_EOF:
|
|
break;
|
|
case APFL_PARSE_ERROR:
|
|
test_failf(tt->t, "Got an error instead of an EOF");
|
|
apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), stderr);
|
|
test_fatal(tt->t);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static bool
|
|
expect_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, struct apfl_token *tok)
|
|
{
|
|
switch (apfl_tokenizer_next(tt->tokenizer, false)) {
|
|
case APFL_PARSE_OK:
|
|
break;
|
|
case APFL_PARSE_EOF:
|
|
test_fatalf(tt->t, "Got an EOF instead of a token");
|
|
break;
|
|
case APFL_PARSE_ERROR:
|
|
test_failf(tt->t, "Got an error instead of a token");
|
|
apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), stderr);
|
|
test_fatal(tt->t);
|
|
break;
|
|
}
|
|
|
|
*tok = apfl_tokenizer_get_token(tt->tokenizer);
|
|
if (tok->type != type) {
|
|
test_failf(
|
|
tt->t,
|
|
"Got wrong token type %s (wanted %s)",
|
|
apfl_token_type_name(tok->type),
|
|
apfl_token_type_name(type)
|
|
);
|
|
apfl_token_deinit(tt->allocator, tok);
|
|
|
|
return false;
|
|
}
|
|
|
|
if (tok->position.line != line || tok->position.col != col) {
|
|
test_failf(tt->t, "Got token at wrong position %d:%d (wanted %d:%d)", tok->position.line, tok->position.col, line, col);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static void
|
|
expect_simple_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type)
|
|
{
|
|
struct apfl_token tok;
|
|
if (expect_token(tt, line, col, type, &tok)) {
|
|
apfl_token_deinit(tt->allocator, &tok);
|
|
}
|
|
}
|
|
|
|
static void
|
|
expect_text_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, const char *text)
|
|
{
|
|
struct apfl_token tok;
|
|
if (expect_token(tt, line, col, type, &tok)) {
|
|
if (!apfl_string_eq(text, tok.text)) {
|
|
struct apfl_string_view sv = apfl_string_view_from(tok.text);
|
|
test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text);
|
|
}
|
|
apfl_token_deinit(tt->allocator, &tok);
|
|
}
|
|
}
|
|
|
|
static void
|
|
expect_text_token_sv(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, struct apfl_string_view text)
|
|
{
|
|
struct apfl_token tok;
|
|
if (expect_token(tt, line, col, type, &tok)) {
|
|
if (!apfl_string_eq(text, tok.text)) {
|
|
struct apfl_string_view sv = apfl_string_view_from(tok.text);
|
|
test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text);
|
|
}
|
|
apfl_token_deinit(tt->allocator, &tok);
|
|
}
|
|
}
|
|
|
|
static void
|
|
expect_number_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, apfl_number num)
|
|
{
|
|
struct apfl_token tok;
|
|
if (expect_token(tt, line, col, type, &tok)) {
|
|
if (tok.number != num) {
|
|
test_failf(tt->t, "Token has wrong content. have=%f, want=%f", tok.number, num);
|
|
}
|
|
apfl_token_deinit(tt->allocator, &tok);
|
|
}
|
|
}
|
|
|
|
static void
|
|
expect_error(struct tokenizer_test *tt, enum apfl_error_type want)
|
|
{
|
|
struct apfl_token tok;
|
|
|
|
switch (apfl_tokenizer_next(tt->tokenizer, false)) {
|
|
case APFL_PARSE_OK:
|
|
tok = apfl_tokenizer_get_token(tt->tokenizer);
|
|
test_failf(tt->t, "Expected error, got token of type %s instead", apfl_token_type_name(tok.type));
|
|
apfl_token_deinit(tt->allocator, &tok);
|
|
return;
|
|
case APFL_PARSE_EOF:
|
|
test_fatalf(tt->t, "Got an EOF instead of a token");
|
|
break;
|
|
case APFL_PARSE_ERROR:
|
|
break;
|
|
}
|
|
|
|
struct apfl_error have = apfl_tokenizer_get_error(tt->tokenizer);
|
|
if (have.type != want) {
|
|
test_failf(tt->t, "Expected error of type %s, got %s instead", apfl_error_type_name(want), apfl_error_type_name(have.type));
|
|
}
|
|
}
|
|
|
|
/* Empty input must yield EOF right away. */
TEST(empty, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "");

    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}
|
|
|
|
TEST(simple_variable, t) {
|
|
struct tokenizer_test *tt = new_tokenizer_test(t, "hello");
|
|
|
|
expect_text_token(tt, 1, 1, APFL_TOK_NAME, "hello");
|
|
expect_eof(tt);
|
|
|
|
destroy_tokenizer_test(tt);
|
|
}
|
|
|
|
TEST(numbers, t) {
|
|
struct tokenizer_test *tt = new_tokenizer_test(t,
|
|
// 1 2
|
|
// 12345678901234567890123456789
|
|
"0 1 -1 1.5 -2.25 666 0xfe 0o15"
|
|
);
|
|
|
|
expect_number_token(tt, 1, 1, APFL_TOK_NUMBER, 0);
|
|
expect_number_token(tt, 1, 3, APFL_TOK_NUMBER, 1);
|
|
expect_number_token(tt, 1, 5, APFL_TOK_NUMBER, -1);
|
|
expect_number_token(tt, 1, 8, APFL_TOK_NUMBER, 1.5);
|
|
expect_number_token(tt, 1, 12, APFL_TOK_NUMBER, -2.25);
|
|
expect_number_token(tt, 1, 18, APFL_TOK_NUMBER, 666);
|
|
expect_number_token(tt, 1, 22, APFL_TOK_NUMBER, 0xfe);
|
|
expect_number_token(tt, 1, 27, APFL_TOK_NUMBER, 015);
|
|
expect_eof(tt);
|
|
|
|
destroy_tokenizer_test(tt);
|
|
}
|
|
|
|
TEST(names, t) {
|
|
struct tokenizer_test *tt = new_tokenizer_test(t, "foo bar --->-->-> Δv == a= x12=x+=");
|
|
|
|
expect_text_token (tt, 1, 1, APFL_TOK_NAME, "foo");
|
|
expect_text_token (tt, 1, 5, APFL_TOK_NAME, "bar");
|
|
expect_text_token (tt, 1, 9, APFL_TOK_NAME, "--");
|
|
expect_simple_token(tt, 1, 11, APFL_TOK_MAPSTO);
|
|
expect_text_token (tt, 1, 13, APFL_TOK_NAME, "-");
|
|
expect_simple_token(tt, 1, 14, APFL_TOK_MAPSTO);
|
|
expect_simple_token(tt, 1, 16, APFL_TOK_MAPSTO);
|
|
expect_text_token (tt, 1, 19, APFL_TOK_NAME, "Δv");
|
|
expect_text_token (tt, 1, 23, APFL_TOK_NAME, "==");
|
|
expect_text_token (tt, 1, 26, APFL_TOK_NAME, "a");
|
|
expect_simple_token(tt, 1, 27, APFL_TOK_ASSIGN);
|
|
expect_text_token (tt, 1, 29, APFL_TOK_NAME, "x12");
|
|
expect_simple_token(tt, 1, 32, APFL_TOK_ASSIGN);
|
|
|
|
destroy_tokenizer_test(tt);
|
|
}
|
|
|
|
TEST(assignment, t) {
|
|
struct tokenizer_test *tt = new_tokenizer_test(t, "a=a");
|
|
expect_text_token (tt, 1, 1, APFL_TOK_NAME, "a");
|
|
expect_simple_token(tt, 1, 2, APFL_TOK_ASSIGN);
|
|
expect_text_token (tt, 1, 3, APFL_TOK_NAME, "a");
|
|
|
|
expect_eof(tt);
|
|
|
|
destroy_tokenizer_test(tt);
|
|
}
|
|
|
|
TEST(all_tokens, t) {
|
|
struct tokenizer_test *tt = new_tokenizer_test(t,
|
|
// 1234567
|
|
"# test\n"
|
|
// 1 2345 678901234567
|
|
"\"abc\" def g-h*=i\n"
|
|
// 123456789012345678901234567890
|
|
"1234.5 -10 0x2A 0b101010 0o52\n"
|
|
// 12345678901 2
|
|
"'foo ;; , \\\n"
|
|
// 1234567890123456
|
|
"@ . ? ~ -> = :=\n"
|
|
// 123456
|
|
"({[]})"
|
|
);
|
|
|
|
expect_text_token (tt, 1, 1, APFL_TOK_COMMENT, " test");
|
|
expect_simple_token(tt, 1, 7, APFL_TOK_LINEBREAK);
|
|
expect_text_token (tt, 2, 1, APFL_TOK_STRING, "abc");
|
|
expect_text_token (tt, 2, 7, APFL_TOK_NAME, "def");
|
|
expect_text_token (tt, 2, 11, APFL_TOK_NAME, "g-h*=i");
|
|
expect_simple_token(tt, 2, 17, APFL_TOK_LINEBREAK);
|
|
expect_number_token(tt, 3, 1, APFL_TOK_NUMBER, 1234.5);
|
|
expect_number_token(tt, 3, 8, APFL_TOK_NUMBER, -10);
|
|
expect_number_token(tt, 3, 12, APFL_TOK_NUMBER, 42);
|
|
expect_number_token(tt, 3, 17, APFL_TOK_NUMBER, 42);
|
|
expect_number_token(tt, 3, 26, APFL_TOK_NUMBER, 42);
|
|
expect_simple_token(tt, 3, 30, APFL_TOK_LINEBREAK);
|
|
expect_simple_token(tt, 4, 1, APFL_TOK_STRINGIFY);
|
|
expect_text_token (tt, 4, 2, APFL_TOK_NAME, "foo");
|
|
expect_simple_token(tt, 4, 6, APFL_TOK_SEMICOLON);
|
|
expect_simple_token(tt, 4, 7, APFL_TOK_SEMICOLON);
|
|
expect_simple_token(tt, 4, 9, APFL_TOK_COMMA);
|
|
expect_simple_token(tt, 4, 11, APFL_TOK_CONTINUE_LINE);
|
|
expect_simple_token(tt, 4, 12, APFL_TOK_LINEBREAK);
|
|
expect_simple_token(tt, 5, 1, APFL_TOK_AT);
|
|
expect_simple_token(tt, 5, 3, APFL_TOK_DOT);
|
|
expect_simple_token(tt, 5, 5, APFL_TOK_QUESTION_MARK);
|
|
expect_simple_token(tt, 5, 7, APFL_TOK_EXPAND);
|
|
expect_simple_token(tt, 5, 9, APFL_TOK_MAPSTO);
|
|
expect_simple_token(tt, 5, 12, APFL_TOK_ASSIGN);
|
|
expect_simple_token(tt, 5, 14, APFL_TOK_LOCAL_ASSIGN);
|
|
expect_simple_token(tt, 5, 16, APFL_TOK_LINEBREAK);
|
|
expect_simple_token(tt, 6, 1, APFL_TOK_LPAREN);
|
|
expect_simple_token(tt, 6, 2, APFL_TOK_LBRACE);
|
|
expect_simple_token(tt, 6, 3, APFL_TOK_LBRACKET);
|
|
expect_simple_token(tt, 6, 4, APFL_TOK_RBRACKET);
|
|
expect_simple_token(tt, 6, 5, APFL_TOK_RBRACE);
|
|
expect_simple_token(tt, 6, 6, APFL_TOK_RPAREN);
|
|
|
|
expect_eof(tt);
|
|
|
|
destroy_tokenizer_test(tt);
|
|
}
|
|
|
|
TEST(strings_with_binary_data, t) {
|
|
struct tokenizer_test *tt = new_tokenizer_test_sv(t, (struct apfl_string_view) {
|
|
.bytes = (char []){'"', '\x00', '\xFF', '\\', 'x', '0', '0', '\\', 'x', '2', 'a', '"'},
|
|
.len = 12,
|
|
});
|
|
expect_text_token_sv(tt, 1, 1, APFL_TOK_STRING, (struct apfl_string_view) {
|
|
.bytes = (char []){'\x00', '\xFF', '\x00', '\x2A'},
|
|
.len = 4
|
|
});
|
|
expect_eof(tt);
|
|
destroy_tokenizer_test(tt);
|
|
}
|
|
|
|
TEST(err_invalid_bytes, t) {
|
|
struct tokenizer_test *tt = new_tokenizer_test(t, "\x05" "foo\x01_bar\x7F" "baz");
|
|
expect_error(tt, APFL_ERR_UNEXPECTED_BYTE);
|
|
expect_text_token(tt, 1, 2, APFL_TOK_NAME, "foo");
|
|
expect_error(tt, APFL_ERR_UNEXPECTED_BYTE);
|
|
expect_text_token(tt, 1, 6, APFL_TOK_NAME, "_bar");
|
|
expect_error(tt, APFL_ERR_UNEXPECTED_BYTE);
|
|
expect_text_token(tt, 1, 11, APFL_TOK_NAME, "baz");
|
|
expect_eof(tt);
|
|
destroy_tokenizer_test(tt);
|
|
}
|
|
|
|
TESTS_BEGIN
|
|
ADDTEST(empty),
|
|
ADDTEST(simple_variable),
|
|
ADDTEST(numbers),
|
|
ADDTEST(names),
|
|
ADDTEST(assignment),
|
|
ADDTEST(all_tokens),
|
|
ADDTEST(strings_with_binary_data),
|
|
ADDTEST(err_invalid_bytes),
|
|
TESTS_END
|