apfl/src/tokenizer_test.c

#include <assert.h>

#include "apfl.h"

#include "test.h"

// Fixture shared by all tokenizer tests in this file. It bundles the tokenizer
// under test with everything needed to report failures and to clean up.
struct tokenizer_test {
    // Test context that failures are reported against.
    testctx t;
    // Allocator handed to the source reader and tokenizer; also used to
    // release tokens obtained from the tokenizer.
    struct apfl_allocator allocator;
    // The tokenizer under test.
    apfl_tokenizer_ptr tokenizer;
    // String source reader handle that the tokenizer reads its input from.
    void *ctx;
};

// Creates a test fixture whose tokenizer reads from the given string view.
//
// Aborts the test via test_fatalf() if the source reader or the tokenizer
// cannot be created. The returned fixture must be released with
// destroy_tokenizer_test().
static struct tokenizer_test *
new_tokenizer_test_sv(testctx t, struct apfl_string_view text)
{
    struct apfl_allocator alloc = apfl_allocator_default();

    // The reader serves the bytes of `text` to the tokenizer.
    void *reader = apfl_string_source_reader_new(alloc, text);
    if (reader == NULL) {
        test_fatalf(t, "Failed to initialize the source reader");
    }

    apfl_tokenizer_ptr tokenizer = apfl_tokenizer_new(alloc, apfl_string_source_reader, reader);
    if (tokenizer == NULL) {
        test_fatalf(t, "Failed to initialize the tokenizer");
    }

    struct tokenizer_test *fixture = must_alloc(t, sizeof *fixture);
    fixture->t = t;
    fixture->allocator = alloc;
    fixture->tokenizer = tokenizer;
    fixture->ctx = reader;

    return fixture;
}

// Convenience wrapper around new_tokenizer_test_sv() for C string input.
static struct tokenizer_test *
new_tokenizer_test(testctx t, const char *text)
{
    struct apfl_string_view source = apfl_string_view_from(text);
    return new_tokenizer_test_sv(t, source);
}


// Releases everything owned by a fixture created with new_tokenizer_test() /
// new_tokenizer_test_sv(), including the fixture itself.
static void
destroy_tokenizer_test(struct tokenizer_test *tt)
{
    // Tear down in reverse order of construction: the tokenizer was handed the
    // source reader as its reader context, so it must go away before the
    // reader does. Otherwise it would hold a dangling pointer while being
    // destroyed.
    apfl_tokenizer_destroy(tt->tokenizer);
    apfl_string_source_reader_destroy(tt->ctx);
    free(tt);
}

// Advances the tokenizer and requires that it reports the end of the input.
// Getting a token or an error instead aborts the test; an error is printed
// to stderr first.
static void
expect_eof(struct tokenizer_test *tt)
{
    switch (apfl_tokenizer_next(tt->tokenizer, false)) {
    case APFL_PARSE_EOF:
        // Exactly what we were hoping for.
        return;
    case APFL_PARSE_ERROR:
        test_failf(tt->t, "Got an error instead of an EOF");
        apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), stderr);
        test_fatal(tt->t);
        return;
    case APFL_PARSE_OK:
        test_fatalf(tt->t, "Expected EOF but got a token");
        return;
    }
}

// Advances the tokenizer and stores the resulting token in *tok.
//
// Hitting EOF or a tokenizer error aborts the test. A token of the wrong type
// is reported, released and false is returned. A token of the right type at
// the wrong position is reported as a failure too, but true is still
// returned so that callers can go on to check the token's content.
//
// When true is returned, the caller owns *tok and must release it with
// apfl_token_deinit().
static bool
expect_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, struct apfl_token *tok)
{
    switch (apfl_tokenizer_next(tt->tokenizer, false)) {
    case APFL_PARSE_ERROR:
        test_failf(tt->t, "Got an error instead of a token");
        apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), stderr);
        test_fatal(tt->t);
        break;
    case APFL_PARSE_EOF:
        test_fatalf(tt->t, "Got an EOF instead of a token");
        break;
    case APFL_PARSE_OK:
        break;
    }

    *tok = apfl_tokenizer_get_token(tt->tokenizer);

    if (tok->type == type) {
        bool at_expected_position = tok->position.line == line && tok->position.col == col;
        if (!at_expected_position) {
            test_failf(tt->t, "Got token at wrong position %d:%d (wanted %d:%d)", tok->position.line, tok->position.col, line, col);
        }

        return true;
    }

    test_failf(
        tt->t,
        "Got wrong token type %s (wanted %s)",
        apfl_token_type_name(tok->type),
        apfl_token_type_name(type)
    );
    // The caller will not look at the token any more, so release it here.
    apfl_token_deinit(tt->allocator, tok);

    return false;
}

// Expects the next token to have the given type and position; the token
// carries no payload that needs checking.
static void
expect_simple_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type)
{
    struct apfl_token tok;
    if (!expect_token(tt, line, col, type, &tok)) {
        // expect_token() already reported the mismatch and released the token.
        return;
    }

    apfl_token_deinit(tt->allocator, &tok);
}

static void
expect_text_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, const char *text)
{
    struct apfl_token tok;
    if (expect_token(tt, line, col, type, &tok)) {
        if (!apfl_string_eq(text, tok.text)) {
            struct apfl_string_view sv = apfl_string_view_from(tok.text);
            test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(sv), text);
        }
        apfl_token_deinit(tt->allocator, &tok);
    }
}

// Expects the next token to have the given type and position and to carry
// exactly the bytes of the given string view. Unlike expect_text_token(), the
// expected text is given with an explicit length instead of as a C string.
static void
expect_text_token_sv(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, struct apfl_string_view text)
{
    struct apfl_token tok;
    if (expect_token(tt, line, col, type, &tok)) {
        if (!apfl_string_eq(text, tok.text)) {
            struct apfl_string_view sv = apfl_string_view_from(tok.text);
            // `text` is a string view, not a C string, so it must not be
            // passed to a "%s" conversion; format it like `sv`.
            test_failf(
                tt->t,
                "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"" APFL_STR_FMT "\"",
                APFL_STR_FMT_ARGS(sv),
                APFL_STR_FMT_ARGS(text)
            );
        }
        apfl_token_deinit(tt->allocator, &tok);
    }
}

// Expects the next token to have the given type and position and to carry
// exactly the given numeric value.
static void
expect_number_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, apfl_number num)
{
    struct apfl_token tok;
    if (!expect_token(tt, line, col, type, &tok)) {
        // expect_token() already reported the mismatch and released the token.
        return;
    }

    // Exact comparison is intended here.
    if (tok.number != num) {
        test_failf(tt->t, "Token has wrong content. have=%f, want=%f", tok.number, num);
    }

    apfl_token_deinit(tt->allocator, &tok);
}

static void
expect_error(struct tokenizer_test *tt, enum apfl_error_type want)
{
    struct apfl_token tok;

    switch (apfl_tokenizer_next(tt->tokenizer, false)) {
    case APFL_PARSE_OK:
        tok = apfl_tokenizer_get_token(tt->tokenizer);
        test_failf(tt->t, "Expected error, got token of type %s instead", apfl_token_type_name(tok.type));
        apfl_token_deinit(tt->allocator, &tok);
        return;
    case APFL_PARSE_EOF:
        test_fatalf(tt->t, "Got an EOF instead of a token");
        break;
    case APFL_PARSE_ERROR:
        break;
    }

    struct apfl_error have = apfl_tokenizer_get_error(tt->tokenizer);
    if (have.type != want) {
        test_failf(tt->t, "Expected error of type %s, got %s instead", apfl_error_type_name(want), apfl_error_type_name(have.type));
    }
}

// Empty input must yield EOF right away.
TEST(empty, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "");
    expect_eof(fixture);
    destroy_tokenizer_test(fixture);
}

// A lone identifier is tokenized as a single name token followed by EOF.
TEST(simple_variable, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "hello");
    expect_text_token(fixture, 1, 1, APFL_TOK_NAME, "hello");
    expect_eof(fixture);
    destroy_tokenizer_test(fixture);
}

// Number literals: integers, negative numbers, decimals and the 0x / 0o
// prefixed forms. The ruler comments give the 1-based column of each input
// character.
TEST(numbers, t) {
    struct tokenizer_test *tt = new_tokenizer_test(t,
    //            1         2         3
    //   123456789012345678901234567890
        "0 1 -1 1.5 -2.25 666 0xfe 0o15"
    );

    expect_number_token(tt, 1, 1,  APFL_TOK_NUMBER, 0);
    expect_number_token(tt, 1, 3,  APFL_TOK_NUMBER, 1);
    expect_number_token(tt, 1, 5,  APFL_TOK_NUMBER, -1);
    expect_number_token(tt, 1, 8,  APFL_TOK_NUMBER, 1.5);
    expect_number_token(tt, 1, 12, APFL_TOK_NUMBER, -2.25);
    expect_number_token(tt, 1, 18, APFL_TOK_NUMBER, 666);
    expect_number_token(tt, 1, 22, APFL_TOK_NUMBER, 0xfe);
    // 015 is a C octal literal (decimal 13), the value expected for "0o15".
    expect_number_token(tt, 1, 27, APFL_TOK_NUMBER, 015);
    expect_eof(tt);

    destroy_tokenizer_test(tt);
}

// Names and how they interact with the "->" and "=" tokens when written
// without separating whitespace. The expected columns step by 3 across the
// two-character name "Δv" (19 -> 23, with one space in between), so columns
// appear to be counted in bytes rather than characters.
TEST(names, t) {
    struct tokenizer_test *tt = new_tokenizer_test(t, "foo bar --->-->-> Δv == a= x12=x+=");

    expect_text_token  (tt, 1, 1,  APFL_TOK_NAME, "foo");
    expect_text_token  (tt, 1, 5,  APFL_TOK_NAME, "bar");
    expect_text_token  (tt, 1, 9,  APFL_TOK_NAME, "--");
    expect_simple_token(tt, 1, 11, APFL_TOK_MAPSTO);
    expect_text_token  (tt, 1, 13, APFL_TOK_NAME, "-");
    expect_simple_token(tt, 1, 14, APFL_TOK_MAPSTO);
    expect_simple_token(tt, 1, 16, APFL_TOK_MAPSTO);
    expect_text_token  (tt, 1, 19, APFL_TOK_NAME, "Δv");
    expect_text_token  (tt, 1, 23, APFL_TOK_NAME, "==");
    expect_text_token  (tt, 1, 26, APFL_TOK_NAME, "a");
    expect_simple_token(tt, 1, 27, APFL_TOK_ASSIGN);
    expect_text_token  (tt, 1, 29, APFL_TOK_NAME, "x12");
    expect_simple_token(tt, 1, 32, APFL_TOK_ASSIGN);
    // NOTE(review): the trailing "x+=" of the input (column 33 onwards) and
    // the final EOF are never checked here - confirm whether that is
    // intentional or whether expectations are missing.

    destroy_tokenizer_test(tt);
}

// An assignment written without any whitespace still splits into
// name, assign, name.
TEST(assignment, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "a=a");

    expect_text_token(fixture, 1, 1, APFL_TOK_NAME, "a");
    expect_simple_token(fixture, 1, 2, APFL_TOK_ASSIGN);
    expect_text_token(fixture, 1, 3, APFL_TOK_NAME, "a");
    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}

// Runs a multi-line input through the tokenizer that exercises a broad range
// of token types and checks the type, position and (where applicable) payload
// of each token.
//
// The ruler comment above each input line gives the 1-based column of every
// character of that line. Escape sequences in the C source (\" \\ \n) stand
// for a single input character, which is why the rulers contain gaps.
TEST(all_tokens, t) {
    struct tokenizer_test *tt = new_tokenizer_test(t,
    //   1234567
        "# test\n"
    //   1 2345 678901234567
        "\"abc\" def g-h*=i\n"
    //   123456789012345678901234567890
        "1234.5 -10 0x2A 0b101010 0o52\n"
    //   12345678901 2
        "'foo ;; , \\\n"
    //   1234567890123456
        "@ . ? ~ -> = :=\n"
    //   123456
        "({[]})"
    );

    // Line 1: comment
    expect_text_token  (tt, 1, 1,  APFL_TOK_COMMENT, " test");
    expect_simple_token(tt, 1, 7,  APFL_TOK_LINEBREAK);
    // Line 2: string and names
    expect_text_token  (tt, 2, 1,  APFL_TOK_STRING, "abc");
    expect_text_token  (tt, 2, 7,  APFL_TOK_NAME, "def");
    expect_text_token  (tt, 2, 11, APFL_TOK_NAME, "g-h*=i");
    expect_simple_token(tt, 2, 17, APFL_TOK_LINEBREAK);
    // Line 3: numbers; the last three are 42 in hex, binary and octal notation
    expect_number_token(tt, 3, 1,  APFL_TOK_NUMBER, 1234.5);
    expect_number_token(tt, 3, 8,  APFL_TOK_NUMBER, -10);
    expect_number_token(tt, 3, 12, APFL_TOK_NUMBER, 42);
    expect_number_token(tt, 3, 17, APFL_TOK_NUMBER, 42);
    expect_number_token(tt, 3, 26, APFL_TOK_NUMBER, 42);
    expect_simple_token(tt, 3, 30, APFL_TOK_LINEBREAK);
    // Line 4: stringify, semicolons, comma and line continuation
    expect_simple_token(tt, 4, 1,  APFL_TOK_STRINGIFY);
    expect_text_token  (tt, 4, 2,  APFL_TOK_NAME, "foo");
    expect_simple_token(tt, 4, 6,  APFL_TOK_SEMICOLON);
    expect_simple_token(tt, 4, 7,  APFL_TOK_SEMICOLON);
    expect_simple_token(tt, 4, 9,  APFL_TOK_COMMA);
    expect_simple_token(tt, 4, 11, APFL_TOK_CONTINUE_LINE);
    expect_simple_token(tt, 4, 12, APFL_TOK_LINEBREAK);
    // Line 5: punctuation and assignment operators
    expect_simple_token(tt, 5, 1,  APFL_TOK_AT);
    expect_simple_token(tt, 5, 3,  APFL_TOK_DOT);
    expect_simple_token(tt, 5, 5,  APFL_TOK_QUESTION_MARK);
    expect_simple_token(tt, 5, 7,  APFL_TOK_EXPAND);
    expect_simple_token(tt, 5, 9,  APFL_TOK_MAPSTO);
    expect_simple_token(tt, 5, 12, APFL_TOK_ASSIGN);
    expect_simple_token(tt, 5, 14, APFL_TOK_LOCAL_ASSIGN);
    expect_simple_token(tt, 5, 16, APFL_TOK_LINEBREAK);
    // Line 6: brackets
    expect_simple_token(tt, 6, 1,  APFL_TOK_LPAREN);
    expect_simple_token(tt, 6, 2,  APFL_TOK_LBRACE);
    expect_simple_token(tt, 6, 3,  APFL_TOK_LBRACKET);
    expect_simple_token(tt, 6, 4,  APFL_TOK_RBRACKET);
    expect_simple_token(tt, 6, 5,  APFL_TOK_RBRACE);
    expect_simple_token(tt, 6, 6,  APFL_TOK_RPAREN);

    expect_eof(tt);

    destroy_tokenizer_test(tt);
}

// String literals may contain raw NUL / non-ASCII bytes as well as \xNN
// escapes. Input and expectation are given as byte arrays with explicit
// lengths, since both contain NUL bytes.
TEST(strings_with_binary_data, t) {
    // "<0x00><0xFF>\x00\x2a" including the surrounding quotes.
    char source[] = {'"', '\x00', '\xFF', '\\', 'x', '0', '0', '\\', 'x', '2', 'a', '"'};
    // The two raw bytes followed by the two decoded escapes.
    char expected[] = {'\x00', '\xFF', '\x00', '\x2A'};

    struct tokenizer_test *fixture = new_tokenizer_test_sv(t, (struct apfl_string_view) {
        .bytes = source,
        .len = sizeof source,
    });

    expect_text_token_sv(fixture, 1, 1, APFL_TOK_STRING, (struct apfl_string_view) {
        .bytes = expected,
        .len = sizeof expected,
    });
    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}

// The bytes 0x05, 0x01 and 0x7F are each reported as an unexpected-byte
// error; the tokenizer is expected to carry on with the following name after
// every error.
TEST(err_invalid_bytes, t) {
    struct tokenizer_test *fixture = new_tokenizer_test(t, "\x05" "foo\x01_bar\x7F" "baz");

    expect_error(fixture, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fixture, 1, 2, APFL_TOK_NAME, "foo");

    expect_error(fixture, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fixture, 1, 6, APFL_TOK_NAME, "_bar");

    expect_error(fixture, APFL_ERR_UNEXPECTED_BYTE);
    expect_text_token(fixture, 1, 11, APFL_TOK_NAME, "baz");

    expect_eof(fixture);

    destroy_tokenizer_test(fixture);
}

// List of all test cases defined in this file. Presumably TESTS_BEGIN /
// ADDTEST / TESTS_END come from test.h and hand this list to the test
// runner - confirm there. A new TEST() needs a matching ADDTEST() entry here.
TESTS_BEGIN
    ADDTEST(empty),
    ADDTEST(simple_variable),
    ADDTEST(numbers),
    ADDTEST(names),
    ADDTEST(assignment),
    ADDTEST(all_tokens),
    ADDTEST(strings_with_binary_data),
    ADDTEST(err_invalid_bytes),
TESTS_END