From d094ed7bd50cc199973f80113a6d6a408b35fc42 Mon Sep 17 00:00:00 2001 From: Laria Carolin Chabowski Date: Fri, 10 Dec 2021 21:22:16 +0100 Subject: [PATCH] Initial commit --- .gitignore | 19 + Makefile.am | 1 + configure.ac | 10 + examples/examples.apfl | 60 + functional_tests/function-const-capture.at | 14 + functional_tests/function-mutable-capture.at | 25 + functional_tests/hello-world.at | 5 + functional_tests/recursive-faculty.at | 10 + src/Makefile.am | 35 + src/apfl.h | 454 +++++ src/common.h | 27 + src/error.c | 117 ++ src/expr.c | 919 ++++++++++ src/hashmap.c | 406 +++++ src/hashmap.h | 69 + src/hashmap_foo.c | 142 ++ src/internal.h | 45 + src/main.c | 74 + src/parser.c | 1717 ++++++++++++++++++ src/position.c | 9 + src/resizable.c | 74 + src/resizable.h | 22 + src/strings.c | 134 ++ src/test.h | 149 ++ src/token.c | 122 ++ src/tokenizer.c | 909 ++++++++++ src/tokenizer_test.c | 284 +++ src/value.h | 0 28 files changed, 5852 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile.am create mode 100644 configure.ac create mode 100644 examples/examples.apfl create mode 100644 functional_tests/function-const-capture.at create mode 100644 functional_tests/function-mutable-capture.at create mode 100644 functional_tests/hello-world.at create mode 100644 functional_tests/recursive-faculty.at create mode 100644 src/Makefile.am create mode 100644 src/apfl.h create mode 100644 src/common.h create mode 100644 src/error.c create mode 100644 src/expr.c create mode 100644 src/hashmap.c create mode 100644 src/hashmap.h create mode 100644 src/hashmap_foo.c create mode 100644 src/internal.h create mode 100644 src/main.c create mode 100644 src/parser.c create mode 100644 src/position.c create mode 100644 src/resizable.c create mode 100644 src/resizable.h create mode 100644 src/strings.c create mode 100644 src/test.h create mode 100644 src/token.c create mode 100644 src/tokenizer.c create mode 100644 src/tokenizer_test.c create mode 100644 src/value.h diff 
--git a/.gitignore b/.gitignore new file mode 100644 index 0000000..273e16f --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +*.o +*.trs +*.log +*.test +depcomp +config.status +Makefile.in +missing +test-driver +ar-lib +compile +configure +*.m4 +autom4te.cache/ +.deps/ +Makefile +install-sh +src/libapfl.a +src/apfl diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..af437a6 --- /dev/null +++ b/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..a81df3c --- /dev/null +++ b/configure.ac @@ -0,0 +1,10 @@ +AC_INIT([apfl], [0.0.1]) +AM_INIT_AUTOMAKE([-Wall -Werror foreign]) +AC_PROG_CC +AM_PROG_AR +AC_PROG_RANLIB +AC_CONFIG_FILES([ + Makefile + src/Makefile +]) +AC_OUTPUT diff --git a/examples/examples.apfl b/examples/examples.apfl new file mode 100644 index 0000000..cb020aa --- /dev/null +++ b/examples/examples.apfl @@ -0,0 +1,60 @@ +pipe := { + x -> x + x f?callable ~more -> + pipe (f x) ~more + x [f?callable ~args] ~more -> + pipe (f ~args x) ~more +} + +map := { + _ [] -> [] + f [x ~xs] -> + [(f x) ~(map f xs)] +} + +filter := { + _ [] -> [] + f [x ~xs] -> + if (f x) { + [x ~(filter f xs)] + } { + filter f xs + } +} + +is-odd := { x -> + == 1 (mod x 2) +} + +partial := { f ~a1 -> + { ~a2 -> f ~a1 ~a2 } +} + +reduce := { + _ carry [] -> carry + f carry [x ~xs] -> + reduce f (f x carry) xs +} + +sum := partial reduce + 0 + +pipe ~[ + (range 1 10) # [1 2 3 4 5 6 7 8 9] + [filter is-odd] # [1 3 5 7 9] + [map (partial * 10)] # [10 30 50 70 90] + sum # 250 +] + +##### + +# As each closure is unique, a closure can be used as a symbol, i.e. a unique +# value whose only use is to be compared with itself. 
+symbol := { -> + sym := { -> sym } + sym # technically not necessary, but this way it's more obvious we're returning sym +} + +a := (symbol) +b := (symbol) + +assert{ != a b } diff --git a/functional_tests/function-const-capture.at b/functional_tests/function-const-capture.at new file mode 100644 index 0000000..459538e --- /dev/null +++ b/functional_tests/function-const-capture.at @@ -0,0 +1,14 @@ +=== script === +adder = { a -> + { b -> + a b } +} + +add10 = adder 10 +inc = adder 1 + +print (add10 32) +print (inc 665) + +=== output === +42 +666 diff --git a/functional_tests/function-mutable-capture.at b/functional_tests/function-mutable-capture.at new file mode 100644 index 0000000..3bef946 --- /dev/null +++ b/functional_tests/function-mutable-capture.at @@ -0,0 +1,25 @@ +=== script === +counter = { + i = 0 + { i = i + 1 } +} + +c1 = (counter) +c2 = (counter) + +print (c1) +print (c2) +print (c1) +print (c2) +print (c2) +print (c1) +print (c2) + +=== output === +1 +1 +2 +2 +3 +3 +4 diff --git a/functional_tests/hello-world.at b/functional_tests/hello-world.at new file mode 100644 index 0000000..a9051eb --- /dev/null +++ b/functional_tests/hello-world.at @@ -0,0 +1,5 @@ +=== script === +print "Hello World!" + +=== output === +Hello World! 
diff --git a/functional_tests/recursive-faculty.at b/functional_tests/recursive-faculty.at new file mode 100644 index 0000000..e3a19d4 --- /dev/null +++ b/functional_tests/recursive-faculty.at @@ -0,0 +1,10 @@ +=== script === +fac = { + 0 -> 1 + n -> * n (fac (--n )) +} + +print (fac 10) + +=== output === +3628800 diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..ce4a37f --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,35 @@ +AM_CFLAGS=--std=c11 -Wall -Werror -Wextra -pedantic + +lib_LIBRARIES = libapfl.a + +libapfl_a_SOURCES = +libapfl_a_SOURCES += error.c +libapfl_a_SOURCES += expr.c +libapfl_a_SOURCES += position.c +libapfl_a_SOURCES += resizable.c +libapfl_a_SOURCES += strings.c +libapfl_a_SOURCES += token.c +libapfl_a_SOURCES += tokenizer.c + +apfl_internal_headers = +apfl_internal_headers += common.h +apfl_internal_headers += internal.h +apfl_internal_headers += resizable.h + +EXTRA_DIST = $(apfl_internal_headers) apfl.h + +apflincludesdir = $(pkgincludedir)/apfl +apflincludes_HEADERS = apfl.h + +bin_PROGRAMS = apfl + +apfl_SOURCES = main.c apfl.h +apfl_LDADD = libapfl.a + +TESTS = +check_PROGRAMS = + +TESTS += tokenizer.test +check_PROGRAMS += tokenizer.test +tokenizer_test_SOURCES = tokenizer_test.c test.h +tokenizer_test_LDADD = libapfl.a diff --git a/src/apfl.h b/src/apfl.h new file mode 100644 index 0000000..b2fd227 --- /dev/null +++ b/src/apfl.h @@ -0,0 +1,454 @@ +#ifndef APFL_H +#define APFL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +typedef double apfl_number; + +struct apfl_position { + int line; + int col; +}; + +bool apfl_position_eq(struct apfl_position, struct apfl_position); + +// Strings + +struct apfl_string_view { + const char *bytes; + size_t len; +}; + +struct apfl_string { + char *bytes; + size_t len; +}; + +#define APFL_STR_FMT "%.*s" +#define APFL_STR_FMT_ARGS(s) (int)(s).len,(s).bytes + +struct apfl_string_view apfl_string_view_from_view(struct apfl_string_view); 
+struct apfl_string_view apfl_string_view_from_cstr(char *); +struct apfl_string_view apfl_string_view_from_const_cstr(const char *); +struct apfl_string_view apfl_string_view_from_string(struct apfl_string); + +#define apfl_string_view_from(s) _Generic((s), \ + struct apfl_string: apfl_string_view_from_string, \ + struct apfl_string_view: apfl_string_view_from_view, \ + char *: apfl_string_view_from_cstr, \ + const char *: apfl_string_view_from_const_cstr \ +)(s) + +int apfl_string_view_cmp(struct apfl_string_view, struct apfl_string_view); + +#define apfl_string_cmp(a, b) apfl_string_view_cmp(apfl_string_view_from(a), apfl_string_view_from(b)) + +void apfl_string_deinit(struct apfl_string *); +bool apfl_string_copy(struct apfl_string *dst, struct apfl_string_view src); +struct apfl_string apfl_string_move(struct apfl_string *src); + +struct apfl_string_builder { + char *bytes; + size_t len; + size_t cap; +}; + +void apfl_string_builder_init(struct apfl_string_builder *); +void apfl_string_builder_deinit(struct apfl_string_builder *); +bool apfl_string_builder_append(struct apfl_string_builder *, struct apfl_string_view); +bool apfl_string_builder_append_byte(struct apfl_string_builder *, char byte); +struct apfl_string apfl_string_builder_move_string(struct apfl_string_builder *); + +#define apfl_string_builder_append_cstr(builder, cstr) (apfl_string_builder_append((builder), apfl_string_view_from_cstr((cstr)))) + +// Tokens + +enum apfl_token_type { + APFL_TOK_LPAREN, + APFL_TOK_RPAREN, + APFL_TOK_LBRACKET, + APFL_TOK_RBRACKET, + APFL_TOK_LBRACE, + APFL_TOK_RBRACE, + APFL_TOK_MAPSTO, + APFL_TOK_EXPAND, + APFL_TOK_DOT, + APFL_TOK_AT, + APFL_TOK_SEMICOLON, + APFL_TOK_LINEBREAK, + APFL_TOK_CONTINUE_LINE, + APFL_TOK_COMMENT, + APFL_TOK_COMMA, + APFL_TOK_QUESTION_MARK, + APFL_TOK_STRINGIFY, + APFL_TOK_ASSIGN, + APFL_TOK_LOCAL_ASSIGN, + APFL_TOK_NUMBER, + APFL_TOK_NAME, + APFL_TOK_STRING, +}; + +struct apfl_token { + enum apfl_token_type type; + struct apfl_position 
position; + union { + struct apfl_string text; + apfl_number number; + }; +}; + +void apfl_token_deinit(struct apfl_token *); + +const char *apfl_token_type_name(enum apfl_token_type); + +void apfl_token_print(struct apfl_token, FILE *); + +// Errors + +enum apfl_error_type { + APFL_ERR_MALLOC_FAILED, + APFL_ERR_INPUT_ERROR, + APFL_ERR_UNEXPECTED_EOF, + APFL_ERR_EXPECTED_EQ_AFTER_COLON, + APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER, + APFL_ERR_EXPECTED_DIGIT, + APFL_ERR_EXPECTED_HEX_IN_HEX_ESCAPE, + APFL_ERR_INVALID_ESCAPE_SEQUENCE, + APFL_ERR_NO_LINEBREAK_AFTER_CONTINUE_LINE, + APFL_ERR_UNEXPECTED_TOKEN, + APFL_ERR_MISMATCHING_CLOSING_BRACKET, + APFL_ERR_UNEXPECTED_EOF_AFTER_TOKEN, + APFL_ERR_STATEMENTS_BEFORE_PARAMETERS, + APFL_ERR_EMPTY_ASSIGNMENT_BEFORE_PARAMETERS, + APFL_ERR_UNEXPECTED_EXPRESSION, + APFL_ERR_INVALID_ASSIGNMENT_LHS, + APFL_ERR_EMPTY_ASSIGNMENT, +}; + +struct apfl_error { + enum apfl_error_type type; + + // Optional data + struct apfl_position position; + struct apfl_position position2; + enum apfl_token_type token_type; + enum apfl_token_type token_type2; + char byte; +}; + +void apfl_error_print(struct apfl_error, FILE *); + +struct apfl_error apfl_error_simple(enum apfl_error_type); +bool apfl_error_is_fatal_type(enum apfl_error_type); + +#define APFL_ERROR_IS_FATAL(err) (apfl_error_is_fatal_type((err).type)) + +enum apfl_expr_type { + APFL_EXPR_LIST, + APFL_EXPR_DICT, + APFL_EXPR_CALL, + APFL_EXPR_SIMPLE_FUNC, + APFL_EXPR_COMPLEX_FUNC, + APFL_EXPR_ASSIGNMENT, + APFL_EXPR_DOT, + APFL_EXPR_AT, + APFL_EXPR_CONSTANT, + APFL_EXPR_VAR, +}; + +struct apfl_expr_list_item { + struct apfl_expr *expr; + bool expand; +}; + +struct apfl_expr_list { + struct apfl_expr_list_item *items; + size_t len; +}; + +struct apfl_expr_dict_pair { + struct apfl_expr *k; + struct apfl_expr *v; +}; + +struct apfl_expr_dict { + struct apfl_expr_dict_pair *items; + size_t len; +}; + +struct apfl_expr_call { + struct apfl_expr *callee; + struct apfl_expr_list arguments; +}; + 
+struct apfl_expr_body { + struct apfl_expr *items; + size_t len; +}; + +enum apfl_expr_const_type { + APFL_EXPR_CONST_NIL, + APFL_EXPR_CONST_BOOLEAN, + APFL_EXPR_CONST_STRING, + APFL_EXPR_CONST_NUMBER, +}; + +struct apfl_expr_const { + enum apfl_expr_const_type type; + + union { + // variant nil is without data + bool boolean; + struct apfl_string string; + apfl_number number; + }; +}; + +struct apfl_expr_param_predicate { + struct apfl_expr_param *lhs; + struct apfl_expr *rhs; +}; + +struct apfl_expr_param_list { + struct apfl_expr_param *children; + size_t len; +}; + +enum apfl_expr_param_type { + APFL_EXPR_PARAM_VAR, + APFL_EXPR_PARAM_CONSTANT, + APFL_EXPR_PARAM_PREDICATE, + APFL_EXPR_PARAM_EXPAND, + APFL_EXPR_PARAM_LIST, +}; + +struct apfl_expr_params { + struct apfl_expr_param *params; + size_t len; +}; + +struct apfl_expr_param { + enum apfl_expr_param_type type; + + union { + struct apfl_string var; + struct apfl_expr_const constant; + struct apfl_expr_param_predicate predicate; + struct apfl_expr_param *expand; + struct apfl_expr_params list; + }; +}; + +struct apfl_expr_subfunc { + struct apfl_expr_params params; + struct apfl_expr_body body; +}; + +struct apfl_expr_complex_func { + struct apfl_expr_subfunc *subfuncs; + size_t len; +}; + +enum apfl_expr_assignable_type { + APFL_EXPR_ASSIGNABLE_VAR, + APFL_EXPR_ASSIGNABLE_CONSTANT, + APFL_EXPR_ASSIGNABLE_PREDICATE, + APFL_EXPR_ASSIGNABLE_EXPAND, + APFL_EXPR_ASSIGNABLE_DOT, + APFL_EXPR_ASSIGNABLE_AT, + APFL_EXPR_ASSIGNABLE_LIST, +}; + +struct apfl_expr_assignable_predicate { + struct apfl_expr_assignable *lhs; + struct apfl_expr *rhs; +}; +struct apfl_expr_assignable_dot { + struct apfl_expr_assignable *lhs; + struct apfl_string rhs; +}; +struct apfl_expr_assignable_at { + struct apfl_expr_assignable *lhs; + struct apfl_expr *rhs; +}; +struct apfl_expr_assignable_list { + struct apfl_expr_assignable *children; + size_t len; +}; + +struct apfl_expr_assignable { + enum apfl_expr_assignable_type type; + + 
union { + struct apfl_string var; + struct apfl_expr_const constant; + struct apfl_expr_assignable_predicate predicate; + struct apfl_expr_assignable *expand; + struct apfl_expr_assignable_dot dot; + struct apfl_expr_assignable_at at; + struct apfl_expr_assignable_list list; + }; +}; + +struct apfl_expr_assignment { + bool local; + struct apfl_expr_assignable lhs; + struct apfl_expr *rhs; +}; + +struct apfl_expr_dot { + struct apfl_expr *lhs; + struct apfl_string rhs; +}; + +struct apfl_expr_at { + struct apfl_expr *lhs; + struct apfl_expr *rhs; +}; + +struct apfl_expr { + enum apfl_expr_type type; + + union { + struct apfl_expr_list list; + struct apfl_expr_dict dict; + struct apfl_expr_call call; + struct apfl_expr_body simple_func; + struct apfl_expr_complex_func complex_func; + struct apfl_expr_assignment assignment; + struct apfl_expr_dot dot; + struct apfl_expr_at at; + struct apfl_expr_const constant; + struct apfl_string var; + }; + + struct apfl_position position; +}; + +void apfl_expr_print(struct apfl_expr, FILE *); + +bool apfl_expr_eq(struct apfl_expr, struct apfl_expr); + +// Begin deinit functions + +void apfl_expr_deinit(struct apfl_expr *); +void apfl_expr_list_deinit(struct apfl_expr_list *); +void apfl_expr_list_item_deinit(struct apfl_expr_list_item *); +void apfl_expr_dict_pair_deinit(struct apfl_expr_dict_pair *); +void apfl_expr_dict_deinit(struct apfl_expr_dict *); +void apfl_expr_call_deinit(struct apfl_expr_call *); +void apfl_expr_body_deinit(struct apfl_expr_body *); +void apfl_expr_const_deinit(struct apfl_expr_const *); +void apfl_expr_param_predicate_deinit(struct apfl_expr_param_predicate *); +void apfl_expr_param_list_deinit(struct apfl_expr_param_list *); +void apfl_expr_params_deinit(struct apfl_expr_params *); +void apfl_expr_param_deinit(struct apfl_expr_param *); +void apfl_expr_subfunc_deinit(struct apfl_expr_subfunc *); +void apfl_expr_complex_func_deinit(struct apfl_expr_complex_func *); +void 
apfl_expr_assignable_predicate_deinit(struct apfl_expr_assignable_predicate *); +void apfl_expr_assignable_dot_deinit(struct apfl_expr_assignable_dot *); +void apfl_expr_assignable_at_deinit(struct apfl_expr_assignable_at *); +void apfl_expr_assignable_list_deinit(struct apfl_expr_assignable_list *); +void apfl_expr_assignable_deinit(struct apfl_expr_assignable *); +void apfl_expr_assignment_deinit(struct apfl_expr_assignment *); +void apfl_expr_dot_deinit(struct apfl_expr_dot *); +void apfl_expr_at_deinit(struct apfl_expr_at *); + +// End deinit functions + +// Begin move functions + +struct apfl_expr apfl_expr_move(struct apfl_expr *); +struct apfl_expr_list apfl_expr_list_move(struct apfl_expr_list *); +struct apfl_expr_list_item apfl_expr_list_item_move(struct apfl_expr_list_item *); +struct apfl_expr_dict_pair apfl_expr_dict_pair_move(struct apfl_expr_dict_pair *); +struct apfl_expr_dict apfl_expr_dict_move(struct apfl_expr_dict *); +struct apfl_expr_call apfl_expr_call_move(struct apfl_expr_call *); +struct apfl_expr_body apfl_expr_body_move(struct apfl_expr_body *); +struct apfl_expr_const apfl_expr_const_move(struct apfl_expr_const *); +struct apfl_expr_param_predicate apfl_expr_param_predicate_move(struct apfl_expr_param_predicate *); +struct apfl_expr_param_list apfl_expr_param_list_move(struct apfl_expr_param_list *); +struct apfl_expr_params apfl_expr_params_move(struct apfl_expr_params *); +struct apfl_expr_param apfl_expr_param_move(struct apfl_expr_param *); +struct apfl_expr_subfunc apfl_expr_subfunc_move(struct apfl_expr_subfunc *); +struct apfl_expr_complex_func apfl_expr_complex_func_move(struct apfl_expr_complex_func *); +struct apfl_expr_assignable_predicate apfl_expr_assignable_predicate_move(struct apfl_expr_assignable_predicate *); +struct apfl_expr_assignable_dot apfl_expr_assignable_dot_move(struct apfl_expr_assignable_dot *); +struct apfl_expr_assignable_at apfl_expr_assignable_at_move(struct apfl_expr_assignable_at *); +struct 
apfl_expr_assignable_list apfl_expr_assignable_list_move(struct apfl_expr_assignable_list *); +struct apfl_expr_assignable apfl_expr_assignable_move(struct apfl_expr_assignable *); +struct apfl_expr_assignment apfl_expr_assignment_move(struct apfl_expr_assignment *); +struct apfl_expr_dot apfl_expr_dot_move(struct apfl_expr_dot *); +struct apfl_expr_at apfl_expr_at_move(struct apfl_expr_at *); + +// End move functions + + +enum apfl_parse_result { + APFL_PARSE_OK, + APFL_PARSE_EOF, + APFL_PARSE_ERROR, +}; + +struct apfl_tokenizer; + +typedef struct apfl_tokenizer *apfl_tokenizer_ptr; + +typedef bool (*apfl_source_reader_cb)(void *context, char *buf, size_t *len, bool need); + +apfl_tokenizer_ptr apfl_tokenizer_new(apfl_source_reader_cb, void *context); +void apfl_tokenizer_destroy(apfl_tokenizer_ptr); + +enum apfl_parse_result apfl_tokenizer_next(apfl_tokenizer_ptr, bool need); + +/* Get the current token. + * Return value is undefined when the last call to apfl_tokenizer_next did not + * return APFL_PARSE_OK. + */ +struct apfl_token apfl_tokenizer_get_token(apfl_tokenizer_ptr); + +/* Get the current error. + * Return value is undefined when the last call to apfl_tokenizer_next did not + * return APFL_PARSE_ERROR. + */ +struct apfl_error apfl_tokenizer_get_error(apfl_tokenizer_ptr); + + +struct apfl_parser_token_source { + enum apfl_parse_result (*next)(void *, bool need); + struct apfl_token (*get_token)(void *); + struct apfl_error (*get_error)(void *); + void *opaque; +}; + +struct apfl_parser; + +typedef struct apfl_parser *apfl_parser_ptr; + +apfl_parser_ptr apfl_parser_new(struct apfl_parser_token_source); + +/* Destroys the parser. + * Note that if the token source needs its own destruction, you'll have to do + * that yourself after destroying the parser. + */ +void apfl_parser_destroy(apfl_parser_ptr); + +enum apfl_parse_result apfl_parser_next(apfl_parser_ptr); + +/* Get the current error. 
+ * Return value is undefined when the last call to apfl_parser_next did not + * return APFL_PARSE_ERROR. + */ +struct apfl_error apfl_parser_get_error(apfl_parser_ptr); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..6fae71e --- /dev/null +++ b/src/common.h @@ -0,0 +1,27 @@ +#ifndef APFL_COMMON_H +#define APFL_COMMON_H + +#ifdef __cplusplus +extern "C" { +#endif + +// APFL_DESTROY destroys a dynamically allocated value. +// It will first deinit the value using deiniter, +// free the memory and then set the variable to NULL. +// It is always allowed to destroy an already destroyed +// or deinited value. +#define APFL_DESTROY(var, deiniter) \ + do { \ + if ((var) == NULL) { \ + break; \ + } \ + deiniter(var); \ + free(var); \ + (var) = NULL; \ + } while(0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/error.c b/src/error.c new file mode 100644 index 0000000..5323a14 --- /dev/null +++ b/src/error.c @@ -0,0 +1,117 @@ +#include + +#include "apfl.h" + +#define POSFMT "%d:%d" +#define POSARGS error.position.line, error.position.col +#define POS2ARGS error.position2.line, error.position2.col + +void +apfl_error_print(struct apfl_error error, FILE *file) +{ + switch (error.type) { + case APFL_ERR_MALLOC_FAILED: + fprintf(file, "Could not allocate memory\n"); + return; + case APFL_ERR_INPUT_ERROR: + fprintf(file, "Input error while parsing\n"); + return; + case APFL_ERR_UNEXPECTED_EOF: + fprintf(file, "Unexpected end of file\n"); + return; + case APFL_ERR_EXPECTED_EQ_AFTER_COLON: + fprintf(file, "Expected '=' after ':' at " POSFMT "\n", POSARGS); + return; + case APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER: + fprintf(file, "Unexpected byte '%c' while parsing number at " POSFMT "\n", error.byte, POSARGS); + return; + case APFL_ERR_EXPECTED_DIGIT: + fprintf(file, "Expected a digit at " POSFMT "\n", POSARGS); + return; + case APFL_ERR_EXPECTED_HEX_IN_HEX_ESCAPE: + fprintf(file, "Expected a hex-digit 
in hex escape at " POSFMT "\n", POSARGS); + return; + case APFL_ERR_INVALID_ESCAPE_SEQUENCE: + fprintf(file, "Invalid escape sequence \\%c at " POSFMT "\n", error.byte, POSARGS); + return; + case APFL_ERR_NO_LINEBREAK_AFTER_CONTINUE_LINE: + fprintf(file, "No line break (after optional comments) after \\ at " POSFMT "\n", POSARGS); + return; + case APFL_ERR_UNEXPECTED_TOKEN: + fprintf(file, "Unexpected `%s` token at " POSFMT "\n", apfl_token_type_name(error.token_type), POSARGS); + return; + case APFL_ERR_MISMATCHING_CLOSING_BRACKET: + fprintf( + file, + "Closing `%s` token at " POSFMT " does not match opening `%s` at " POSFMT "\n", + apfl_token_type_name(error.token_type), + POSARGS, + apfl_token_type_name(error.token_type2), + POS2ARGS + ); + return; + case APFL_ERR_UNEXPECTED_EOF_AFTER_TOKEN: + fprintf( + file, + "Unexpected end of file after `%s` token at " POSFMT "\n", + apfl_token_type_name(error.token_type), + POSARGS + ); + return; + case APFL_ERR_STATEMENTS_BEFORE_PARAMETERS: + fprintf( + file, + "Unexpected statements before parameters near " POSFMT "\n", + POSARGS + ); + return; + case APFL_ERR_EMPTY_ASSIGNMENT_BEFORE_PARAMETERS: + fprintf( + file, + "Unexpected empty assignment before parameters near " POSFMT "\n", + POSARGS + ); + return; + case APFL_ERR_UNEXPECTED_EXPRESSION: + fprintf( + file, + "Unexpected expression near " POSFMT "\n", + POSARGS + ); + return; + case APFL_ERR_INVALID_ASSIGNMENT_LHS: + fprintf( + file, + "Invalid left hand side of assignment near " POSFMT "\n", + POSARGS + ); + return; + case APFL_ERR_EMPTY_ASSIGNMENT: + fprintf( + file, + "Empty assignment at " POSFMT "\n", + POSARGS + ); + return; + } + + fprintf(file, "Unknown error %d\n", (int)error.type); +} + +struct apfl_error +apfl_error_simple(enum apfl_error_type type) +{ + return (struct apfl_error) { .type = type }; +} + +bool +apfl_error_is_fatal_type(enum apfl_error_type type) +{ + switch (type) { + case APFL_ERR_MALLOC_FAILED: + case APFL_ERR_INPUT_ERROR: + return 
true; + default: + return false; + } +} diff --git a/src/expr.c b/src/expr.c new file mode 100644 index 0000000..7a80ca6 --- /dev/null +++ b/src/expr.c @@ -0,0 +1,921 @@ +#include +#include +#include + +#include "apfl.h" + +#include "common.h" +#include "internal.h" + +void +apfl_expr_deinit(struct apfl_expr *expr) +{ + switch (expr->type) { + case APFL_EXPR_LIST: + apfl_expr_list_deinit(&expr->list); + break; + case APFL_EXPR_DICT: + apfl_expr_dict_deinit(&expr->dict); + break; + case APFL_EXPR_CALL: + apfl_expr_call_deinit(&expr->call); + break; + case APFL_EXPR_SIMPLE_FUNC: + apfl_expr_body_deinit(&expr->simple_func); + break; + case APFL_EXPR_COMPLEX_FUNC: + apfl_expr_complex_func_deinit(&expr->complex_func); + break; + case APFL_EXPR_ASSIGNMENT: + apfl_expr_assignment_deinit(&expr->assignment); + break; + case APFL_EXPR_DOT: + apfl_expr_dot_deinit(&expr->dot); + break; + case APFL_EXPR_AT: + apfl_expr_at_deinit(&expr->at); + break; + case APFL_EXPR_CONSTANT: + apfl_expr_const_deinit(&expr->constant); + break; + case APFL_EXPR_VAR: + apfl_string_deinit(&expr->var); + break; + } +} + +void +apfl_expr_list_deinit(struct apfl_expr_list *list) +{ + DEINIT_LIST(list->items, list->len, apfl_expr_list_item_deinit); +} + +void +apfl_expr_list_item_deinit(struct apfl_expr_list_item *item) +{ + DESTROY(item->expr, apfl_expr_deinit); +} + +void +apfl_expr_dict_pair_deinit(struct apfl_expr_dict_pair *pair) +{ + DESTROY(pair->k, apfl_expr_deinit); + DESTROY(pair->v, apfl_expr_deinit); +} + +void +apfl_expr_dict_deinit(struct apfl_expr_dict *dict) +{ + DEINIT_LIST(dict->items, dict->len, apfl_expr_dict_pair_deinit); +} + +void +apfl_expr_call_deinit(struct apfl_expr_call *call) +{ + DESTROY(call->callee, apfl_expr_deinit); + // The argument list is owned by the call expression too; release it as well. + apfl_expr_list_deinit(&call->arguments); +} + +void +apfl_expr_body_deinit(struct apfl_expr_body *body) +{ + DEINIT_LIST(body->items, body->len, apfl_expr_deinit); +} + +void +apfl_expr_const_deinit(struct apfl_expr_const *constant) +{ + switch (constant->type) { + case APFL_EXPR_CONST_NIL: 
case APFL_EXPR_CONST_BOOLEAN: + case APFL_EXPR_CONST_NUMBER: + // nop + break; + case APFL_EXPR_CONST_STRING: + apfl_string_deinit(&constant->string); + break; + } +} + +#define DEINIT_GENERIC_LHS_RHS_EXPR(x, lhs_deiniter) \ + do { \ + DESTROY(x->lhs, lhs_deiniter); \ + DESTROY(x->rhs, apfl_expr_deinit); \ + } while (0) + +void +apfl_expr_param_predicate_deinit(struct apfl_expr_param_predicate *pred) +{ + DEINIT_GENERIC_LHS_RHS_EXPR(pred, apfl_expr_param_deinit); +} + +void +apfl_expr_param_list_deinit(struct apfl_expr_param_list *list) +{ + DEINIT_LIST(list->children, list->len, apfl_expr_param_deinit); +} + +void +apfl_expr_params_deinit(struct apfl_expr_params *params) +{ + DEINIT_LIST(params->params, params->len, apfl_expr_param_deinit); +} + +void +apfl_expr_param_deinit(struct apfl_expr_param *param) +{ + switch (param->type) { + case APFL_EXPR_PARAM_VAR: + apfl_string_deinit(&param->var); + break; + case APFL_EXPR_PARAM_CONSTANT: + apfl_expr_const_deinit(&param->constant); + break; + case APFL_EXPR_PARAM_PREDICATE: + apfl_expr_param_predicate_deinit(&param->predicate); + break; + case APFL_EXPR_PARAM_EXPAND: + DESTROY(param->expand, apfl_expr_param_deinit); + break; + case APFL_EXPR_PARAM_LIST: + apfl_expr_params_deinit(&param->list); + break; + } +} + +void +apfl_expr_subfunc_deinit(struct apfl_expr_subfunc *subfunc) +{ + apfl_expr_params_deinit(&subfunc->params); + apfl_expr_body_deinit(&subfunc->body); +} + +void +apfl_expr_complex_func_deinit(struct apfl_expr_complex_func *cf) +{ + DEINIT_LIST(cf->subfuncs, cf->len, apfl_expr_subfunc_deinit); +} + +void +apfl_expr_assignable_predicate_deinit(struct apfl_expr_assignable_predicate *pred) +{ + DEINIT_GENERIC_LHS_RHS_EXPR(pred, apfl_expr_assignable_deinit); +} + +#define GENERIC_DOT_DEINIT(dot, lhs_deiniter) \ + do { \ + DESTROY(dot->lhs, lhs_deiniter); \ + apfl_string_deinit(&dot->rhs); \ + } while (0) + +void +apfl_expr_assignable_dot_deinit(struct apfl_expr_assignable_dot *dot) +{ + GENERIC_DOT_DEINIT(dot, 
apfl_expr_assignable_deinit); +} + +void +apfl_expr_assignable_at_deinit(struct apfl_expr_assignable_at *at) +{ + DEINIT_GENERIC_LHS_RHS_EXPR(at, apfl_expr_assignable_deinit); +} + +void +apfl_expr_assignable_list_deinit(struct apfl_expr_assignable_list *list) +{ + DEINIT_LIST(list->children, list->len, apfl_expr_assignable_deinit); +} + +void +apfl_expr_assignable_deinit(struct apfl_expr_assignable *a) +{ + switch (a->type) { + case APFL_EXPR_ASSIGNABLE_VAR: + apfl_string_deinit(&a->var); + break; + case APFL_EXPR_ASSIGNABLE_CONSTANT: + apfl_expr_const_deinit(&a->constant); + break; + case APFL_EXPR_ASSIGNABLE_PREDICATE: + apfl_expr_assignable_predicate_deinit(&a->predicate); + break; + case APFL_EXPR_ASSIGNABLE_EXPAND: + DESTROY(a->expand, apfl_expr_assignable_deinit); + break; + case APFL_EXPR_ASSIGNABLE_DOT: + apfl_expr_assignable_dot_deinit(&a->dot); + break; + case APFL_EXPR_ASSIGNABLE_AT: + apfl_expr_assignable_at_deinit(&a->at); + break; + case APFL_EXPR_ASSIGNABLE_LIST: + apfl_expr_assignable_list_deinit(&a->list); + break; + } +} + +void +apfl_expr_assignment_deinit(struct apfl_expr_assignment *a) +{ + apfl_expr_assignable_deinit(&a->lhs); + DESTROY(a->rhs, apfl_expr_deinit); +} + +void +apfl_expr_dot_deinit(struct apfl_expr_dot *dot) +{ + GENERIC_DOT_DEINIT(dot, apfl_expr_deinit); +} + +void +apfl_expr_at_deinit(struct apfl_expr_at *at) +{ + DEINIT_GENERIC_LHS_RHS_EXPR(at, apfl_expr_deinit); +} + +// Move functions + +struct apfl_expr +apfl_expr_move(struct apfl_expr *in) +{ + struct apfl_expr out = *in; + switch (in->type) { + case APFL_EXPR_LIST: + out.list = apfl_expr_list_move(&in->list); + break; + case APFL_EXPR_DICT: + out.dict = apfl_expr_dict_move(&in->dict); + break; + case APFL_EXPR_CALL: + out.call = apfl_expr_call_move(&in->call); + break; + case APFL_EXPR_SIMPLE_FUNC: + out.simple_func = apfl_expr_body_move(&in->simple_func); + break; + case APFL_EXPR_COMPLEX_FUNC: + out.complex_func = apfl_expr_complex_func_move(&in->complex_func); + 
break; + case APFL_EXPR_ASSIGNMENT: + out.assignment = apfl_expr_assignment_move(&in->assignment); + break; + case APFL_EXPR_DOT: + out.dot = apfl_expr_dot_move(&in->dot); + break; + case APFL_EXPR_AT: + out.at = apfl_expr_at_move(&in->at); + break; + case APFL_EXPR_CONSTANT: + out.constant = apfl_expr_const_move(&in->constant); + break; + case APFL_EXPR_VAR: + out.var = apfl_string_move(&in->var); + break; + } + return out; +} + +#define MOVE_LIST(out, in, items, len) \ + do { \ + MOVEPTR(out.items, in->items); \ + out.len = in->len; \ + in->len = 0; \ + } while (0) + +struct apfl_expr_list +apfl_expr_list_move(struct apfl_expr_list *in) +{ + struct apfl_expr_list out; + MOVE_LIST(out, in, items, len); + return out; +} + +struct apfl_expr_list_item +apfl_expr_list_item_move(struct apfl_expr_list_item *in) +{ + struct apfl_expr_list_item out = *in; + in->expr = NULL; + return out; +} + +struct apfl_expr_dict_pair +apfl_expr_dict_pair_move(struct apfl_expr_dict_pair *in) +{ + struct apfl_expr_dict_pair out = *in; + in->k = NULL; + in->v = NULL; + return out; +} + +struct apfl_expr_dict +apfl_expr_dict_move(struct apfl_expr_dict *in) +{ + struct apfl_expr_dict out; + MOVE_LIST(out, in, items, len); + return out; +} + +struct apfl_expr_call +apfl_expr_call_move(struct apfl_expr_call *in) +{ + struct apfl_expr_call out; + + MOVEPTR(out.callee, in->callee); + out.arguments = apfl_expr_list_move(&in->arguments); + + return out; +} + +struct apfl_expr_body +apfl_expr_body_move(struct apfl_expr_body *in) +{ + struct apfl_expr_body out; + MOVE_LIST(out, in, items, len); + return out; +} + +struct apfl_expr_const +apfl_expr_const_move(struct apfl_expr_const *in) +{ + struct apfl_expr_const out = *in; + + switch (in->type) { + case APFL_EXPR_CONST_NIL: + case APFL_EXPR_CONST_BOOLEAN: + case APFL_EXPR_CONST_NUMBER: + // nop + break; + case APFL_EXPR_CONST_STRING: + out.string = apfl_string_move(&in->string); + } + + return out; +} + +#define GENERIC_LHS_RHS_PTRS_MOVE(out, in) 
\ + do { \ + MOVEPTR(out.lhs, in->lhs); \ + MOVEPTR(out.rhs, in->rhs); \ + } while (0) + +struct apfl_expr_param_predicate +apfl_expr_param_predicate_move(struct apfl_expr_param_predicate *in) +{ + struct apfl_expr_param_predicate out; + GENERIC_LHS_RHS_PTRS_MOVE(out, in); + return out; +} + +struct apfl_expr_param_list +apfl_expr_param_list_move(struct apfl_expr_param_list *in) +{ + struct apfl_expr_param_list out; + MOVE_LIST(out, in, children, len); + return out; +} + +struct apfl_expr_params +apfl_expr_params_move(struct apfl_expr_params *in) +{ + struct apfl_expr_params out; + MOVE_LIST(out, in, params, len); + return out; +} + +struct apfl_expr_param +apfl_expr_param_move(struct apfl_expr_param *in) +{ + struct apfl_expr_param out = *in; + switch (in->type) { + case APFL_EXPR_PARAM_VAR: + out.var = apfl_string_move(&in->var); + break; + case APFL_EXPR_PARAM_CONSTANT: + out.constant = apfl_expr_const_move(&in->constant); + break; + case APFL_EXPR_PARAM_PREDICATE: + out.predicate = apfl_expr_param_predicate_move(&in->predicate); + break; + case APFL_EXPR_PARAM_EXPAND: + MOVEPTR(out.expand, in->expand); + break; + case APFL_EXPR_PARAM_LIST: + out.list = apfl_expr_params_move(&in->list); + break; + } + return out; +} + +struct apfl_expr_subfunc +apfl_expr_subfunc_move(struct apfl_expr_subfunc *in) +{ + return (struct apfl_expr_subfunc) { + .params = apfl_expr_params_move(&in->params), + .body = apfl_expr_body_move(&in->body), + }; +} + +struct apfl_expr_complex_func +apfl_expr_complex_func_move(struct apfl_expr_complex_func *in) +{ + struct apfl_expr_complex_func out; + MOVE_LIST(out, in, subfuncs, len); + return out; +} + +struct apfl_expr_assignable_predicate +apfl_expr_assignable_predicate_move(struct apfl_expr_assignable_predicate *in) +{ + struct apfl_expr_assignable_predicate out; + GENERIC_LHS_RHS_PTRS_MOVE(out, in); + return out; +} + +#define GENERIC_DOT_MOVE(out, in) \ + do { \ + MOVEPTR(out.lhs, in->lhs); \ + out.rhs = apfl_string_move(&in->rhs); \ + } 
while (0)
+
+// Moves the lhs pointer and the rhs string of an assignable dot access.
+struct apfl_expr_assignable_dot
+apfl_expr_assignable_dot_move(struct apfl_expr_assignable_dot *in)
+{
+    struct apfl_expr_assignable_dot moved;
+    GENERIC_DOT_MOVE(moved, in);
+    return moved;
+}
+
+// Moves the lhs/rhs pointers of an assignable at access.
+struct apfl_expr_assignable_at
+apfl_expr_assignable_at_move(struct apfl_expr_assignable_at *in)
+{
+    struct apfl_expr_assignable_at moved;
+    GENERIC_LHS_RHS_PTRS_MOVE(moved, in);
+    return moved;
+}
+
+// Transfers the `children` list of `in` into the returned value.
+struct apfl_expr_assignable_list
+apfl_expr_assignable_list_move(struct apfl_expr_assignable_list *in)
+{
+    struct apfl_expr_assignable_list moved;
+    MOVE_LIST(moved, in, children, len);
+    return moved;
+}
+
+// Moves an assignable. The result starts as a plain copy of `*in`;
+// afterwards the union member selected by `type` is moved individually.
+struct apfl_expr_assignable
+apfl_expr_assignable_move(struct apfl_expr_assignable *in)
+{
+    struct apfl_expr_assignable moved = *in;
+
+    switch (in->type) {
+    case APFL_EXPR_ASSIGNABLE_VAR:
+        moved.var = apfl_string_move(&in->var);
+        break;
+    case APFL_EXPR_ASSIGNABLE_CONSTANT:
+        moved.constant = apfl_expr_const_move(&in->constant);
+        break;
+    case APFL_EXPR_ASSIGNABLE_PREDICATE:
+        moved.predicate = apfl_expr_assignable_predicate_move(&in->predicate);
+        break;
+    case APFL_EXPR_ASSIGNABLE_EXPAND:
+        MOVEPTR(moved.expand, in->expand);
+        break;
+    case APFL_EXPR_ASSIGNABLE_DOT:
+        moved.dot = apfl_expr_assignable_dot_move(&in->dot);
+        break;
+    case APFL_EXPR_ASSIGNABLE_AT:
+        moved.at = apfl_expr_assignable_at_move(&in->at);
+        break;
+    case APFL_EXPR_ASSIGNABLE_LIST:
+        moved.list = apfl_expr_assignable_list_move(&in->list);
+        break;
+    }
+
+    return moved;
+}
+
+// Moves an assignment: the assignable on the left and the rhs pointer.
+struct apfl_expr_assignment
+apfl_expr_assignment_move(struct apfl_expr_assignment *in)
+{
+    struct apfl_expr_assignment moved = *in;
+    moved.lhs = apfl_expr_assignable_move(&in->lhs);
+    MOVEPTR(moved.rhs, in->rhs);
+    return moved;
+}
+
+// Moves the lhs pointer and the rhs string of a dot expression.
+struct apfl_expr_dot
+apfl_expr_dot_move(struct apfl_expr_dot *in)
+{
+    struct apfl_expr_dot moved;
+    GENERIC_DOT_MOVE(moved, in);
+    return moved;
+}
+
+// Moves the lhs/rhs pointers of an at expression.
+struct apfl_expr_at
+apfl_expr_at_move(struct apfl_expr_at *in)
+{
+    struct apfl_expr_at moved;
+    GENERIC_LHS_RHS_PTRS_MOVE(moved, in);
+    return moved;
+}
+
+static void 
+print_indented(unsigned indent, FILE *f, const char* fmt, ...)
+{
+    // Emit one indentation unit per level, then the formatted message.
+    for (unsigned level = 0; level < indent; level++) {
+        fputs(" ", f);
+    }
+
+    va_list args;
+    va_start(args, fmt);
+    vfprintf(f, fmt, args);
+    va_end(args);
+}
+
+#define POSFMT "%d:%d"
+#define POSARGS(p) (p).line, (p).col
+
+static void print_expr(struct apfl_expr *expr, unsigned indent, FILE *f);
+
+// Prints every item of an expression list. An item flagged `expand` gets an
+// "Expand" header line and its expression is printed one level deeper.
+static void
+print_expr_list(struct apfl_expr_list *list, unsigned indent, FILE *f)
+{
+    size_t count = list->len;
+
+    for (size_t idx = 0; idx < count; idx++) {
+        if (!list->items[idx].expand) {
+            print_expr(list->items[idx].expr, indent, f);
+            continue;
+        }
+
+        print_indented(indent, f, "Expand\n");
+        print_expr(list->items[idx].expr, indent + 1, f);
+    }
+}
+
+// Prints the expressions of a function body, one after the other.
+static void
+print_body(struct apfl_expr_body *body, unsigned indent, FILE *f)
+{
+    size_t count = body->len;
+
+    for (size_t idx = 0; idx < count; idx++) {
+        print_expr(&body->items[idx], indent, f);
+    }
+}
+
+static void
+print_constant(struct apfl_expr_const constant, unsigned indent, FILE *f)
+{
+    switch (constant.type) {
+    case APFL_EXPR_CONST_NIL:
+        print_indented(indent, f, "Const (nil)\n");
+        break;
+    case APFL_EXPR_CONST_BOOLEAN:
+        print_indented(indent, f, "Const (%s)\n", constant.boolean ? 
"true" : "false");
+        break;
+    case APFL_EXPR_CONST_STRING:
+        print_indented(indent, f, "Const (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(constant.string));
+        break;
+    case APFL_EXPR_CONST_NUMBER:
+        print_indented(indent, f, "Const (%f)\n", constant.number);
+        break;
+    }
+}
+
+// Prints a function parameter as an indented tree. Predicates print their
+// LHS (a parameter) and RHS (an expression) two levels deeper; expand and
+// list parameters print their children one level deeper.
+static void
+print_param(struct apfl_expr_param *param, unsigned indent, FILE *f)
+{
+    switch (param->type) {
+    case APFL_EXPR_PARAM_VAR:
+        print_indented(indent, f, "Var \"" APFL_STR_FMT "\"\n", APFL_STR_FMT_ARGS(param->var));
+        break;
+    case APFL_EXPR_PARAM_CONSTANT:
+        print_constant(param->constant, indent, f);
+        break;
+    case APFL_EXPR_PARAM_PREDICATE:
+        print_indented(indent, f, "Predicate\n");
+        print_indented(indent+1, f, "LHS\n");
+        print_param(param->predicate.lhs, indent+2, f);
+        print_indented(indent+1, f, "RHS\n");
+        print_expr(param->predicate.rhs, indent+2, f);
+        break;
+    case APFL_EXPR_PARAM_EXPAND:
+        print_indented(indent, f, "Expand\n");
+        print_param(param->expand, indent+1, f);
+        break;
+    case APFL_EXPR_PARAM_LIST:
+        print_indented(indent, f, "List\n");
+        for (size_t i = 0; i < param->list.len; i++) {
+            // Pass the address of the i-th child parameter.
+            print_param(&param->list.params[i], indent+1, f);
+        }
+        break;
+    }
+}
+
+// Prints an assignable (the left hand side of an assignment) as an
+// indented tree.
+static void
+print_assignable(struct apfl_expr_assignable assignable, unsigned indent, FILE *f)
+{
+    switch(assignable.type) {
+    case APFL_EXPR_ASSIGNABLE_VAR:
+        print_indented(indent, f, "Var (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(assignable.var));
+        break;
+    case APFL_EXPR_ASSIGNABLE_CONSTANT:
+        print_constant(assignable.constant, indent, f);
+        break;
+    case APFL_EXPR_ASSIGNABLE_PREDICATE:
+        print_indented(indent, f, "Predicate\n");
+        print_indented(indent+1, f, "LHS\n");
+        print_assignable(*assignable.predicate.lhs, indent+2, f);
+        print_indented(indent+1, f, "RHS\n");
+        print_expr(assignable.predicate.rhs, indent+2, f);
+        break;
+    case APFL_EXPR_ASSIGNABLE_EXPAND:
+        print_indented(indent, f, "Expand\n");
+        print_assignable(*assignable.expand, indent+1, f);
+        break;
+    case APFL_EXPR_ASSIGNABLE_DOT:
+        
print_indented(indent, f, "Dot (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(assignable.dot.rhs));
+        print_assignable(*assignable.dot.lhs, indent+1, f);
+        break;
+    case APFL_EXPR_ASSIGNABLE_AT:
+        print_indented(indent, f, "At\n");
+        print_indented(indent+1, f, "LHS\n");
+        print_assignable(*assignable.at.lhs, indent+2, f);
+        print_indented(indent+1, f, "RHS\n");
+        print_expr(assignable.at.rhs, indent+2, f);
+        break;
+    case APFL_EXPR_ASSIGNABLE_LIST:
+        print_indented(indent, f, "List\n");
+        for (size_t child = 0; child < assignable.list.len; child++) {
+            print_assignable(assignable.list.children[child], indent+1, f);
+        }
+        break;
+    }
+}
+
+// Prints an expression as an indented tree: a header line for the node at
+// `indent`, children at deeper levels.
+static void
+print_expr(struct apfl_expr *expr, unsigned indent, FILE *f)
+{
+    // Indentation levels for direct children and their sub-items.
+    const unsigned inner = indent + 1;
+    const unsigned inner2 = indent + 2;
+    const unsigned inner3 = indent + 3;
+
+    switch (expr->type) {
+    case APFL_EXPR_LIST:
+        print_indented(indent, f, "List @ " POSFMT "\n", POSARGS(expr->position));
+        print_expr_list(&expr->list, inner, f);
+        break;
+    case APFL_EXPR_DICT:
+        print_indented(indent, f, "Dict @ " POSFMT "\n", POSARGS(expr->position));
+        for (size_t item = 0; item < expr->dict.len; item++) {
+            print_indented(inner, f, "Dict item\n");
+            print_indented(inner2, f, "Key\n");
+            print_expr(expr->dict.items[item].k, inner3, f);
+            print_indented(inner2, f, "Value\n");
+            print_expr(expr->dict.items[item].v, inner3, f);
+        }
+        break;
+    case APFL_EXPR_CALL:
+        print_indented(indent, f, "Call @ " POSFMT "\n", POSARGS(expr->position));
+        print_indented(inner, f, "Callee\n");
+        print_expr(expr->call.callee, inner2, f);
+        print_indented(inner, f, "Args\n");
+        print_expr_list(&expr->call.arguments, inner2, f);
+        break;
+    case APFL_EXPR_SIMPLE_FUNC:
+        print_indented(indent, f, "Simple function @ " POSFMT "\n", POSARGS(expr->position));
+        print_body(&expr->simple_func, inner, f);
+        break;
+    case APFL_EXPR_COMPLEX_FUNC:
+        print_indented(indent, f, "Complex function @ " POSFMT "\n", POSARGS(expr->position));
+        for (size_t i = 0; i < expr->complex_func.len; i++) {
+            struct apfl_expr_subfunc *sub = &expr->complex_func.subfuncs[i];
+            
print_indented(indent+1, f, "Parameters\n");
+            for (size_t j = 0; j < sub->params.len; j++) {
+                print_param(&sub->params.params[j], indent+2, f);
+            }
+            print_indented(indent+1, f, "Body\n");
+            print_body(&sub->body, indent+2, f);
+        }
+        break;
+    case APFL_EXPR_ASSIGNMENT:
+        // Each header gets its own line, like every other node type;
+        // without the newline the children would continue on the same line.
+        print_indented(indent, f, "Assignment\n");
+        print_indented(indent+1, f, "LHS\n");
+        print_assignable(expr->assignment.lhs, indent+2, f);
+        print_indented(indent+1, f, "RHS\n");
+        print_expr(expr->assignment.rhs, indent+2, f);
+        break;
+    case APFL_EXPR_DOT:
+        print_indented(indent, f, "Dot (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(expr->dot.rhs));
+        print_expr(expr->dot.lhs, indent+1, f);
+        break;
+    case APFL_EXPR_AT:
+        print_indented(indent, f, "At\n");
+        print_indented(indent+1, f, "LHS\n");
+        print_expr(expr->at.lhs, indent+2, f);
+        print_indented(indent+1, f, "RHS\n");
+        print_expr(expr->at.rhs, indent+2, f);
+        break;
+    case APFL_EXPR_CONSTANT:
+        print_constant(expr->constant, indent, f);
+        break;
+    case APFL_EXPR_VAR:
+        print_indented(indent, f, "Var (" APFL_STR_FMT ")\n", APFL_STR_FMT_ARGS(expr->var));
+        break;
+    }
+}
+
+// Prints `expr` as an indented tree to `f`, starting at indentation 0.
+void
+apfl_expr_print(struct apfl_expr expr, FILE *f)
+{
+    print_expr(&expr, 0, f);
+}
+
+// Two expression lists are equal if they have the same length and every
+// pair of items agrees in its expand flag and its expression.
+static bool
+expr_list_eq(struct apfl_expr_list a, struct apfl_expr_list b)
+{
+    if (a.len != b.len) {
+        return false;
+    }
+
+    for (size_t i = 0; i < a.len; i++) {
+        if (
+            a.items[i].expand != b.items[i].expand
+            || !apfl_expr_eq(*a.items[i].expr, *b.items[i].expr)
+        ) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Two bodies are equal if they have the same length and pairwise equal
+// expressions.
+static bool
+body_eq(struct apfl_expr_body a, struct apfl_expr_body b)
+{
+    if (a.len != b.len) {
+        return false;
+    }
+
+    for (size_t i = 0; i < a.len; i++) {
+        if (!apfl_expr_eq(a.items[i], b.items[i])) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+static bool param_eq(struct apfl_expr_param, struct apfl_expr_param);
+
+static bool
+params_eq(struct apfl_expr_params a, struct apfl_expr_params b)
+{
+    if (a.len != b.len) {
+        return false;
+    }
+    for (size_t i = 0; i < a.len; 
i++) {
+        if (!param_eq(a.params[i], b.params[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Compares two constants: the types must match, then the payload selected
+// by the type is compared.
+static bool
+const_eq(struct apfl_expr_const a, struct apfl_expr_const b)
+{
+    if (a.type != b.type) {
+        return false;
+    }
+
+    switch (a.type) {
+    case APFL_EXPR_CONST_NIL:
+        // nil carries no payload
+        return true;
+    case APFL_EXPR_CONST_BOOLEAN:
+        return a.boolean == b.boolean;
+    case APFL_EXPR_CONST_STRING:
+        if (apfl_string_cmp(a.string, b.string) != 0) {
+            return false;
+        }
+        return true;
+    case APFL_EXPR_CONST_NUMBER:
+        return a.number == b.number;
+    }
+
+    // Only reached for a type value outside the enum.
+    assert(false);
+    return false;
+}
+
+// Compares two parameters structurally (recursing into predicates, expands
+// and lists).
+static bool
+param_eq(struct apfl_expr_param a, struct apfl_expr_param b)
+{
+    if (a.type != b.type) {
+        return false;
+    }
+
+    switch (a.type) {
+    case APFL_EXPR_PARAM_VAR:
+        if (apfl_string_cmp(a.var, b.var) != 0) {
+            return false;
+        }
+        return true;
+    case APFL_EXPR_PARAM_CONSTANT:
+        return const_eq(a.constant, b.constant);
+    case APFL_EXPR_PARAM_PREDICATE:
+        if (!param_eq(*a.predicate.lhs, *b.predicate.lhs)) {
+            return false;
+        }
+        return apfl_expr_eq(*a.predicate.rhs, *b.predicate.rhs);
+    case APFL_EXPR_PARAM_EXPAND:
+        return param_eq(*a.expand, *b.expand);
+    case APFL_EXPR_PARAM_LIST:
+        return params_eq(a.list, b.list);
+    }
+
+    // Only reached for a type value outside the enum.
+    assert(false);
+    return false;
+}
+
+// Compares two assignables structurally.
+static bool
+assignable_eq(struct apfl_expr_assignable a, struct apfl_expr_assignable b)
+{
+    if (a.type != b.type) {
+        return false;
+    }
+
+    switch (a.type) {
+    case APFL_EXPR_ASSIGNABLE_VAR:
+        if (apfl_string_cmp(a.var, b.var) != 0) {
+            return false;
+        }
+        return true;
+    case APFL_EXPR_ASSIGNABLE_CONSTANT:
+        return const_eq(a.constant, b.constant);
+    case APFL_EXPR_ASSIGNABLE_PREDICATE:
+        if (!assignable_eq(*a.predicate.lhs, *b.predicate.lhs)) {
+            return false;
+        }
+        return apfl_expr_eq(*a.predicate.rhs, *b.predicate.rhs);
+    case APFL_EXPR_ASSIGNABLE_EXPAND:
+        return assignable_eq(*a.expand, *b.expand);
+    case APFL_EXPR_ASSIGNABLE_DOT:
+        if (!assignable_eq(*a.dot.lhs, *b.dot.lhs)) {
+            return false;
+        }
+        return apfl_string_cmp(a.dot.rhs, b.dot.rhs) == 0;
+    case APFL_EXPR_ASSIGNABLE_AT:
+        if (!assignable_eq(*a.at.lhs, *b.at.lhs)) {
+            return false;
+        }
+        return apfl_expr_eq(*a.at.rhs, *b.at.rhs);
+    case APFL_EXPR_ASSIGNABLE_LIST:
+        if 
(a.list.len != b.list.len) {
+            return false;
+        }
+        for (size_t i = 0; i < a.list.len; i++) {
+            if (!assignable_eq(a.list.children[i], b.list.children[i])) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    assert(false);
+    return false;
+}
+
+// Structural equality of two expressions. Both must have the same type and
+// the same source position; then the payload selected by the type is
+// compared recursively. Returns true if the expressions are equal.
+bool
+apfl_expr_eq(struct apfl_expr a, struct apfl_expr b)
+{
+    if (a.type != b.type) {
+        return false;
+    }
+
+    // Expressions at different positions are not equal. (The check was
+    // inverted before, which made every expression unequal to itself.)
+    if (!apfl_position_eq(a.position, b.position)) {
+        return false;
+    }
+
+    switch (a.type) {
+    case APFL_EXPR_LIST:
+        return expr_list_eq(a.list, b.list);
+    case APFL_EXPR_DICT:
+        if (a.dict.len != b.dict.len) {
+            return false;
+        }
+
+        // Items are compared pairwise in order: keys first, then values.
+        for (size_t i = 0; i < a.dict.len; i++) {
+            if (
+                !apfl_expr_eq(*a.dict.items[i].k, *b.dict.items[i].k)
+                || !apfl_expr_eq(*a.dict.items[i].v, *b.dict.items[i].v)
+            ) {
+                return false;
+            }
+        }
+        return true;
+    case APFL_EXPR_CALL:
+        return apfl_expr_eq(*a.call.callee, *b.call.callee)
+            && expr_list_eq(a.call.arguments, b.call.arguments);
+    case APFL_EXPR_SIMPLE_FUNC:
+        return body_eq(a.simple_func, b.simple_func);
+    case APFL_EXPR_COMPLEX_FUNC:
+        if (a.complex_func.len != b.complex_func.len) {
+            return false;
+        }
+        for (size_t i = 0; i < a.complex_func.len; i++) {
+            if (
+                !params_eq(a.complex_func.subfuncs[i].params, b.complex_func.subfuncs[i].params)
+                || !body_eq(a.complex_func.subfuncs[i].body, b.complex_func.subfuncs[i].body)
+            ) {
+                return false;
+            }
+        }
+        return true;
+    case APFL_EXPR_ASSIGNMENT:
+        return assignable_eq(a.assignment.lhs, b.assignment.lhs)
+            && apfl_expr_eq(*a.assignment.rhs, *b.assignment.rhs);
+    case APFL_EXPR_DOT:
+        return apfl_expr_eq(*a.dot.lhs, *b.dot.lhs)
+            && apfl_string_cmp(a.dot.rhs, b.dot.rhs) == 0;
+    case APFL_EXPR_AT:
+        return apfl_expr_eq(*a.at.lhs, *b.at.lhs)
+            && apfl_expr_eq(*a.at.rhs, *b.at.rhs);
+    case APFL_EXPR_CONSTANT:
+        return const_eq(a.constant, b.constant);
+    case APFL_EXPR_VAR:
+        return apfl_string_cmp(a.var, b.var) == 0;
+    }
+
+    // Only reached for a type value outside the enum.
+    assert(false);
+    return false;
+}
diff --git a/src/hashmap.c b/src/hashmap.c
new file mode 100644
index 
0000000..bc5502a
--- /dev/null
+++ b/src/hashmap.c
@@ -0,0 +1,406 @@
+// NOTE(review): the three system header names were lost in this copy of the
+// patch; they are reconstructed from what this file uses (assert /
+// static_assert, malloc/realloc/free, memcmp/memcpy/memmove) - confirm.
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hashmap.h"
+#include "resizable.h"
+
+// A bucket stores its keys and values in two parallel arrays; entry i
+// consists of keys[i] and values[i]. `len` entries are in use, `cap` is the
+// capacity recorded for both arrays.
+struct bucket {
+    void *keys;
+    void *values;
+    size_t len;
+    size_t cap;
+};
+
+struct apfl_hashmap_struct {
+    struct apfl_hashmap_callbacks callbacks;
+    size_t keysize;
+    size_t valsize;
+    size_t nbuckets;
+    struct bucket *buckets;
+};
+
+// Iteration state: the current bucket and the entry index within it.
+struct apfl_hashmap_cursor_struct {
+    apfl_hashmap map;
+    size_t bucket;
+    size_t i;
+};
+
+#define FNV_PRIME 1099511628211U
+
+// Feeds `len` bytes of `data` into a running FNV-1a hash (xor the byte,
+// then multiply by the prime) and returns the updated hash.
+apfl_hash
+apfl_hash_fnv1a_add(const void *data, size_t len, apfl_hash hash)
+{
+    const uint8_t *bytes = data;
+
+    for (size_t i = 0; i < len; i++) {
+        hash ^= bytes[i];
+        hash *= FNV_PRIME;
+    }
+    return hash;
+}
+
+// FNV-1a hash of `len` bytes of `data`, starting from APFL_HASH_FNV1A_INIT.
+apfl_hash
+apfl_hash_fnv1a(const void *data, size_t len)
+{
+    return apfl_hash_fnv1a_add(data, len, APFL_HASH_FNV1A_INIT);
+}
+
+#define HAS_CALLBACK(map, cb) ((map)->callbacks.cb != NULL)
+#define INVOKE_CALLBACK(map, cb, ...) (map)->callbacks.cb((map)->callbacks.opaque, __VA_ARGS__)
+
+// The helpers below call the user supplied callback if there is one and
+// otherwise fall back to a bytewise default (or to doing nothing).
+
+// Key comparison; default: memcmp over keysize bytes.
+static bool
+keys_eq(apfl_hashmap map, const void *a, const void *b)
+{
+    if (HAS_CALLBACK(map, keys_eq)) {
+        return INVOKE_CALLBACK(map, keys_eq, a, b);
+    } else {
+        return memcmp(a, b, map->keysize) == 0;
+    }
+}
+
+// Key hashing; default: FNV-1a over the keysize bytes of the key.
+static apfl_hash
+calc_hash(apfl_hashmap map, const void *key)
+{
+    if (HAS_CALLBACK(map, calc_hash)) {
+        return INVOKE_CALLBACK(map, calc_hash, key);
+    } else {
+        return apfl_hash_fnv1a(key, map->keysize);
+    }
+}
+
+// Key destruction; default: nothing.
+static void
+destroy_key(apfl_hashmap map, void *key)
+{
+    if (HAS_CALLBACK(map, destroy_key)) {
+        INVOKE_CALLBACK(map, destroy_key, key);
+    }
+}
+
+// Value destruction; default: nothing.
+static void
+destroy_value(apfl_hashmap map, void *value)
+{
+    if (HAS_CALLBACK(map, destroy_value)) {
+        INVOKE_CALLBACK(map, destroy_value, value);
+    }
+}
+
+// Key copy; default: memcpy of keysize bytes, which always succeeds.
+static bool
+copy_key(apfl_hashmap map, void *dest, const void *src)
+{
+    if (HAS_CALLBACK(map, copy_key)) {
+        return INVOKE_CALLBACK(map, copy_key, dest, src);
+    } else {
+        memcpy(dest, src, map->keysize);
+        return true;
+    }
+}
+
+static bool
+copy_value(apfl_hashmap map, void *dest, const void *src)
+{
+    if (HAS_CALLBACK(map, copy_value)) {
+        return INVOKE_CALLBACK(map, copy_value, dest, src);
+    } else {
+        memcpy(dest, src, map->valsize);
+        return true;
+    }
+}
+
+#define CAP_GROW 5
+static_assert(CAP_GROW >= 1, "CAP_GROW must be at least 1");
+
+// Buckets grow linearly, CAP_GROW entries at a time.
+static size_t
+calc_new_cap(size_t old_cap)
+{
+    return old_cap + CAP_GROW;
+}
+
+// Address of element number `off` in an array of `elemsize` byte elements.
+#define KVADDR(base, elemsize, off) (((char*)(base)) + ((elemsize)*(off)))
+
+// Linear search for `key` in `bucket`. On success the index of the entry is
+// stored in `*off` and true is returned; otherwise `*off` is left untouched.
+static bool
+find_key_in_bucket(apfl_hashmap map, struct bucket *bucket, const void *key, size_t *off)
+{
+    size_t keysize = map->keysize;
+
+    for (size_t i = 0; i < bucket->len; i++) {
+        if (keys_eq(map, key, KVADDR(bucket->keys, keysize, i))) {
+            *off = i;
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// Stores a copy of `value` under `key` in `bucket`. An existing value for
+// the key is destroyed and replaced; otherwise a new entry is appended,
+// growing the bucket if it is full. Returns false if memory could not be
+// allocated or a copy callback failed.
+static bool
+set_in_bucket(apfl_hashmap map, struct bucket *bucket, const void *key, const void *value)
+{
+    size_t keysize = map->keysize;
+    size_t valsize = map->valsize;
+
+    size_t i;
+    if (find_key_in_bucket(map, bucket, key, &i)) {
+        void *dest = KVADDR(bucket->values, valsize, i);
+        // NOTE(review): if copy_value fails here, the slot keeps the
+        // already destroyed old value.
+        destroy_value(map, dest);
+
+        return copy_value(map, dest, value);
+    }
+
+    // Only grow when there is no free slot left. (This used to test
+    // len <= cap, which always holds and reallocated on every insert.)
+    if (bucket->len >= bucket->cap) {
+        size_t new_cap = calc_new_cap(bucket->cap);
+
+        void *newmem;
+
+        newmem = realloc(bucket->keys, new_cap * keysize);
+        if (newmem == NULL) {
+            return false;
+        }
+        bucket->keys = newmem;
+
+        newmem = realloc(bucket->values, new_cap * valsize);
+        if (newmem == NULL) {
+            // keys may already be larger than cap entries; that is
+            // harmless, cap is only raised once both arrays have grown.
+            return false;
+        }
+        bucket->values = newmem;
+
+        bucket->cap = new_cap;
+    }
+
+    if (!copy_key(map, KVADDR(bucket->keys, keysize, bucket->len), key)) {
+        return false;
+    }
+
+    if (!copy_value(map, KVADDR(bucket->values, valsize, bucket->len), value)) {
+        // Undo the key copy so that no half-initialized entry remains.
+        destroy_key(map, KVADDR(bucket->keys, keysize, bucket->len));
+        return false;
+    }
+
+    bucket->len++;
+
+    return true;
+}
+
+static bool
+get_in_bucket(apfl_hashmap map, struct bucket *bucket, const void *key, void *value)
+{
+    size_t i;
+    if (!find_key_in_bucket(map, bucket, key, &i)) 
{
+        return false;
+    }
+
+    if (value != NULL) {
+        size_t valsize = map->valsize;
+        if (!copy_value(map, value, KVADDR(bucket->values, valsize, i))) {
+            return false; // TODO: This way, we cant distinguish an error in copy_value from a non-set key
+        }
+    }
+
+    return true;
+}
+
+// Selects the bucket responsible for `key`: hash modulo number of buckets.
+static struct bucket *
+bucket_by_key(apfl_hashmap map, const void *key)
+{
+    size_t index = calc_hash(map, key) % map->nbuckets;
+    return &map->buckets[index];
+}
+
+// Removes the entry for `key` from `bucket`, if there is one. Key and value
+// are destroyed and the following entries are shifted down by one.
+static void
+delete_in_bucket(apfl_hashmap map, struct bucket *bucket, const void *key)
+{
+    size_t pos;
+    if (!find_key_in_bucket(map, bucket, key, &pos)) {
+        return;
+    }
+
+    size_t keysize = map->keysize;
+    size_t valsize = map->valsize;
+
+    destroy_key(map, KVADDR(bucket->keys, keysize, pos));
+    destroy_value(map, KVADDR(bucket->values, valsize, pos));
+
+    assert(bucket->len >= (pos+1));
+
+    // Number of entries behind the removed one.
+    size_t tail = bucket->len - (pos+1);
+
+    memmove(
+        KVADDR(bucket->keys, keysize, pos),
+        KVADDR(bucket->keys, keysize, pos+1),
+        tail * keysize
+    );
+    memmove(
+        KVADDR(bucket->values, valsize, pos),
+        KVADDR(bucket->values, valsize, pos+1),
+        tail * valsize
+    );
+
+    assert(bucket->len > 0); // if len == 0, we would not have found an entry
+    bucket->len--;
+}
+
+#define INITIAL_NBUCKETS 16 // Must be a power of 2
+
+// Creates an empty map with INITIAL_NBUCKETS empty buckets for keys of
+// `keysize` bytes and values of `valsize` bytes. Returns NULL if memory
+// could not be allocated.
+apfl_hashmap
+apfl_hashmap_new(struct apfl_hashmap_callbacks callbacks, size_t keysize, size_t valsize)
+{
+    apfl_hashmap map = malloc(sizeof(struct apfl_hashmap_struct));
+    if (map == NULL) {
+        return NULL;
+    }
+
+    map->callbacks = callbacks;
+    map->keysize = keysize;
+    map->valsize = valsize;
+    map->nbuckets = INITIAL_NBUCKETS;
+    map->buckets = malloc(sizeof(struct bucket) * INITIAL_NBUCKETS);
+    if (map->buckets == NULL) {
+        free(map);
+        return NULL;
+    }
+
+    const struct bucket empty = {
+        .keys = NULL,
+        .values = NULL,
+        .len = 0,
+        .cap = 0,
+    };
+    for (size_t b = 0; b < INITIAL_NBUCKETS; b++) {
+        map->buckets[b] = empty;
+    }
+
+    return map;
+}
+
+void
+apfl_hashmap_delete(apfl_hashmap map, const void *key)
+{
+    delete_in_bucket(map, 
bucket_by_key(map, key), key);
+}
+
+// Looks up `key`. If found and `value` is not NULL, the value is copied
+// into `value`. Returns whether the lookup (and the copy) succeeded.
+bool
+apfl_hashmap_get(apfl_hashmap map, const void *key, void *value)
+{
+    struct bucket *bucket = bucket_by_key(map, key);
+    return get_in_bucket(map, bucket, key, value);
+}
+
+// Sets `key` to a copy of `value`. Returns false on failure.
+bool
+apfl_hashmap_set(apfl_hashmap map, const void *key, const void *value)
+{
+    struct bucket *bucket = bucket_by_key(map, key);
+    return set_in_bucket(map, bucket, key, value);
+}
+
+// Destroys all keys and values of a bucket and frees its arrays.
+static void
+destroy_bucket(apfl_hashmap map, struct bucket *bucket)
+{
+    size_t count = bucket->len;
+
+    for (size_t entry = 0; entry < count; entry++) {
+        destroy_key(map, KVADDR(bucket->keys, map->keysize, entry));
+        destroy_value(map, KVADDR(bucket->values, map->valsize, entry));
+    }
+
+    free(bucket->keys);
+    free(bucket->values);
+    bucket->cap = 0;
+    bucket->len = 0;
+}
+
+// Destroys the map with all its entries. NULL is accepted and ignored.
+void
+apfl_hashmap_destroy(apfl_hashmap map)
+{
+    if (map == NULL) {
+        return;
+    }
+
+    struct bucket *buckets = map->buckets;
+    if (buckets != NULL) {
+        for (size_t b = 0; b < map->nbuckets; b++) {
+            destroy_bucket(map, &buckets[b]);
+        }
+        free(buckets);
+    }
+
+    free(map);
+}
+
+// Advances cur->bucket until it refers to a non-empty bucket or to the end.
+static void
+cursor_skip_empty_buckets(apfl_hashmap_cursor cur)
+{
+    apfl_hashmap map = cur->map;
+
+    for (; cur->bucket < map->nbuckets; cur->bucket++) {
+        if (map->buckets[cur->bucket].len != 0) {
+            break;
+        }
+    }
+}
+
+// Allocates a cursor positioned at the first entry of `map` (or at the end
+// if the map is empty). Returns NULL if the allocation failed.
+apfl_hashmap_cursor
+apfl_hashmap_get_cursor(apfl_hashmap map)
+{
+    apfl_hashmap_cursor cursor = malloc(sizeof(struct apfl_hashmap_cursor_struct));
+    if (cursor == NULL) {
+        return NULL;
+    }
+
+    cursor->map = map;
+    cursor->bucket = 0;
+    cursor->i = 0;
+    cursor_skip_empty_buckets(cursor);
+
+    return cursor;
+}
+
+// The end is reached once the bucket index has run past the last bucket.
+bool
+apfl_hashmap_cursor_is_end(apfl_hashmap_cursor cursor)
+{
+    return cursor->bucket >= cursor->map->nbuckets;
+}
+
+static struct bucket *
+cursor_get_bucket(apfl_hashmap_cursor cursor)
+{
+    return apfl_hashmap_cursor_is_end(cursor)
+        ? 
NULL
+        : &cursor->map->buckets[cursor->bucket];
+}
+
+// Advances the cursor to the next entry: the next index in the current
+// bucket or, if that bucket is exhausted, the first entry of the next
+// non-empty bucket. Does nothing if the end was already reached.
+void
+apfl_hashmap_cursor_next(apfl_hashmap_cursor cursor)
+{
+    struct bucket *bucket = cursor_get_bucket(cursor);
+
+    if (bucket == NULL) {
+        return; // End already reached
+    }
+
+    cursor->i++;
+    if (cursor->i < bucket->len) {
+        return;
+    }
+
+    cursor->bucket++;
+    cursor->i = 0;
+    cursor_skip_empty_buckets(cursor);
+}
+
+// Function body shared by the cursor getters: copies the key or value at
+// the cursor position into `out` using `copy`. The entry is the one at
+// index cursor->i (this used to read index bucket->len, one past the last
+// entry of the bucket).
+#define CURSOR_GET(cursor, out, bucketmemb, sizememb, copy) \
+    struct bucket *bucket = cursor_get_bucket(cursor); \
+    \
+    if (bucket == NULL) { \
+        return false; /* End already reached */ \
+    } \
+    \
+    if (cursor->i >= bucket->len) { \
+        return false; \
+    } \
+    \
+    size_t size = cursor->map->sizememb; \
+    \
+    return copy( \
+        cursor->map, \
+        out, \
+        KVADDR(bucket->bucketmemb, size, cursor->i) \
+    ); \
+
+// Copies the key at the cursor position into `key`. Returns false at the
+// end of the map or if copying failed.
+bool
+apfl_hashmap_cursor_get_key(apfl_hashmap_cursor cursor, void *key)
+{
+    CURSOR_GET(cursor, key, keys, keysize, copy_key)
+}
+
+// Copies the value at the cursor position into `value`. Returns false at
+// the end of the map or if copying failed.
+bool
+apfl_hashmap_cursor_get_value(apfl_hashmap_cursor cursor, void *value)
+{
+    CURSOR_GET(cursor, value, values, valsize, copy_value)
+}
diff --git a/src/hashmap.h b/src/hashmap.h
new file mode 100644
index 0000000..14d8a63
--- /dev/null
+++ b/src/hashmap.h
@@ -0,0 +1,69 @@
+#ifndef APFL_HASHMAP_H
+#define APFL_HASHMAP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// NOTE(review): the three system header names were lost in this copy of the
+// patch; they are reconstructed from the types used below (bool, size_t,
+// uint64_t) - confirm.
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define APFL_HASH_FNV1A_INIT 14695981039346656037U // offset_basis for 64bit FNV-1(a)
+
+// TODO: <stdint.h> is not required to provide uint64_t
+typedef uint64_t apfl_hash;
+
+typedef struct apfl_hashmap_struct *apfl_hashmap;
+
+typedef struct apfl_hashmap_cursor_struct *apfl_hashmap_cursor;
+
+struct apfl_hashmap_callbacks {
+    void *opaque;
+
+    // Compare keys a and b. If not provided, they will be compared with memcmp
+    bool (*keys_eq) (void *opaque, const void *a, const void *b);
+
+    // Calculate a hash value of a key.
+    // If not provided, a hash will be calculated based on the bytes of the key.
+    apfl_hash (*calc_hash) (void *opaque, const void *key);
+
+    // Destroy a key. Does nothing, if not provided.
+    void (*destroy_key) (void *opaque, void *key);
+
+    // Destroy a value. Does nothing, if not provided.
+    void (*destroy_value)(void *opaque, void *value);
+
+    // Copies a key. Returns true on success, false on failure.
+    // If not provided, the bytes will be copied with memcpy.
+    bool (*copy_key) (void *opaque, void *dest, const void *src);
+
+    // Copies a value. Returns true on success, false on failure.
+    // If not provided, the bytes will be copied with memcpy.
+    bool (*copy_value) (void *opaque, void *dest, const void *src);
+};
+
+// FNV-1a hashing. The _add variant continues an existing hash value.
+apfl_hash apfl_hash_fnv1a_add(const void *, size_t len, apfl_hash);
+apfl_hash apfl_hash_fnv1a(const void *, size_t len);
+
+apfl_hashmap apfl_hashmap_new(struct apfl_hashmap_callbacks, size_t keysize, size_t valsize);
+void apfl_hashmap_delete(apfl_hashmap, const void *key);
+bool apfl_hashmap_get(apfl_hashmap, const void *key, void *value);
+bool apfl_hashmap_set(apfl_hashmap, const void *key, const void *value);
+void apfl_hashmap_destroy(apfl_hashmap);
+
+// A get with a NULL value pointer only checks whether the key is present.
+#define apfl_hashmap_isset(m, k) (apfl_hashmap_get((m), (k), NULL))
+
+apfl_hashmap_cursor apfl_hashmap_get_cursor(apfl_hashmap);
+bool apfl_hashmap_cursor_is_end(apfl_hashmap_cursor);
+void apfl_hashmap_cursor_next(apfl_hashmap_cursor);
+bool apfl_hashmap_cursor_get_key(apfl_hashmap_cursor, void *key);
+bool apfl_hashmap_cursor_get_value(apfl_hashmap_cursor, void *value);
+
+// Expands to free(); the including file needs a declaration of free.
+#define apfl_hashmap_cursor_destroy(cur) (free(cur))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/hashmap_foo.c b/src/hashmap_foo.c
new file mode 100644
index 0000000..4cfeb90
--- /dev/null
+++ b/src/hashmap_foo.c
@@ -0,0 +1,142 @@
+// NOTE(review): the four system header names were lost in this copy of the
+// patch; they are reconstructed from what this file uses (bool, printf /
+// fgets, malloc / free, the str* functions) - confirm.
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hashmap.h"
+
+// Keys are stored as `char *`, so the callbacks receive pointers to
+// `char *`. Two keys are equal if the strings they point to are equal.
+bool keys_eq_impl(void *opaque, const void *_a, const void *_b)
+{
+    (void)opaque;
+
+    const char * const *a = _a;
+    const char * const *b = _b;
+    return strcmp(*a, *b) == 0;
+}
+
+apfl_hash 
calc_hash_impl(void *opaque, const void *_key)
+{
+    (void)opaque;
+
+    const char * const *key = _key;
+
+    // Hash the characters of the string, not the bytes of the pointer
+    // variable that refers to it.
+    return apfl_hash_fnv1a(*key, strlen(*key));
+}
+
+// Keys and values are both heap allocated strings: free the string.
+void destroy_kv_impl(void *opaque, void *_data)
+{
+    (void)opaque;
+
+    char **data = _data;
+    free(*data);
+}
+
+// Stores a freshly allocated copy of the string `*_src` in `*_dest`.
+// Returns false if the allocation failed.
+bool copy_kv_impl(void *opaque, void *_dest, const void *_src)
+{
+    (void)opaque;
+
+    char **dest = _dest;
+    const char * const *src = _src;
+
+    *dest = malloc(strlen(*src) + 1);
+    if (*dest == NULL) {
+        return false;
+    }
+
+    strcpy(*dest, *src);
+    return true;
+}
+
+#define BUFSIZE 10000
+
+// Small interactive driver for the hashmap. Reads commands from stdin:
+//   set KEY VALUE
+//   get KEY
+//   del KEY
+//   list
+// Lines with an unknown command or missing arguments are ignored.
+int
+main(int argc, char **argv)
+{
+    (void)argc;
+    (void)argv;
+
+    struct apfl_hashmap_callbacks callbacks = {
+        .opaque = NULL,
+        .keys_eq = keys_eq_impl,
+        .calc_hash = calc_hash_impl,
+        .destroy_key = destroy_kv_impl,
+        .destroy_value = destroy_kv_impl,
+        .copy_key = copy_kv_impl,
+        .copy_value = copy_kv_impl,
+    };
+
+    apfl_hashmap map = apfl_hashmap_new(callbacks, sizeof(char *), sizeof(char *));
+    if (map == NULL) {
+        return 1;
+    }
+
+    char line[BUFSIZE];
+
+    // A token never exceeds the line it was cut from, so copying a token
+    // into these buffers cannot overflow them.
+    char key[BUFSIZE];
+    char *keyp = &key[0];
+    char value[BUFSIZE];
+    char *valuep = &value[0];
+
+    char *retreived = NULL;
+
+    for (;;) {
+        if (fgets(line, BUFSIZE, stdin) == NULL) {
+            break;
+        }
+
+        // fgets keeps the trailing newline; treat it as a delimiter so a
+        // command without arguments (e.g. "list") is recognized too.
+        char *tok = strtok(line, " \n");
+        if (tok == NULL) {
+            continue;
+        }
+
+        if (strcmp(tok, "set") == 0) {
+            tok = strtok(NULL, " \n");
+            if (tok == NULL) {
+                continue; // key missing
+            }
+            strcpy(key, tok);
+
+            tok = strtok(NULL, " \n");
+            if (tok == NULL) {
+                continue; // value missing
+            }
+            strcpy(value, tok);
+
+            if (!apfl_hashmap_set(map, &keyp, &valuep)) {
+                return 2;
+            }
+        } else if (strcmp(tok, "get") == 0 ) {
+            tok = strtok(NULL, " \n");
+            if (tok == NULL) {
+                continue; // key missing
+            }
+            strcpy(key, tok);
+
+            if (apfl_hashmap_get(map, &keyp, &retreived)) {
+                printf("%s => %s\n", key, retreived);
+                free(retreived);
+                continue;
+            }
+        } else if (strcmp(tok, "del") == 0 ) {
+            tok = strtok(NULL, " \n");
+            if (tok == NULL) {
+                continue; // key missing
+            }
+            strcpy(key, tok);
+
+            apfl_hashmap_delete(map, &keyp);
+        } else if (strcmp(tok, "list") == 0) {
+            apfl_hashmap_cursor cur = apfl_hashmap_get_cursor(map);
+            if (cur == NULL) {
+                return 3;
+            }
+            for (; 
!apfl_hashmap_cursor_is_end(cur); apfl_hashmap_cursor_next(cur)) {
+                char *k = NULL;
+                char *v = NULL;
+
+                if (
+                    apfl_hashmap_cursor_get_key(cur, &k)
+                    && apfl_hashmap_cursor_get_value(cur, &v)
+                ) {
+                    printf("%s => %s\n", k, v);
+                }
+                // k and v start out as NULL, so freeing them is fine even
+                // if one of the getters was not called.
+                free(k);
+                free(v);
+            }
+
+            apfl_hashmap_cursor_destroy(cur);
+        }
+    }
+
+    apfl_hashmap_destroy(map);
+
+    return 0;
+}
diff --git a/src/internal.h b/src/internal.h
new file mode 100644
index 0000000..6625437
--- /dev/null
+++ b/src/internal.h
@@ -0,0 +1,45 @@
+#ifndef APFL_INTERNAL_H
+#define APFL_INTERNAL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// NOTE(review): the header name was lost in this copy of the patch;
+// <stdlib.h> is assumed because the macros below use free() and calloc().
+#include <stdlib.h>
+
+#include "common.h"
+
+// DEINIT_LIST calls item_deinit on each of the len elements of items, then
+// frees items and resets len to 0 and items to NULL.
+// items and len must be lvalues; they are evaluated more than once.
+#define DEINIT_LIST(items, len, item_deinit) \
+    do { \
+        for (size_t i = 0; i < (len); i++) { \
+            item_deinit(&((items)[i])); \
+        } \
+        (len) = 0; \
+        free(items); \
+        (items) = NULL; \
+    } while(0)
+
+// MOVEPTR copies the pointer in to out and sets in to NULL afterwards.
+#define MOVEPTR(out, in) \
+    do { \
+        out = in; \
+        in = NULL; \
+    } while(0)
+
+// ALLOC_LIST allocates memory for a list of n values of type T.
+// n == 0 will always result in NULL (not guaranteed by calloc()) and the
+// result will be cast into a pointer to T (this way the compiler can warn us,
+// if we try to allocate memory for a wrong type). Also if we always use
+// calloc(), the allocated memory is in a defined state.
+#define ALLOC_LIST(T, n) (T *)((n) == 0 ? NULL : calloc((n), sizeof(T)))
+
+#define ALLOC(T) ALLOC_LIST(T, 1)
+
+// Aliases to commonly used functions / macros
+
+#define DESTROY APFL_DESTROY
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..a12c0a9
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,74 @@
+// NOTE(review): the four system header names were lost in this copy of the
+// patch. <stdbool.h>, <stdio.h> and <string.h> follow from what this file
+// uses; <stdlib.h> is a guess - confirm.
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "apfl.h"
+
+static bool
+repl_source_reader(void *context, char *buf, size_t *len, bool need)
+{
+    (void)context;
+
+    printf(need ? "... 
" : "> ");
+    fflush(stdout);
+
+    // On entry *len is the size of buf; on return it is the number of
+    // characters read (0 at end of input).
+    size_t maxlen = *len;
+
+    if (fgets(buf, maxlen, stdin) == NULL) {
+        if (feof(stdin)) {
+            *len = 0;
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    *len = strlen(buf);
+    return true;
+}
+
+// Reads source code interactively and prints the tokens it consists of.
+// Returns 1 if the tokenizer could not be created or reported a fatal
+// error, 0 otherwise.
+int
+main(int argc, const char **argv)
+{
+    (void)argc;
+    (void)argv;
+
+    int rv = 0;
+
+    apfl_tokenizer_ptr tokenizer = NULL;
+    if (!(tokenizer = apfl_tokenizer_new(repl_source_reader, NULL))) {
+        fprintf(stderr, "Failed initializing tokenizer\n");
+        rv = 1; // report the failure in the exit status as well
+        goto exit;
+    }
+
+    while (true) {
+        struct apfl_error err;
+        struct apfl_token token;
+
+        switch (apfl_tokenizer_next(tokenizer, false)) {
+        case APFL_PARSE_OK:
+            token = apfl_tokenizer_get_token(tokenizer);
+            apfl_token_print(token, stdout);
+            apfl_token_deinit(&token);
+
+            break;
+        case APFL_PARSE_EOF:
+            goto exit;
+        case APFL_PARSE_ERROR:
+            err = apfl_tokenizer_get_error(tokenizer);
+            apfl_error_print(err, stderr);
+
+            // Non-fatal errors are only reported; tokenizing continues.
+            if (APFL_ERROR_IS_FATAL(err)) {
+                rv = 1;
+                goto exit;
+            }
+            break;
+        }
+    }
+
+exit:
+    apfl_tokenizer_destroy(tokenizer);
+
+    return rv;
+}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..dd4e104
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,1717 @@
+// NOTE(review): the names of the three system headers below were lost in
+// this copy of the patch and have to be restored from the original commit.
+#include
+#include
+#include
+
+#include "apfl.h"
+
+#include "resizable.h"
+#include "internal.h"
+
+struct apfl_parser {
+    struct apfl_parser_token_source token_source;
+
+    struct apfl_error error;
+
+    bool eof;
+    bool has_unread;
+    struct apfl_token unread_buf;
+};
+
+enum parse_fragment_result {
+    PF_OK,
+    PF_CANT_HANDLE,
+    PF_EOF,
+    PF_ERROR,
+};
+
+enum parse_fragment_flags {
+    FFLAG_NO_EXPAND = 1,
+};
+
+enum fragment_type {
+    FRAG_EXPAND,
+    FRAG_CONSTANT,
+    FRAG_NAME,
+    FRAG_DOT,
+    FRAG_AT,
+    FRAG_PREDICATE,
+    FRAG_EXPR,
+    FRAG_LIST,
+};
+
+struct fragment_dot {
+    struct fragment *lhs;
+    struct apfl_string rhs;
+};
+
+struct fragment_lhs_rhs {
+    struct fragment *lhs;
+    struct fragment *rhs;
+};
+
+struct fragment_list {
+    APFL_RESIZABLE_TRAIT(struct fragment, children)
+};
+
+struct fragment {
+    
enum fragment_type type; + + union { + struct fragment *expand; + struct apfl_expr_const constant; + struct apfl_string name; + struct fragment_dot dot; + struct fragment_lhs_rhs at; + struct fragment_lhs_rhs predicate; + struct apfl_expr expr; + struct fragment_list list; + }; + + struct apfl_position position; +}; + +static enum parse_fragment_result parse_fragment(apfl_parser_ptr, struct fragment*, bool need, enum parse_fragment_flags); + +static bool +grow_fragment_cap(struct fragment_list *list, size_t inc) +{ + return apfl_resizable_grow_cap( + sizeof(struct fragment), + APFL_RESIZABLE_ARGS(*list, children), + inc + ); +} + +static bool +append_fragment(struct fragment_list *list, struct fragment fragment) +{ + return apfl_resizable_append( + sizeof(struct fragment), + APFL_RESIZABLE_ARGS(*list, children), + &fragment, + 1 + ); +} + +static void fragment_deinit(struct fragment *); + +static void +deinit_fragment_lhs_rhs(struct fragment_lhs_rhs *lr) +{ + DESTROY(lr->lhs, fragment_deinit); + DESTROY(lr->rhs, fragment_deinit); +} + +static void +fragment_list_deinit(struct fragment_list *list) +{ + DEINIT_LIST(list->children, list->len, fragment_deinit); + apfl_resizable_init(APFL_RESIZABLE_ARGS(*list, children)); +} + +static void +fragment_deinit(struct fragment *fragment) +{ + if (fragment == NULL) { + return; + } + + switch (fragment->type) { + case FRAG_EXPAND: + DESTROY(fragment->expand, fragment_deinit); + break; + case FRAG_CONSTANT: + apfl_expr_const_deinit(&fragment->constant); + break; + case FRAG_NAME: + apfl_string_deinit(&fragment->name); + case FRAG_DOT: + DESTROY(fragment->dot.lhs, fragment_deinit); + apfl_string_deinit(&fragment->dot.rhs); + break; + case FRAG_AT: + deinit_fragment_lhs_rhs(&fragment->at); + break; + case FRAG_PREDICATE: + deinit_fragment_lhs_rhs(&fragment->at); + break; + case FRAG_EXPR: + apfl_expr_deinit(&fragment->expr); + break; + case FRAG_LIST: + fragment_list_deinit(&fragment->list); + break; + } +} + +static struct 
fragment_dot +fragment_dot_move(struct fragment_dot *in) +{ + struct fragment_dot out; + MOVEPTR(out.lhs, in->lhs); + out.rhs = apfl_string_move(&in->rhs); + return out; +} + +static struct fragment_lhs_rhs +fragment_lhs_rhs_move(struct fragment_lhs_rhs *in) +{ + struct fragment_lhs_rhs out; + MOVEPTR(out.lhs, in->lhs); + MOVEPTR(out.rhs, in->rhs); + return out; +} + +static struct fragment_list +fragment_list_move(struct fragment_list *in) +{ + struct fragment_list out; + MOVEPTR(out.children, in->children); + in->len = 0; + in->cap = 0; + return out; +} + +static struct fragment +fragment_move(struct fragment *in) +{ + struct fragment out = *in; + + switch (in->type) { + case FRAG_EXPAND: + MOVEPTR(out.expand, in->expand); + break; + case FRAG_CONSTANT: + out.constant = apfl_expr_const_move(&in->constant); + break; + case FRAG_NAME: + out.name = apfl_string_move(&in->name); + break; + case FRAG_DOT: + out.dot = fragment_dot_move(&in->dot); + break; + case FRAG_AT: + out.at = fragment_lhs_rhs_move(&in->at); + break; + case FRAG_PREDICATE: + out.predicate = fragment_lhs_rhs_move(&in->predicate); + break; + case FRAG_EXPR: + out.expr = apfl_expr_move(&in->expr); + break; + case FRAG_LIST: + out.list = fragment_list_move(&in->fragment); + break; + } + + return out; +} + +apfl_parser_ptr +apfl_parser_new(struct apfl_parser_token_source token_source) +{ + apfl_parser_ptr p = malloc(sizeof(struct apfl_parser)); + if (p == NULL) { + return NULL; + } + + p->token_source = token_source; + p->eof = false; + p->has_unread = false; + + return p; +} + +static enum apfl_parse_result +get_raw_token(apfl_parser_ptr p, struct apfl_token *token, bool need) +{ + struct apfl_parser_token_source *src = &(p->token_source); + + enum apfl_parse_result result = src->next(src->opaque, need); + + switch (result) { + case APFL_PARSE_ERROR: + p->error = src->get_error(src->opaque); + break; + case APFL_PARSE_OK: + *token = src->get_token(src->opaque); + break; + default: + // nop + break; + } 
+ + return result; +} + +static enum apfl_parse_result +get_non_comment_token(apfl_parser_ptr p, struct apfl_token *token, bool need) +{ + for (;;) { + enum apfl_parse_result result = get_raw_token(p, token, need); + + if (result != APFL_PARSE_OK) { + return result; + } + + if (token->type == APFL_TOK_COMMENT) { + apfl_token_deinit(token); + } else { + return APFL_PARSE_OK; + } + } +} + +static enum apfl_parse_result +get_preprocessed_token(apfl_parser_ptr p, struct apfl_token *token, bool need) +{ + enum apfl_parse_result result; + + for (;;) { + result = get_non_comment_token(p, token, need); + + if (result != APFL_PARSE_OK) { + return result; + } + + if (token->type != APFL_TOK_CONTINUE_LINE) { + return APFL_PARSE_OK; + } + + struct apfl_position continue_line_pos = token->position; + + apfl_token_deinit(token); + + result = get_non_comment_token(p, token, true); + if (result != APFL_PARSE_OK) { + return result; + } + + if (token->type != APFL_TOK_LINEBREAK) { + apfl_token_deinit(token); + + p->error = (struct apfl_error) { + .type = APFL_ERR_NO_LINEBREAK_AFTER_CONTINUE_LINE, + .position = continue_line_pos, + }; + + return APFL_PARSE_ERROR; + } + + apfl_token_deinit(token); + } +} + +static enum apfl_parse_result +read_token(apfl_parser_ptr p, struct apfl_token *token, bool need) +{ + if (p->eof) { + return APFL_PARSE_EOF; + } + + if (p->has_unread) { + *token = p->unread_buf; + p->has_unread = false; + return APFL_PARSE_OK; + } + + enum apfl_parse_result result = get_preprocessed_token(p, token, need); + p->eof = result == APFL_PARSE_EOF; + return result; +} + +static void +unread_token(apfl_parser_ptr p, struct apfl_token token) +{ + if (p->has_unread) { + assert(false); // TODO: Or should we return a regular error? + } + + p->unread_buf = token; + p->has_unread = true; +} + +// Must only be called after an PF_CANT_HANDLE! 
+static struct apfl_token +read_token_after_cant_handle(apfl_parser_ptr p) +{ + struct apfl_token token; + + // A function that returns PF_CANT_HANDLE always unreads a token, so we are + // guaranteed to have at least one token. + assert(read_token(p, &token, true) == APFL_PARSE_OK); + + return token; +} + +static struct apfl_error +err_unexpected_token(enum apfl_token_type token_type, struct apfl_position pos) +{ + return (struct apfl_error) { + .type = APFL_ERR_UNEXPECTED_TOKEN, + .token_type = token_type, + .position = pos, + }; +} + +#define ERR_UNEXPECTED_TOKEN(t) (err_unexpected_token((t).type, (t).position)) + +// Must only be called after an PF_CANT_HANDLE! +static enum parse_fragment_result +unexpected_cant_handle(apfl_parser_ptr p) +{ + struct apfl_token token = read_token_after_cant_handle(p); + + p->error = ERR_UNEXPECTED_TOKEN(token); + apfl_token_deinit(&token); + return PF_ERROR; +} + +static enum parse_fragment_result +parse_fragment_into_list(apfl_parser_ptr p, struct fragment_list *list, bool need, enum parse_fragment_flags flags) +{ + if (!grow_fragment_cap(list, 1)) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + return PF_ERROR; + } + + struct fragment *elem = &list->children[list->len]; + + enum parse_fragment_result result = parse_fragment(p, elem, need, flags); + if (result != PF_OK) {// \mystuff\TODO:even more flags? + return result; + } + + list->len++; + return PF_OK; +} + +static enum parse_fragment_result +parse_parens_head(apfl_parser_ptr p, struct fragment_list *children, struct apfl_position position) +{ + return parse_fragment_into_list(p, children, true, FFLAG_NO_EXPAND); // \mystuff\TODO:more flags? +} + +static enum parse_fragment_result +parse_parens_tail(apfl_parser_ptr p, struct fragment_list *children, struct apfl_position position) +{ + enum parse_fragment_result result; + for (;;) { + result = parse_fragment_into_list(p, children, true, 0); // \mystuff\TODO:more flags? 
+ if (result != PF_OK) { + break; + } + } + + switch (result) { + case PF_OK: + assert(false); // already handled + case PF_EOF: + // \mystuff\TODO:unexpected eof + case PF_CANT_HANDLE: + break; + case PF_ERROR: + return PF_ERROR; + } + + assert(result == PF_CANT_HANDLE); + struct apfl_token token = read_token_after_cant_handle(p); + + if (token->type == APFL_TOK_RPAREN) { + result = PF_OK; + // \mystuff\TODO:finalize list somehow? + } else { + p->error = (struct apfl_error) { + // \mystuff\TODO:unexpected token error + }; + result = PF_ERROR; + } + + apfl_token_deinit(&token); + return result; +} + +static enum parse_fragment_result +parse_parens(apfl_parser_ptr p, struct fragment *out, struct apfl_position position) +{ + struct fragment_list children; + apfl_resizable_init(APFL_RESIZABLE_ARGS(children, children)); + + enum parse_fragment_result result; + + result = parse_parens_head(p, &children, position); + if (result != PF_OK) { + goto fail; + } + + result = parse_parens_tail(p, &children, position); + if (result != PF_OK) { + goto fail; + } + + out->type = FRAG_EXPR; + out->expr = TODO(); // \mystuff\TODO: + out->position = position; + + return PF_OK; + +fail: + // \mystuff\TODO:cleanup + + return result; +} + +static enum parse_fragment_result +skip_inner_bracket_separators(apfl_parser_ptr p) +{ + struct apfl_token token; + + for (;;) { + switch (read_token(t, &token, true)) { + case APFL_PARSE_OK: + if (token.type == APFL_TOK_COMMA || token.type == APFL_TOK_LINEBREAK || token.type == APFL_TOK_SEMICOLON) { + apfl_token_deinit(&token); + break; // Note: breaks switch, continues loop + } + + unread_token(p, token); + return PF_OK; + case APFL_PARSE_EOF: + return PF_OK; + case APFL_PARSE_ERROR: + return PF_ERROR; + } + } +} + +static enum parse_fragment_result +parse_empty_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_position position) +{ + // We already got `[ ->`, we now read another (non separator) token and return success, if it's an `]`. 
+ // Else it's a syntax error + + enum parse_fragment_result result; + result = skip_inner_bracket_separators(p); + if (result != PF_OK) { + return result; + } + + struct apfl_token token; + switch (read_token(p, &token, true)) { + case APFL_PARSE_OK: + break; + case APFL_PARSE_EOF: + p->error = apfl_error_simple(APFL_ERR_UNEXPECTED_EOF); + return PF_ERROR; + + case APFL_PARSE_ERROR: + return PF_ERROR; + } + + if (token.type != APFL_TOK_RBRACKET) { + p->error = ERR_UNEXPECTED_TOKEN(token); + apfl_token_deinit(&token); + return PF_ERROR; + } + + apfl_token_deinit(&token); + out->type = FRAG_EMPTY_DICT; + out->position = position; + return PF_OK; +} + +static enum parse_fragment_result +parse_empty_list_or_dict(apfl_parser_ptr p, struct fragment *out, struct apfl_token token, struct apfl_position position) +{ + enum apfl_token_type token_type = token.type; + struct apfl_position token_position = token.position; + apfl_token_deinit(&token); + + switch (token_type) { + case APFL_TOK_RBRACKET: + out->type = FRAG_EMPTY_LIST; + out->position = position; + return PF_OK; + case APFL_TOK_MAPSTO: + return parse_empty_dict(p, out, position); + default: + p->error = err_unexpected_token(token_type, token_position); + return PF_ERROR; + } + + apfl_token_deinit(&token); +} + +static struct apfl_error +err_unexpected_eof_after(enum apfl_token_type token_type, struct apfl_position pos) +{ + return (struct apfl_error) { + .type = APFL_ERR_UNEXPECTED_EOF_AFTER_TOKEN, + .token_type = token_type, + .position = pos, + }; +} + +static bool +fragment_to_expr(apfl_parser_ptr p, struct fragment fragment, struct apfl_expr *out) +{ + // \mystuff\TODO: +} + +static struct apfl_expr * +fragment_to_expr_allocated(apfl_parser_ptr p, struct fragment fragment) +{ + struct apfl_expr *out = malloc(sizeof(struct apfl_expr)); + if (out == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + return false; + } + + if (!fragment_to_expr(p, fragment, out)) { + free(out); + return false; + } + 
return true; +} + +static enum parse_fragment_result +parse_dict( + apfl_parser_ptr p, + struct fragment *out, + struct fragment key, + struct apfl_position mapsto_pos, + struct apfl_position start +) { + struct apfl_token token; + + struct fragment value; + + bool cleanup_key = true; + bool cleanup_value = false; + + enum parse_fragment_result result; + + struct apfl_expr_dict dict = { + .items = NULL, + .len = 0, + }; + size_t dict_cap = 0; + + goto after_mapsto; + + for (;;) { + result = skip_inner_bracket_separators(p); + if (result != PF_OK) { + goto error; + } + + switch ((result = parse_fragment(p, &key, true, FFLAG_NO_EXPAND))) { + case PF_OK: + cleanup_key = true; + break; + case PF_EOF: + p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start); + result = PF_ERROR; + goto error; + case PF_CANT_HANDLE: + goto maybe_end; + case PF_ERROR: + goto error; + } + + result = skip_inner_bracket_separators(p); + if (result != PF_OK) { + goto error; + } + + switch (read_token(p, &token, true)) { + case APFL_PARSE_OK: + break; + case APFL_PARSE_EOF: + p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start); + result = PF_ERROR; + goto error; + case APFL_PARSE_ERROR: + result = PF_ERROR; + goto error; + } + + if (token.type != APFL_TOK_MAPSTO) { + unread_token(p, token); + goto error; + } + + mapsto_pos = token.position; + apfl_token_deinit(&token); + +after_mapsto: + result = skip_inner_bracket_separators(p); + if (result != PF_OK) { + goto error; + } + + struct fragment value; + switch ((result = parse_fragment(p, &value, true, FFLAG_NO_EXPAND))) { + case PF_OK: + cleanup_value = true; + break; + case PF_EOF: + p->error = err_unexpected_eof_after(APFL_TOK_MAPSTO, mapsto_pos); + result = PF_ERROR; + goto error; + case PF_CANT_HANDLE: + result = unexpected_cant_handle(p); + goto error; + case PF_ERROR: + goto error; + } + + struct apfl_expr_dict_pair pair; + if ( + (pair.k = fragment_to_expr_allocated(p, key)) == NULL + || (pair.v = 
fragment_to_expr_allocated(p, value)) == NULL + ) { + result = PF_ERROR; + goto error; + } + + fragment_deinit(&key); + cleanup_key = false; + fragment_deinit(&value); + cleanup_value = false; + + if (!apfl_resizable_append( + sizeof(struct apfl_expr_dict_pair), + &dict.items, + &dict.len, + &dict_cap, + &pair, + 1 + )) { + // \mystuff\TODO:destroy pair! + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + return PF_ERROR; + } + } + +maybe_end: + assert(!cleanup_key && !cleanup_value); + + token = read_token_after_cant_handle(p); + if (token.type != APFL_TOK_RBRACKET) { + p->error = ERR_UNEXPECTED_TOKEN(token); + apfl_token_deinit(&token); + goto error; + } + + apfl_token_deinit(&token); + + out->type = FRAG_EXPR, + out->expr.type = (struct apfl_expr) { + .type = APFL_EXPR_DICT, + .dict = dict, + .position = start, + }; + out->position = start; + return PF_OK; + +error: + if (cleanup_key) { + fragment_deinit(&key); + } + if (cleanup_value) { + fragment_deinit(&value); + } + free(dict.items); // \mystuff\TODO:also destroy all items! 
+ return result; +} + +static enum parse_fragment_result +parse_list( + apfl_parser_ptr p, + struct fragment *out, + struct fragment first, + struct apfl_position start +) { + struct fragment_list list; + apfl_resizable_init(APFL_RESIZABLE_ARGS(list, children)); + + if (!append_fragment(&list, first)) { + fragment_deinit(&first); + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + return PF_ERROR; + } + + for (;;) { + switch (parse_fragment_into_list(p, &list, true, 0)) { + case PF_OK: + break; + case PF_EOF: + p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start); + goto error; + case PF_CANT_HANDLE: + goto maybe_end; + case PF_ERROR: + goto error; + } + } + +maybe_end: + struct apfl_token token = read_token_after_cant_handle(p); + if (token.type != APFL_TOK_RBRACKET) { + p->error = ERR_UNEXPECTED_TOKEN(token); + apfl_token_deinit(&token); + goto error; + } + apfl_token_deinit(&token); + + out->type = FRAG_LIST; + out->list = list; + out->position = start; + + return PF_OK; + +error: + // \mystuff\TODO:clean up list + return PF_ERROR; +} + +static enum parse_fragment_result +parse_brackets(apfl_parser_ptr p, struct fragment *out, struct apfl_position start) +{ + enum parse_fragment_result result; + result = skip_inner_bracket_separators(p); + if (result != PF_OK) { + return result; + } + + struct fragment first; + switch (parse_fragment(p, &first, true, 0)) { + case PF_OK: + break; + case PF_CANT_HANDLE: + return parse_empty_list_or_dict(p, out, read_token_after_cant_handle(p), start); + case PF_EOF: + p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start); + return PF_ERROR; + case PF_ERROR: + return PF_ERROR; + } + + result = skip_inner_bracket_separators(p); + if (result != PF_OK) { + goto error; + } + + struct apfl_token token; + switch (read_token(p, &token, true)) { + case APFL_PARSE_OK: + break; + case APFL_PARSE_EOF: + p->error = err_unexpected_eof_after(APFL_TOK_LBRACKET, start); + result = PF_ERROR; + goto error; + case 
APFL_PARSE_ERROR: + result = PF_ERROR; + goto error; + } + + if (token.type == APFL_TOK_MAPSTO) { + struct apfl_position mapsto_pos = token.position; + apfl_token_deinit(&token); + return parse_dict(p, out, first, mapsto_pos, start); + } else { + unread_token(p, token); + return parse_list(p, out, first, start); + } + +error: + fragment_deinit(&first); + return result; +} + +static enum parse_fragment_result +parse_expand(apfl_parser_ptr p, struct fragment *fragment, struct apfl_position position) +{ + struct fragment *inner = malloc(sizeof(struct fragment)); + if (inner == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + return PF_ERROR; + } + + enum parse_fragment_result result = parse_fragment(p, inner, true, FFLAG_NO_EXPAND); + if (result == PF_OK) { + fragment->type = FRAG_EXPAND; + fragment->expand = inner; + fragment->position = position; + return PF_OK; + } + + free(inner); + + switch (result) { + case PF_OK: + assert(false); // Already handled above + break; + case PF_CANT_HANDLE: + return unexpected_cant_handle(p); + case PF_EOF: + p->error = err_unexpected_eof_after(APFL_TOK_EXPAND, position); + return PF_ERROR; + case PF_ERROR: + return PF_ERROR; + } +} + +static enum parse_fragment_result +parse_stringify(apfl_parser_ptr p, struct fragment *fragment, struct apfl_position position) +{ + struct apfl_token token; + switch (read_token(p, &token, true)) { + case APFL_PARSE_OK: + break; + case APFL_PARSE_EOF: + p->error = err_unexpected_eof_after(APFL_TOK_STRINGIFY, position); + return PF_ERROR; + + case APFL_PARSE_ERROR: + return PF_ERROR; + } + + if (token.type != APFL_TOK_NAME) { + p->error = ERR_UNEXPECTED_TOKEN(token); + apfl_token_deinit(&token); + return PF_ERROR; + } + + fragment->type = FRAG_CONSTANT; + fragment->constant = (struct apfl_expr_const) { + .type = APFL_EXPR_CONST_STRING, + .string = apfl_string_move(&token.text), + }; + fragment->position = position; + return PF_OK; +} + +static bool 
fragment_to_param_recursive(apfl_parser_ptr, struct fragment *, struct apfl_expr_param *); + +static bool fragments_to_params(apfl_parser_ptr, struct fragment_list, struct apfl_expr_params *); + +static bool +predicate_fragment_to_param( + apfl_parser_ptr p, + struct fragment_lhs_rhs *lhs_rhs, + struct apfl_expr_param *out +) { + out->type = APFL_EXPR_PARAM_PREDICATE; + out->predicate.lhs = NULL; + out->predicate.rhs = NULL; + + if ((out->predicate.lhs = malloc(sizeof(struct apfl_expr_param))) == NULL) { + goto error; + } + + if (!fragment_to_param_recursive(p, lhs_rhs->lhs, out->predicate.lhs)) { + free(out->predicate.lhs); + out->predicate.lhs = NULL; + goto error; + } + + out->predicate.rhs = fragment_to_expr_allocated(p, fragment_move(lhs_rhs->rhs)); + if (out->predicate.rhs == NULL) { + goto error; + } + + return true; + +error: + apfl_expr_param_deinit(out); + return false; +} + +static bool +fragment_to_param_recursive( + apfl_parser_ptr p, + struct fragment *fragment, + struct apfl_expr_param *out +) { + switch (fragment->type) { + case FRAG_EXPAND: + p->error = err_unexpected_token(APFL_TOK_EXPAND, fragment->position); + return false; + case FRAG_CONSTANT: + out->type = APFL_EXPR_PARAM_CONSTANT; + out->constant = apfl_expr_const_move(&fragment->constant); + return true; + case FRAG_NAME: + out->type = APFL_EXPR_PARAM_VAR; + out->var = apfl_string_move(&fragment->name); + return true; + case FRAG_DOT: + p->error = err_unexpected_token(APFL_TOK_DOT, fragment->position); + return false; + case FRAG_AT: + p->error = err_unexpected_token(APFL_TOK_AT, fragment->position); + return false; + case FRAG_PREDICATE: + return predicate_fragment_to_param(p, &fragment->predicate, out); + case FRAG_EXPR: + p->error = (struct apfl_error) { + .type = APFL_ERR_UNEXPECTED_EXPRESSION, + .position = fragment->position, + }; + return false; + case FRAG_LIST: + out->type = APFL_EXPR_PARAM_LIST; + return fragments_to_params(p, fragment_list_move(&fragment->list), &out->list); + } 
+} + +static bool +fragment_to_param( + apfl_parser_ptr p, + struct fragment *fragment, + struct apfl_expr_param *out, + bool *seen_expand +) { + if (fragment->type == FRAG_EXPAND && !*seen_expand) { + *seen_expand = true; // This prevents a param list with more than one ~ + + out->type = APFL_EXPR_PARAM_EXPAND; + if ((out->expand = malloc(sizeof(struct apfl_expr_param))) == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + return false; + } + + return fragment_to_param_recursive(p, fragment->expand, out->expand); + } + + return fragment_to_param_recursive(p, fragment, out); +} + +static bool +fragments_to_params( + apfl_parser_ptr p, + struct fragment_list fragments, + struct apfl_expr_params *out +) { + bool result = true; + + out->params = malloc(sizeof(struct apfl_expr_param) * fragments.len); + out->len = 0; + if (out->params == NULL) { + goto error; + } + + bool seen_expand = false; + for (size_t i = 0; i < fragments.len; i++) { + if (!fragment_to_param( + p, + &fragments.children[i], + &out->params[i], + &seen_expand + )) { + goto error; + } + + out->len++; + } + + goto ok; + +error: + result = false; + apfl_expr_params_deinit(out); +ok: + fragment_list_deinit(&fragments); + return result; +} + +static bool +fragment_to_assignable( + apfl_parser_ptr p, + bool expand_ok, + struct fragment fragment, + struct apfl_expr_assignable *out +) { + switch (fragment.type) { + case FRAG_EXPAND: + if (!expand_ok) { + p->error = (struct apfl_error) { + .type = APFL_ERR_INVALID_ASSIGNMENT_LHS, + .position = fragment.position, + }; + goto error; + } + + out->type = APFL_EXPR_ASSIGNABLE_EXPAND; + if ((out->expand = malloc(sizeof(struct apfl_expr_assignable))) == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + goto error; + } + + if (!fragment_to_assignable( + p, + false, + fragment_move(fragment.expand), + out->expand + )) { + goto error; + } + + return true; + case FRAG_CONSTANT: + out->type = APFL_EXPR_ASSIGNABLE_CONSTANT; + out->constant 
= apfl_expr_const_move(&fragment.constant); + return true; + case FRAG_NAME: + out->type = APFL_EXPR_ASSIGNABLE_VAR; + out->var = apfl_string_move(&fragment.name); + return true; + case FRAG_DOT: + out->type = APFL_EXPR_ASSIGNABLE_DOT; + + out->dot.rhs = apfl_string_move(&fragment.dot.rhs); + + if ((out->dot.lhs = malloc(sizeof(struct apfl_expr_assignable))) == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + goto error; + } + + if (!fragment_to_assignable( + p, + expand_ok, + fragment_move(fragment.dot.lhs), + out->dot.lhs + )) { + goto error; + } + + return true; + case FRAG_AT: + out->type = APFL_EXPR_ASSIGNABLE_AT; + + out->at.lhs = malloc(sizeof(struct apfl_expr_assignable)); + out->at.rhs = malloc(sizeof(struct apfl_expr)); + + if (out->at.lhs == NULL || out->at.rhs == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + goto error; + } + + if (!fragment_to_assignable( + p, + expand_ok, + fragment_move(fragment.at.lhs), + out->at.lhs + )) { + free(out->at.rhs); + out->at.rhs = NULL; + goto error; + } + + if (!fragment_to_expr( + p, + fragment_move(fragment.at.rhs), + out->at.rhs + )) { + goto error; + } + + return true; + case FRAG_PREDICATE: + out->type = APFL_EXPR_ASSIGNABLE_PREDICATE; + + out->predicate.lhs = malloc(sizeof(struct apfl_expr_assignable)); + out->predicate.rhs = malloc(sizeof(struct apfl_expr)); + + if (out->predicate.lhs == NULL || out->predicate.rhs == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + goto error; + } + + if (!fragment_to_assignable( + p, + expand_ok, + fragment_move(fragment.at.lhs), + out->predicate.lhs + )) { + free(out->predicate.rhs); + out->predicate.rhs = NULL; + goto error; + } + + if (!fragment_to_expr( + p, + fragment_move(fragment.at.rhs), + out->predicate.rhs + )) { + goto error; + } + + return true; + case FRAG_EXPR: + p->error = (struct apfl_error) { + .type = APFL_ERR_INVALID_ASSIGNMENT_LHS, + .position = fragment.position, + }; + goto error; + case FRAG_LIST: + 
out->type = APFL_EXPR_ASSIGNABLE_LIST; + out->list.len = 0; + out->list.children = malloc(sizeof(struct apfl_expr_assignable) * fragment.list.len); + + expand_ok = true; + for (size_t i = 0; i < fragment.list.len; i++) { + struct apfl_expr_assignable *cur = &out->list.children[i]; + + if (!fragment_to_assignable( + p, + expand_ok, + fragment_move(&fragment.list.children[i]), + cur + )) { + goto error; + } + + expand_ok = expand_ok && cur->type != APFL_EXPR_ASSIGNABLE_EXPAND; + + out->list.len++; + } + + return true; + } + + assert(false); // Should not be reached + +error: + apfl_expr_assignable_deinit(out); + return false; +} + +static bool +fragment_to_partial_assignment( + apfl_parser_ptr p, + bool local, + struct fragment fragment, + struct apfl_expr_assignment *out +) { + out->local = local; + out->rhs = NULL; + + bool rv = fragment_to_assignable(p, false, fragment, &out->lhs); + fragment_deinit(&fragment); + return rv; +} + +enum parse_body_or_toplevel_finalize_result { + BODY_FINALIZE_ERROR, + BODY_FINALIZE_OK, + BODY_FINALIZE_EMPTY, +}; + +static bool +fragment_to_list_item( + apfl_parser_ptr p, + struct fragment fragment, + struct apfl_expr_list_item *out +) { + if (fragment.type == FRAG_EXPAND) { + out->expand = true; + out->expr = fragment_to_expr_allocated(p, fragment_move(fragment.expand)); + fragment_deinit(&fragment); + return out->expr != NULL; + } else { + out->expand = false; + out->expr = fragment_to_expr_allocated(p, fragment_move(&fragment)); + return out->expr != NULL; + } + +} + +static bool +fragments_to_call( + apfl_parser_ptr p, + struct fragment_list *fragments, // \mystuff\TODO:really a pointer? Why? + struct apfl_expr *out +) { + assert(fragments->len > 0); // \mystuff\TODO: Or should we check this here? 
+ + out->type = APFL_EXPR_CALL; + out->position = fragments->children[0].position; + out->call.arguments = (struct apfl_expr_list) { + .items = NULL, + .len = 0, + }; + + out->call.callee = fragment_to_expr_allocated(p, fragment_move(&fragments->children[0])); + if (out->call.callee == NULL) { + goto error; + } + + if (fragments->len == 1) { + return true; + } + + out->call.arguments.items = ALLOC_LIST(struct apfl_expr_list_item, (fragments->len - 1)); + + for (size_t i = 1; i < fragments->len; i++) { + if (!fragment_to_list_item(p, fragment_move(&fragments->children[i]), &out->call.arguments.items[i-1])) { + goto error; + } + + out->call.arguments.len++; + } + +error: + apfl_expr_deinit(out); + return false; +} + +static enum parse_body_or_toplevel_finalize_result +parse_body_or_toplevel_finalize( + apfl_parser_ptr p, + struct fragment_list *fragments, + struct apfl_expr **leftmost_assignment_expr, + struct apfl_expr *rightmost_assignment_expr, + struct apfl_expr *out +) { + if (fragments->len == 0) { + if (rightmost_assignment_expr != NULL) { + p->error = (struct apfl_error) { + .type = APFL_ERR_EMPTY_ASSIGNMENT, + .position = rightmost_assignment_expr->position, + }; + goto error; + } + + return BODY_FINALIZE_EMPTY; + } + + // Nasty pointer juggling: If there are no assignments, we want to put the + // expression generated from the fragments to go directly to the output. + // Otherwise we want to store it in the right-hand-side of the rightmost + // assignment expression and the leftmost assignment expression becomes the + // output. 
+ struct apfl_expr *dest = NULL; + if (rightmost_assignment_expr == NULL) { + dest = out; + } else { + assert(*leftmost_assignment_expr != NULL); + + *out = **leftmost_assignment_expr; + free(*leftmost_assignment_expr); + *leftmost_assignment_expr = NULL; + + assert(rightmost_assignment_expr->type == APFL_EXPR_ASSIGNMENT); + rightmost_assignment_expr->assignment.rhs = malloc(sizeof(struct apfl_expr)); + if (rightmost_assignment_expr->assignment.rhs == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + goto error; + } + dest = rightmost_assignment_expr->assignment.rhs; + } + + if (fragments->len == 1) { + if (!fragment_to_expr(p, fragment_move(&fragments->children[0]), dest)) { + goto error; + } + } else { + if (!fragments_to_call(p, fragments, dest)) { + goto error; + } + } + + return true; + +error: + apfl_expr_deinit(out); + return BODY_FINALIZE_ERROR; +} + +static enum parse_fragment_result +parse_body_or_toplevel( + apfl_parser_ptr p, + bool handle_eof, + struct fragment_list *fragments, + struct apfl_expr *out +) { + struct apfl_expr *leftmost_assignment_expr = NULL; + struct apfl_expr *rightmost_assignment_expr = NULL; + + for (;;) { + for (;;) { + switch (parse_fragment_into_list(p, fragments, true, 0)) { + case PF_OK: + break; + case PF_CANT_HANDLE: + goto break_inner; + case PF_EOF: + if (handle_eof) { + switch (parse_body_or_toplevel_finalize( + p, + fragments, + &leftmost_assignment_expr, + rightmost_assignment_expr, + out + )) { + case BODY_FINALIZE_OK: + return PF_OK; + case BODY_FINALIZE_ERROR: + goto error; + case BODY_FINALIZE_EMPTY: + return PF_EOF; + } + } else { + return PF_EOF; + } + case PF_ERROR: + goto error; + } + } + +break_inner: + struct apfl_token token = read_token_after_cant_handle(p); + + switch (token.type) { + case APFL_TOK_ASSIGN: + case APFL_TOK_LOCAL_ASSIGN: + bool local = token.type == APFL_TOK_LOCAL_ASSIGN; + struct apfl_position position = token.position; + apfl_token_deinit(&token); + + if (fragments->len == 0) 
{ + p->error = (struct apfl_error) { + .type = APFL_ERR_EMPTY_ASSIGNMENT, + .position = position, + }; + goto error; + } + + if (fragments->len > 1) { + p->error = err_unexpected_token( + local ? APFL_TOK_LOCAL_ASSIGN : APFL_TOK_ASSIGN, + position + ); + goto error; + } + + struct apfl_expr *cur_assignment_expr = ALLOC(struct apfl_expr); + if (cur_assignment_expr == NULL) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + goto error; + } + + cur_assignment_expr->type = APFL_EXPR_ASSIGNMENT; + cur_assignment_expr->position = position; + + struct fragment fragment = fragment_move(&fragments->children[0]); + fragment_list_deinit(fragments); // Reset fragment list + if (!fragment_to_partial_assignment( + p, + local, + fragment, + &cur_assignment_expr->assignment + )) { + free(cur_assignment_expr); + goto error; + } + + if (rightmost_assignment_expr == NULL) { + assert(leftmost_assignment_expr == NULL); + leftmost_assignment_expr = cur_assignment_expr; + rightmost_assignment_expr = cur_assignment_expr; + } else { + rightmost_assignment_expr->assignment.rhs = cur_assignment_expr; + rightmost_assignment_expr = cur_assignment_expr; + } + + break; + case APFL_TOK_LINEBREAK: + case APFL_TOK_SEMICOLON: + switch (parse_body_or_toplevel_finalize( + p, + fragments, + &leftmost_assignment_expr, + rightmost_assignment_expr, + out + )) { + case BODY_FINALIZE_OK: + return PF_OK; + case BODY_FINALIZE_ERROR: + goto error; + case BODY_FINALIZE_EMPTY: + // If there was nothing to finalize, we have an empty expression + // that doesn't need to end up in the AST. So let's just + // continue with the outermost loop and try again. + break; + } + + default: + if (leftmost_assignment_expr != NULL) { + p->error = ERR_UNEXPECTED_TOKEN(token); + apfl_token_deinit(&token); + goto error; + } + + unread_token(p, token); + return PF_CANT_HANDLE; + } + } + +error: // \mystuff\TODO:also on other non ok results??? 
+ DESTROY(leftmost_assignment_expr, apfl_expr_deinit); +} + +static enum parse_fragment_result +parse_braces( + apfl_parser_ptr p, + struct fragment *out, + struct apfl_position start +) { + enum { + FUNTYPE_UNDECIDED, + FUNTYPE_SIMPLE, + FUNTYPE_COMPLEX, + } type = FUNTYPE_UNDECIDED; + + struct apfl_expr_subfunc *subfuncs = NULL; + size_t subfuncs_len = 0; + size_t subfuncs_cap = 0; + + bool has_params = false; + struct apfl_expr_params params = {}; + + struct fragment_list fragments; + apfl_resizable_init(APFL_RESIZABLE_ARGS(fragments, children)); + + struct apfl_expr_body body = { + .items = NULL, + .len = 0, + }; + size_t body_cap = 0; + + for (;;) { + struct apfl_expr expr; + + switch (parse_body_or_toplevel( + p, + false, + &fragments, + &expr + )) { + case PF_OK: + if (!apfl_resizable_append( + sizeof(struct apfl_expr), + (void **)&body.items, + &body.len, + &body_cap, + &expr, + 1 + )) { + apfl_expr_deinit(&expr); + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + goto error; + } + break; + case PF_EOF: + p->error = apfl_error_simple(APFL_ERR_UNEXPECTED_EOF); + goto error; + case PF_ERROR: + goto error; + case PF_CANT_HANDLE: + struct apfl_token token = read_token_after_cant_handle(p); + + // TODO: Something similar to this is done all around in this file. + // It would probably be better, if the parser object owns the + // current token. Then we wouldn't have to clean up the + // current token all the time. 
+ enum apfl_token_type token_type = token.type; + struct apfl_position position = token.position; + apfl_token_deinit(&token); + + switch (token_type) { + case APFL_TOK_RBRACE: + // \mystuff\TODO: + break; + case APFL_TOK_MAPSTO: + if (body.len > 0 && !has_params) { + p->error = (struct apfl_error) { + .type = APFL_ERR_STATEMENTS_BEFORE_PARAMETERS, + .position = position, + }; + goto error; + } + + if (has_params) { + // Finalize previous subfunc and append + if (!apfl_resizable_append( + sizeof(struct apfl_expr_subfunc), + (void **)&subfuncs, + &subfuncs_len, + &subfuncs_cap, + &(struct apfl_expr_subfunc) { + .params = params, + .body = body, + }, + 1 + )) { + p->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + goto error; + } + + params = (struct apfl_expr_params) {}; + body = (struct apfl_expr_body) {}; + body_cap = 0; + } + + type = FUNTYPE_COMPLEX; + + if (!fragments_to_params(p, fragments, ¶ms)) { + goto error; + } + has_params = true; + + break; + default: + p->error = err_unexpected_token(token_type, position); + goto error; + } + + break; + } + } + +error: + // \mystuff\TODO:cleanup + return PF_ERROR; +} + +static enum parse_fragment_result +parse_fragment(apfl_parser_ptr p, struct fragment *fragment, bool need, enum parse_fragment_flags flags) +{ + struct apfl_token token; + + switch (read_token(p, &token, need)) { + case APFL_PARSE_OK: + break; + case APFL_PARSE_EOF: + return PF_EOF; + case APFL_PARSE_ERROR: + return PF_ERROR; + } + + enum parse_fragment_result result; + + switch (token.type) { + case APFL_TOK_LPAREN: + result = parse_parens(p, fragment, token.position); + break; + case APFL_TOK_LBRACKET: + result = parse_brackets(p, fragment, token.position); + break; + case APFL_TOK_LBRACE: + result = parse_braces(p, fragment, token.position); + break; + case APFL_TOK_EXPAND: + if (flags & FFLAG_NO_EXPAND) { + unread_token(p, token); + return PF_CANT_HANDLE; + } + + result = parse_expand(p, fragment, token.position); + break; + case 
APFL_TOK_STRINGIFY: + result = parse_stringify(p, fragment, token.position); + break; + case APFL_TOK_NUMBER: + fragment->type = FRAG_CONSTANT; + fragment->constant = (struct apfl_expr_const) { + .type = APFL_EXPR_CONST_NUMBER, + .number = token.number, + }; + fragment->position = token.position; + result = PF_OK; + break; + case APFL_TOK_NAME: + if (apfl_string_cmp(token.text, "nil") == 0) { + fragment->type = FRAG_CONSTANT; + fragment->constant = (struct apfl_expr_const) { + .type = APFL_EXPR_CONST_NIL, + }; + } else if (apfl_string_cmp(token.text, "true") == 0) { + fragment->type = FRAG_CONSTANT; + fragment->constant = (struct apfl_expr_const) { + .type = APFL_EXPR_CONST_BOOLEAN, + .boolean = true, + }; + } else if (apfl_string_cmp(token.text, "false") == 0) { + fragment->type = FRAG_CONSTANT; + fragment->constant = (struct apfl_expr_const) { + .type = APFL_EXPR_CONST_BOOLEAN, + .boolean = false, + }; + } else { + fragment->type = FRAG_NAME; + fragment->name = apfl_string_move(&token.text); + } + fragment->position = token.position; + result = PF_OK; + break; + case APFL_TOK_STRING: + fragment->type = FRAG_CONSTANT; + fragment->constant = (struct apfl_expr_const) { + .type = APFL_EXPR_CONST_STRING, + .string = apfl_string_move(&token.text), + }; + fragment->position = token.position; + result = PF_OK; + break; + default: + unread_token(p, token); + return PF_CANT_HANDLE; + } + + if (result == PF_OK) { + apfl_token_deinit(&token); + + switch (read_token(p, &token, need)) { + case APFL_PARSE_OK: + break; + case APFL_PARSE_EOF: + return PF_OK; + case APFL_PARSE_ERROR: + return PF_ERROR; // \mystuff\TODO:destroy fragment in case of errors + } + + switch (token.type) { + case APFL_TOK_DOT: + result = parse_dot(p, fragment, token.position); + break; + case APFL_TOK_AT: + result = parse_at(p, fragment, token.position); + break; + case APFL_TOK_QUESTION_MARK: + result = parse_predicate(p, fragment, token.position); + break; + default: + unread_token(p, token); + return 
/*
 * Helpers for growable arrays that are described by three values: a pointer
 * to the memory, the number of used elements (`len`) and the number of
 * elements the memory has room for (`cap`). Both `len` and `cap` are counted
 * in elements of `elem_size` bytes, never in bytes.
 */

/* Initialize an empty resizable: no memory, no length, no capacity. */
void
apfl_resizable_init(void **mem, size_t *len, size_t *cap)
{
    *mem = NULL;
    *len = 0;
    *cap = 0;
}

/*
 * Grow the capacity by `inc_cap` elements.
 *
 * Returns false if the new size would overflow size_t or if the allocation
 * fails. In that case `*mem` and `*cap` are left untouched.
 */
bool
apfl_resizable_grow_cap(size_t elem_size, void **mem, size_t *len, size_t *cap, size_t inc_cap)
{
    (void)len; // TODO: better not even have the arg

    if (inc_cap == 0) {
        return true;
    }

    const size_t size_max = (size_t)-1;

    if (inc_cap > size_max - *cap) {
        return false;
    }
    size_t newcap = *cap + inc_cap;

    if (elem_size == 0) {
        // Elements without a size need no storage. Don't call realloc with a
        // size of 0, it might free the memory.
        *cap = newcap;
        return true;
    }

    if (newcap > size_max / elem_size) {
        return false;
    }

    // TODO: We currently simply grow the memory to have space for exactly
    //       inc_cap more elements. It would probably be smarter to grow the
    //       memory a bit larger to reduce calls to realloc.
    void *newmem = realloc(*mem, newcap * elem_size);
    if (newmem == NULL) {
        return false;
    }

    *mem = newmem;
    *cap = newcap;
    return true;
}

/*
 * Set the length to `newlen`, growing the capacity if necessary.
 * Elements gained by growing are not initialized.
 */
bool
apfl_resizable_resize(size_t elem_size, void **mem, size_t *len, size_t *cap, size_t newlen)
{
    // TODO: We're wasteful here by never actually shrinking the memory.
    if (newlen > *len && newlen > *cap) {
        if (!apfl_resizable_grow_cap(elem_size, mem, len, cap, newlen - *cap)) {
            return false;
        }
    }

    *len = newlen;
    return true;
}

/*
 * Append `other_len` elements read from `other_mem` to the end.
 * Returns false (and changes nothing) if the memory could not be grown.
 */
bool
apfl_resizable_append(size_t elem_size, void **mem, size_t *len, size_t *cap, const void *other_mem, size_t other_len)
{
    if (other_len == 0) {
        // Nothing to copy. This also keeps NULL pointers away from memcpy.
        return true;
    }

    if (other_len > (size_t)-1 - *len) {
        return false;
    }

    size_t newlen = *len + other_len;
    if (newlen > *cap) {
        if (!apfl_resizable_grow_cap(elem_size, mem, len, cap, newlen - *cap)) {
            return false;
        }
    }

    memcpy((char *)*mem + (elem_size * *len), other_mem, other_len * elem_size);
    *len = newlen;

    return true;
}
apfl_string_view +apfl_string_view_from_string(struct apfl_string string) +{ + return (struct apfl_string_view) { + .bytes = string.bytes, + .len = string.len, + }; +} + +int +apfl_string_view_cmp(struct apfl_string_view a, struct apfl_string_view b) +{ + size_t n = a.len > b.len ? b.len : a.len; + int cmp = memcmp(a.bytes, b.bytes, n); + if (cmp != 0) { + return cmp; + } + if (a.len == b.len) { + return 0; + } + return a.len > b.len ? 1 : -1; +} + +void +apfl_string_deinit(struct apfl_string *string) +{ + free(string->bytes); + string->len = 0; + string->bytes = NULL; +} + +bool +apfl_string_copy(struct apfl_string *dst, struct apfl_string_view src) +{ + apfl_string_deinit(dst); + if ((dst->bytes = malloc(src.len)) == NULL) { + return false; + } + + memcpy(dst->bytes, src.bytes, src.len); + dst->len = src.len; + + return true; +} + +struct apfl_string +apfl_string_move(struct apfl_string *src) +{ + struct apfl_string out = *src; + src->bytes = NULL; + return out; +} + +void +apfl_string_builder_init(struct apfl_string_builder *builder) +{ + apfl_resizable_init((void **)&(builder->bytes), &(builder->len), &(builder->cap)); +} + +void +apfl_string_builder_deinit(struct apfl_string_builder *builder) +{ + free(builder->bytes); + apfl_string_builder_init(builder); +} + +static bool +append_bytes(struct apfl_string_builder *builder, const char *bytes, size_t len) +{ + return apfl_resizable_append( + sizeof(char), + APFL_RESIZABLE_ARGS(*builder, bytes), + bytes, + len + ); +} + +bool +apfl_string_builder_append(struct apfl_string_builder *builder, struct apfl_string_view view) +{ + return append_bytes(builder, view.bytes, view.len); +} + +bool +apfl_string_builder_append_byte(struct apfl_string_builder *builder, char byte) +{ + return append_bytes(builder, &byte, 1); +} + +struct apfl_string +apfl_string_builder_move_string(struct apfl_string_builder *builder) +{ + struct apfl_string str; + + str.bytes = builder->bytes; + str.len = builder->len; + + 
/* State of a single running test. */
struct testctx_struct {
    jmp_buf *fataljmp;  /* Where test_fatal() jumps to in order to abort the test */
    const char *name;   /* Name of the test */
    const char *prefix; /* Put in front of every failure message */
    bool ok;            /* Cleared as soon as the test reports a failure */
};

typedef struct testctx_struct *testctx;

typedef void (*testfunc)(testctx);

/* A named test. A list of tests ends with an entry of {NULL, NULL}. */
struct testdef {
    const char *name;
    testfunc func;
};

/* Mark the test as failed and abort it immediately. Does not return. */
void
test_fatal(testctx t) {
    t->ok = false;
    longjmp(*(t->fataljmp), 1);
}

/*
 * Mark the test as failed and print a printf-style message, prefixed by the
 * test's prefix and followed by a newline, to stderr. Aborts the test if the
 * message could not be assembled.
 */
static void
test_vfailf(testctx t, const char* fmt, va_list varargs) {
    t->ok = false;

    char* newfmt = malloc(strlen(fmt) + strlen(t->prefix) + 2); // +2: newline + terminating zero byte
    if(newfmt == NULL) {
        fprintf(stderr, "Could not print failure message in test '%s': could not allocate memory.\n", t->name);
        test_fatal(t);
    }

    strcpy(newfmt, t->prefix);
    strcat(newfmt, fmt);
    strcat(newfmt, "\n");

    vfprintf(stderr, newfmt, varargs);

    free(newfmt);
}

/* Report a failure, but let the test continue. */
void
test_failf(testctx t, const char* fmt, ...) {
    va_list varargs;
    va_start(varargs, fmt);
    test_vfailf(t, fmt, varargs);
    va_end(varargs);
}

/* Report a failure and abort the test. Does not return. */
void
test_fatalf(testctx t, const char* fmt, ...) {
    va_list varargs;
    va_start(varargs, fmt);
    test_vfailf(t, fmt, varargs);
    va_end(varargs);
    test_fatal(t);
}

#define TESTPREFIXSIZE 1024

/*
 * Run a single test and print its result to stdout.
 *
 * Returns true if the test ran without reporting a failure. Returns false if
 * it failed or if the memory needed to run it could not be allocated.
 */
bool
test_run_test(struct testdef test)
{
    char* prefix = malloc(TESTPREFIXSIZE);
    testctx t = malloc(sizeof(struct testctx_struct));
    jmp_buf* here = malloc(sizeof(jmp_buf));
    // All three allocations are needed; `here` is dereferenced by setjmp below.
    if(prefix == NULL || t == NULL || here == NULL) {
        fprintf(stderr, "Could not execute test '%s': could not allocate memory.\n", test.name);
        free(t);
        free(prefix);
        free(here);
        return false;
    }

    snprintf(prefix, TESTPREFIXSIZE, "%s: ", test.name);

    // setjmp returns a nonzero value when test_fatal() jumped back here. The
    // test is over then and must not be started a second time.
    if(setjmp(*here) == 0) {
        t->fataljmp = here;
        t->name = test.name;
        t->prefix = prefix;
        t->ok = true;

        test.func(t);
    }

    bool ok = t->ok;
    free(prefix);
    free(t);
    free(here);

    if(ok) {
        printf("%s: \x1b[32mOK\x1b[0m\n", test.name);
    } else {
        printf("%s: \x1b[31mFAIL\x1b[0m\n", test.name);
    }

    return ok;
}

/*
 * Run all tests of the {NULL, NULL} terminated list `tests`.
 * Returns 0 if all of them passed and 1 otherwise; argc and argv are unused.
 */
int
test_main(int argc, const char **argv, struct testdef tests[])
{
    (void)argc;
    (void)argv;

    bool allok = true;

    for (struct testdef *t = tests; t->name != NULL && t->func != NULL; t++) {
        allok = test_run_test(*t) && allok;
    }

    return allok ? 0 : 1;
}
"(unknown token)"; +} + +void +apfl_token_print(struct apfl_token token, FILE *file) +{ + if (has_text_data(token.type)) { + fprintf( + file, + "%s (" APFL_STR_FMT ") @ (%d:%d)\n", + apfl_token_type_name(token.type), + APFL_STR_FMT_ARGS(token.text), + token.position.line, + token.position.col + ); + } else if (has_numeric_data(token.type)) { + fprintf( + file, + "%s (%f) @ (%d:%d)\n", + apfl_token_type_name(token.type), + token.number, + token.position.line, + token.position.col + ); + } else { + fprintf( + file, + "%s @ (%d:%d)\n", + apfl_token_type_name(token.type), + token.position.line, + token.position.col + ); + } +} diff --git a/src/tokenizer.c b/src/tokenizer.c new file mode 100644 index 0000000..99b36f8 --- /dev/null +++ b/src/tokenizer.c @@ -0,0 +1,909 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "apfl.h" + +#define BUFSIZE 4096 +typedef int buf_offset; +static_assert(INT_MAX >= BUFSIZE, "BUFSIZE is too large for type buf_offset"); + +struct apfl_tokenizer { + apfl_source_reader_cb source_reader; + void *source_reader_context; + char *buf; + buf_offset buf_pos; + buf_offset buf_len; + + enum { + NM_REGULAR, + NM_NEGATIVE_NUMBER, + NM_MAPSTO, + NM_ASSIGN, + NM_EOF, + } next_mode; + struct apfl_position pos_for_mapsto; + char first_digit_for_negative_number; + + struct apfl_position position; + bool last_byte_was_linebreak; + + union { + struct apfl_token token; + struct apfl_error error; + }; +}; + +apfl_tokenizer_ptr +apfl_tokenizer_new(apfl_source_reader_cb source_reader, void *context) +{ + apfl_tokenizer_ptr tokenizer = malloc(sizeof(struct apfl_tokenizer)); + if (tokenizer == NULL) { + return NULL; + } + + tokenizer->source_reader = source_reader; + tokenizer->source_reader_context = context; + + if ((tokenizer->buf = malloc(BUFSIZE)) == NULL) { + free(tokenizer); + return NULL; + } + + tokenizer->buf_pos = 0; + tokenizer->buf_len = 0; + + tokenizer->position = (struct apfl_position) { + .line = 1, + .col = 0, 
// The first character was not yet read + }; + tokenizer->last_byte_was_linebreak = false; + + tokenizer->next_mode = NM_REGULAR; + + return tokenizer; +} + +void +apfl_tokenizer_destroy(apfl_tokenizer_ptr tokenizer) +{ + if (tokenizer == NULL) { + return; + } + + free(tokenizer->buf); + free(tokenizer); +} + +struct apfl_token +apfl_tokenizer_get_token(apfl_tokenizer_ptr tokenizer) +{ + return tokenizer->token; +} + +struct apfl_error +apfl_tokenizer_get_error(apfl_tokenizer_ptr tokenizer) +{ + return tokenizer->error; +} + +enum read_result { + RR_OK, + RR_ERR, + RR_EOF, +}; + +static enum read_result +read_byte(apfl_tokenizer_ptr tokenizer, char *byte, bool need) +{ + if (tokenizer->buf_pos >= tokenizer->buf_len) { + size_t len = BUFSIZE; + + tokenizer->buf_pos = 0; + tokenizer->buf_len = 0; + + if (!tokenizer->source_reader(tokenizer->source_reader_context, tokenizer->buf, &len, need)) { + tokenizer->error.type = APFL_ERR_INPUT_ERROR; + return RR_ERR; + } + + tokenizer->buf_len = len; + + if (len == 0) { + return RR_EOF; + } + } + + if (tokenizer->last_byte_was_linebreak) { + tokenizer->position.line++; + tokenizer->position.col = 0; + } + + *byte = tokenizer->buf[tokenizer->buf_pos]; + tokenizer->buf_pos++; + + tokenizer->last_byte_was_linebreak = (*byte == '\n'); + tokenizer->position.col++; + + return RR_OK; +} + +// Only at most 1 unread_byte() call is allowed after a read_byte() call! 
+static void +unread_byte(apfl_tokenizer_ptr tokenizer, struct apfl_position pos) +{ + tokenizer->position = pos; + tokenizer->buf_pos--; + tokenizer->last_byte_was_linebreak = false; +} + +static enum apfl_parse_result +yield_simple_token( + apfl_tokenizer_ptr tokenizer, + enum apfl_token_type type, + struct apfl_position pos +) { + tokenizer->token.type = type; + tokenizer->token.position = pos; + return APFL_PARSE_OK; +} + +static enum apfl_parse_result comment(apfl_tokenizer_ptr); +static enum apfl_parse_result colon(apfl_tokenizer_ptr); +static enum apfl_parse_result string(apfl_tokenizer_ptr); +static enum apfl_parse_result maybe_name(apfl_tokenizer_ptr, bool, char); +static enum apfl_parse_result number(apfl_tokenizer_ptr, bool, struct apfl_position, char, bool); + +enum apfl_parse_result +apfl_tokenizer_next(apfl_tokenizer_ptr tokenizer, bool need) +{ + switch (tokenizer->next_mode) { + case NM_REGULAR: + break; + case NM_MAPSTO: + tokenizer->next_mode = NM_REGULAR; + return yield_simple_token(tokenizer, APFL_TOK_MAPSTO, tokenizer->pos_for_mapsto); + case NM_NEGATIVE_NUMBER: + tokenizer->next_mode = NM_REGULAR; + return number(tokenizer, need, tokenizer->position, tokenizer->first_digit_for_negative_number, true); + case NM_ASSIGN: + tokenizer->next_mode = NM_REGULAR; + return yield_simple_token(tokenizer, APFL_TOK_ASSIGN, tokenizer->position); + case NM_EOF: + return APFL_PARSE_EOF; + } + + char byte; + + for (;;) { + switch (read_byte(tokenizer, &byte, need)) { + case RR_OK: + break; + case RR_ERR: + return APFL_PARSE_ERROR; + case RR_EOF: + tokenizer->next_mode = NM_EOF; + return APFL_PARSE_EOF; + } + + switch (byte) { + case '(': + return yield_simple_token(tokenizer, APFL_TOK_LPAREN, tokenizer->position); + case ')': + return yield_simple_token(tokenizer, APFL_TOK_RPAREN, tokenizer->position); + case '[': + return yield_simple_token(tokenizer, APFL_TOK_LBRACKET, tokenizer->position); + case ']': + return yield_simple_token(tokenizer, APFL_TOK_RBRACKET, 
tokenizer->position); + case '{': + return yield_simple_token(tokenizer, APFL_TOK_LBRACE, tokenizer->position); + case '}': + return yield_simple_token(tokenizer, APFL_TOK_RBRACE, tokenizer->position); + case '~': + return yield_simple_token(tokenizer, APFL_TOK_EXPAND, tokenizer->position); + case '.': + return yield_simple_token(tokenizer, APFL_TOK_DOT, tokenizer->position); + case '@': + return yield_simple_token(tokenizer, APFL_TOK_AT, tokenizer->position); + case ';': + return yield_simple_token(tokenizer, APFL_TOK_SEMICOLON, tokenizer->position); + case '\n': + return yield_simple_token(tokenizer, APFL_TOK_LINEBREAK, tokenizer->position); + case '\\': + return yield_simple_token(tokenizer, APFL_TOK_CONTINUE_LINE, tokenizer->position); + case ',': + return yield_simple_token(tokenizer, APFL_TOK_COMMA, tokenizer->position); + case '?': + return yield_simple_token(tokenizer, APFL_TOK_QUESTION_MARK, tokenizer->position); + case '\'': + return yield_simple_token(tokenizer, APFL_TOK_STRINGIFY, tokenizer->position); + case '#': + return comment(tokenizer); + case ':': + return colon(tokenizer); + case '"': + return string(tokenizer); + case ' ': + case '\r': + case '\t': + // Skip whitespace + break; + default: + if (isdigit(byte)) + return number(tokenizer, need, tokenizer->position, byte, false); + else + return maybe_name(tokenizer, need, byte); + } + } +} + +static enum apfl_parse_result +comment(apfl_tokenizer_ptr tokenizer) +{ + char byte; + + struct apfl_position pos = tokenizer->position; + struct apfl_position last_pos; + + struct apfl_string_builder text; + apfl_string_builder_init(&text); + + for (;;) { + last_pos = tokenizer->position; + + switch (read_byte(tokenizer, &byte, true)) { + case RR_OK: + break; + case RR_ERR: + return APFL_PARSE_ERROR; + case RR_EOF: + tokenizer->next_mode = NM_EOF; + tokenizer->token = (struct apfl_token) { + .type = APFL_TOK_COMMENT, + .position = pos, + .text = apfl_string_builder_move_string(&text), + }; + return 
/*
 * Value of the decimal digit `byte` if it is a digit smaller than `base`
 * (which must not be larger than 10), -1 otherwise.
 */
static int
digit_below(char byte, int base)
{
    if (byte < '0' || byte > '9') {
        return -1;
    }

    int value = byte - '0';
    return value < base ? value : -1;
}

/* Value of the hexadecimal digit `byte` (either case), -1 if it is none. */
static int
unhex(char byte)
{
    static const char lower[] = "abcdef";
    static const char upper[] = "ABCDEF";

    int value = digit_below(byte, 10);
    if (value >= 0) {
        return value;
    }

    for (int i = 0; i < 6; i++) {
        if (byte == lower[i] || byte == upper[i]) {
            return 0xA + i;
        }
    }

    return -1;
}

/* Value of the decimal digit `byte`, -1 if it is none. */
static int
undec(char byte)
{
    return digit_below(byte, 10);
}

/* Value of the octal digit `byte`, -1 if it is none. */
static int
unoct(char byte)
{
    return digit_below(byte, 8);
}

/* Value of the binary digit `byte`, -1 if it is none. */
static int
unbin(char byte)
{
    return digit_below(byte, 2);
}
case 'u': + // case 'U': + // return unicode_escape(tokenizer, pos, text); + case '\\': + return append_single_byte(tokenizer, text, '\\'); + case 'n': + return append_single_byte(tokenizer, text, '\n'); + case 'r': + return append_single_byte(tokenizer, text, '\r'); + case 't': + return append_single_byte(tokenizer, text, '\t'); + case '"': + return append_single_byte(tokenizer, text, '"'); + case '0': + return append_single_byte(tokenizer, text, 0); + default: + tokenizer->error = (struct apfl_error) { + .type = APFL_ERR_INVALID_ESCAPE_SEQUENCE, + .position = pos, + .byte = byte, + }; + return APFL_PARSE_ERROR; + } +} + +static enum apfl_parse_result +inner_string(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *text) +{ + struct apfl_position pos = tokenizer->position; + + char byte; + + enum apfl_parse_result subresult; + + for (;;) { + switch (read_byte(tokenizer, &byte, true)) { + case RR_OK: + break; + case RR_ERR: + return APFL_PARSE_ERROR; + case RR_EOF: + tokenizer->next_mode = NM_EOF; + tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF }; + return APFL_PARSE_ERROR; + } + + switch (byte) { + case '"': + tokenizer->token = (struct apfl_token) { + .type = APFL_TOK_STRING, + .position = pos, + .text = apfl_string_builder_move_string(text), + }; + return APFL_PARSE_OK; + case '\\': + if ((subresult = escape_sequence(tokenizer, text)) != APFL_PARSE_OK) { + return subresult; + } + break; + default: + if (!apfl_string_builder_append_byte(text, byte)) { + tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED); + return APFL_PARSE_ERROR; + } + } + } +} + +static enum apfl_parse_result +string(apfl_tokenizer_ptr tokenizer) +{ + struct apfl_string_builder text; + apfl_string_builder_init(&text); + + enum apfl_parse_result out = inner_string(tokenizer, &text); + + apfl_string_builder_deinit(&text); + + return out; +} + +static enum apfl_parse_result +finalize_maybe_name( + apfl_tokenizer_ptr tokenizer, + struct 
apfl_string_builder *text,
+    struct apfl_position pos
+) {
+    assert(text->len > 0);
+
+    if (text->len == 1 && text->bytes[0] == '=') {
+        tokenizer->token = (struct apfl_token) {
+            .type = APFL_TOK_ASSIGN,
+            .position = pos,
+        };
+    } else {
+        tokenizer->token = (struct apfl_token) {
+            .type = APFL_TOK_NAME,
+            .position = pos,
+            .text = apfl_string_builder_move_string(text),
+        };
+    }
+
+    return APFL_PARSE_OK;
+}
+
+static bool // A "word byte" is an ASCII letter or digit, or any byte above 0x7F.
+is_word_byte(unsigned char byte)
+{
+    return isalnum(byte) || byte > 0x7F;
+}
+
+static enum apfl_parse_result // Collects the bytes of a name into text and emits it via finalize_maybe_name().
+maybe_name_inner(
+    apfl_tokenizer_ptr tokenizer,
+    bool need, // forwarded unchanged to read_byte()
+    char byte, // first byte of the name; appended to text before anything else is read
+    struct apfl_string_builder *text // caller-owned builder; the caller deinitializes it
+) {
+    struct apfl_position pos = tokenizer->position; // reported as the position of the resulting token
+    struct apfl_position last_pos; // tokenizer position before the most recent read_byte()
+    char last_byte; // the byte that preceded the current one
+
+    if (!apfl_string_builder_append_byte(text, byte)) {
+        tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
+        return APFL_PARSE_ERROR;
+    }
+
+    for (;;) {
+        last_byte = byte;
+        last_pos = tokenizer->position;
+
+        switch (read_byte(tokenizer, &byte, need)) {
+        case RR_OK:
+            break;
+        case RR_ERR:
+            return APFL_PARSE_ERROR;
+        case RR_EOF:
+            tokenizer->next_mode = NM_EOF; // end of input also ends the name
+            return finalize_maybe_name(tokenizer, text, pos);
+        }
+
+        switch (byte) {
+        case '(': // every byte in this list ends the name without being part of it
+        case ')':
+        case '[':
+        case ']':
+        case '{':
+        case '}':
+        case '~':
+        case '.':
+        case '@':
+        case ';':
+        case '\n':
+        case '\\':
+        case ',':
+        case '?':
+        case '\'':
+        case '#':
+        case ':':
+        case '"':
+        case ' ':
+        case '\r':
+        case '\t':
+            unread_byte(tokenizer, last_pos); // hand the delimiter back for the next token
+            return finalize_maybe_name(tokenizer, text, pos);
+        case '=':
+            if (is_word_byte(last_byte)) { // '=' directly after a word byte ends the name ("a=" is NAME, ASSIGN)
+                tokenizer->next_mode = NM_ASSIGN;
+                return finalize_maybe_name(tokenizer, text, pos);
+            }
+
+            break; // otherwise '=' is kept as part of the name (e.g. "==")
+        case '>':
+            if (last_byte == '-') { // "->" ends the name; the '-' belongs to the arrow
+                text->len--; // This removes the '-' from the end of text
+                if (text->len == 0) { // nothing but the arrow was read
+                    return yield_simple_token(tokenizer, APFL_TOK_MAPSTO, last_pos);
+                }
+
+                tokenizer->next_mode = NM_MAPSTO;
+                tokenizer->pos_for_mapsto = last_pos;
+                return finalize_maybe_name(tokenizer, text, pos);
+            }
+
+            break;
+        default:
+            if (isdigit((unsigned char)byte) && last_byte == '-') { // cast: <ctype.h> functions require an unsigned char value; plain char may be negative for bytes > 0x7F
+                text->len--; // This removes the '-' from the end of text
+
+                if (text->len == 0) { // the '-' was the only byte read: this is just a negative number
+                    return number(tokenizer, need, pos, byte, true);
+                }
+
+                tokenizer->next_mode = NM_NEGATIVE_NUMBER;
+                tokenizer->first_digit_for_negative_number = byte;
+                return finalize_maybe_name(tokenizer, text, pos);
+            }
+
+            break;
+        }
+
+        if (!apfl_string_builder_append_byte(text, byte)) {
+            tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
+            return APFL_PARSE_ERROR;
+        }
+    }
+}
+
+static enum apfl_parse_result // Runs maybe_name_inner() with a temporary string builder that is released on every path.
+maybe_name(apfl_tokenizer_ptr tokenizer, bool need, char first_byte)
+{
+    struct apfl_string_builder text;
+    apfl_string_builder_init(&text);
+
+    enum apfl_parse_result out = maybe_name_inner(tokenizer, need, first_byte, &text);
+
+    apfl_string_builder_deinit(&text);
+
+    return out;
+}
+
+static struct apfl_token // Builds an APFL_TOK_NUMBER token; the value is negated when negative is set.
+build_number_token(double number, struct apfl_position position, bool negative)
+{
+    if (negative) {
+        number *= -1;
+    }
+
+    return (struct apfl_token) {
+        .type = APFL_TOK_NUMBER,
+        .position = position,
+        .number = (apfl_number)number,
+    };
+}
+
+static enum apfl_parse_result // Tokenizes the digits of a number whose digits are combined by shifting (see the wrappers below).
+non_decimal_number(
+    apfl_tokenizer_ptr tokenizer,
+    bool need,
+    struct apfl_position position, // position stored in the resulting token
+    bool negative,
+    int shift, // number of bits the accumulator is shifted left per digit
+    int (*byte_to_digit)(char)) // yields the digit value; a negative result means "not a digit"
+{
+    struct apfl_position last_pos;
+    bool no_digits_yet = true;
+    char byte;
+
+    uint64_t num = 0;
+
+    for (;;) {
+        last_pos = tokenizer->position;
+        switch (read_byte(tokenizer, &byte, no_digits_yet || need)) { // "need" is forced on until the first digit was read
+        case RR_OK:
+            break;
+        case RR_ERR:
+            return APFL_PARSE_ERROR;
+        case RR_EOF:
+            tokenizer->next_mode = NM_EOF;
+            if (no_digits_yet) { // EOF before any digit is an error
+                tokenizer->error = (struct apfl_error) {
+                    .type = APFL_ERR_UNEXPECTED_EOF,
+                };
+                return APFL_PARSE_ERROR;
+            } else {
+                tokenizer->token = build_number_token((double)num, position, negative);
+                return APFL_PARSE_OK;
+            }
+        }
+
+        int digit = byte_to_digit(byte);
+        if (digit >= 0) {
+            num <<= shift; // make room for the new digit
+            num |= digit;
+
+            no_digits_yet = false;
+            continue;
+        }
+
+        if (no_digits_yet) { // the first byte read here must be a digit
+            tokenizer->error = (struct apfl_error) {
+                .type = APFL_ERR_EXPECTED_DIGIT,
+                .position = tokenizer->position,
+            };
+            return APFL_PARSE_ERROR;
+        }
+
+        if (is_word_byte(byte)) { // a word byte glued to the number is rejected rather than split off
+            tokenizer->error = (struct apfl_error) {
+                .type = APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER,
+                .position = tokenizer->position,
+                .byte = byte,
+            };
+            return APFL_PARSE_ERROR;
+        }
+
+        unread_byte(tokenizer, last_pos); // any other byte ends the number and is handed back
+        tokenizer->token = build_number_token((double)num, position, negative);
+        return APFL_PARSE_OK;
+    }
+}
+
+#define BUILD_NON_DECIMAL_TOKENIZER(name, shift, byte_to_digit) /* defines name() as non_decimal_number() with a fixed shift and digit decoder */ \
+    static enum apfl_parse_result \
+    name( \
+        apfl_tokenizer_ptr tokenizer, \
+        bool need, \
+        struct apfl_position position, \
+        bool negative \
+    ) { \
+        return non_decimal_number( \
+            tokenizer, \
+            need, \
+            position, \
+            negative, \
+            shift, \
+            byte_to_digit \
+        ); \
+    }
+
+BUILD_NON_DECIMAL_TOKENIZER(hex_number, 4, unhex)
+BUILD_NON_DECIMAL_TOKENIZER(oct_number, 3, unoct)
+BUILD_NON_DECIMAL_TOKENIZER(bin_number, 1, unbin)
+
+static enum apfl_parse_result // Tokenizes a decimal number; a leading "0x"/"0b"/"0o" (either case) dispatches to the wrappers above.
+number(
+    apfl_tokenizer_ptr tokenizer,
+    bool need,
+    struct apfl_position position, // position stored in the resulting token
+    char first_digit, // passed in by the caller; all further bytes are read from the tokenizer
+    bool negative
+) {
+    double num = (double)undec(first_digit); // all digits seen so far, read as one integer
+    double divider = 1; // multiplied by 10 for every digit after the dot; the value is num / divider
+    bool first_iteration = true;
+    bool seen_dot = false;
+    struct apfl_position last_pos;
+
+    for (;; first_iteration = false) {
+        char byte;
+
+        last_pos = tokenizer->position;
+        switch (read_byte(tokenizer, &byte, need)) {
+        case RR_OK:
+            break;
+        case RR_ERR:
+            return APFL_PARSE_ERROR;
+        case RR_EOF:
+            tokenizer->next_mode = NM_EOF;
+            tokenizer->token = build_number_token(num / divider, position, negative);
+            return APFL_PARSE_OK;
+        }
+
+        if (first_iteration && first_digit == '0') { // a radix prefix is only recognized directly after a leading '0'
+            switch (byte) {
+            case 'x':
+            case 'X':
+                return hex_number(tokenizer, need, position, negative);
+            case 'b':
+            case 'B':
+                return bin_number(tokenizer, need, position, negative);
+            case 'o':
+            case 'O':
+                return oct_number(tokenizer, need, position, negative);
+            }
+        }
+
+        int digit = undec(byte);
+        if (digit >= 0) {
+            num *= 10;
+            num += (double)digit;
+
+            if (seen_dot) {
+                divider *= 10;
+            }
+
+            continue;
+        }
+
+        if (byte == '.') {
+            if (seen_dot) { // a second dot ends the number and is handed back
+                unread_byte(tokenizer, last_pos);
+                tokenizer->token = build_number_token(num / divider, position, negative);
+                return APFL_PARSE_OK;
+            } else {
+                seen_dot = true;
+                continue;
+            }
+        }
+
+        if (is_word_byte(byte)) { // a word byte glued to the number is rejected rather than split off
+            tokenizer->error = (struct apfl_error) {
+                .type = APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER,
+                .position = tokenizer->position,
+                .byte = byte,
+            };
+            return APFL_PARSE_ERROR;
+        }
+
+        unread_byte(tokenizer, last_pos); // any other byte ends the number and is handed back
+        tokenizer->token = build_number_token(num / divider, position, negative);
+        return APFL_PARSE_OK;
+    }
+}
diff --git a/src/tokenizer_test.c b/src/tokenizer_test.c
new file mode 100644
index 0000000..2e7e8f6
--- /dev/null
+++ b/src/tokenizer_test.c
@@ -0,0 +1,284 @@
+#include <assert.h> // NOTE(review): the header name was missing here; <assert.h> is inferred from the assert() call below - confirm
+
+#include "apfl.h"
+
+#include "test.h"
+
+struct string_src_reader_ctx { // state of the in-memory source reader used by the tests
+    char *text; // start of the owned copy of the test input (freed in destroy_tokenizer_test)
+    char *remain_text; // next byte to hand out
+    size_t remain_len; // number of bytes not handed out yet
+};
+
+static void * // malloc() that reports a failed allocation through test_fatalf()
+must_alloc(testctx t, size_t size)
+{
+    void *out = malloc(size);
+    if (out == NULL) {
+        test_fatalf(t, "Failed allocating %zu bytes of memory", size); // %zu: size is a size_t
+    }
+    return out;
+}
+
+static bool // Source reader callback: copies up to *len remaining bytes into buf and stores the copied count in *len.
+string_src_reader(void *_ctx, char *buf, size_t *len, bool need)
+{
+    (void)need;
+
+    struct string_src_reader_ctx *ctx = _ctx;
+
+    size_t maxlen = *len;
+    *len = maxlen < ctx->remain_len ? maxlen : ctx->remain_len; // never hand out more than is left
+    memcpy(buf, ctx->remain_text, *len);
+    ctx->remain_text += *len;
+    assert(*len <= ctx->remain_len);
+    ctx->remain_len -= *len;
+
+    return true;
+}
+
+struct tokenizer_test { // everything one test case needs, bundled so it can be torn down in one call
+    testctx t;
+    apfl_tokenizer_ptr tokenizer;
+    struct string_src_reader_ctx *ctx;
+};
+
+static struct tokenizer_test * // Creates a tokenizer that reads from a private copy of text; release with destroy_tokenizer_test().
+new_tokenizer_test(testctx t, const char *text)
+{
+    struct string_src_reader_ctx *ctx = must_alloc(t, sizeof(struct string_src_reader_ctx));
+    ctx->remain_len = strlen(text);
+    ctx->text = must_alloc(t, ctx->remain_len + 1); // +1 for the terminating NUL written by strcpy()
+    strcpy(ctx->text, text);
+    ctx->remain_text = ctx->text;
+
+    apfl_tokenizer_ptr tokenizer = apfl_tokenizer_new(string_src_reader, ctx);
+    if (tokenizer == NULL) {
+        test_fatalf(t, "Failed to initialize the tokenizer");
+    }
+
+    struct tokenizer_test *tt = must_alloc(t, sizeof(struct tokenizer_test));
+
+    *tt = (struct tokenizer_test) {
+        .t = t,
+        .tokenizer = tokenizer,
+        .ctx = ctx,
+    };
+
+    return tt;
+}
+
+static void // Frees everything new_tokenizer_test() allocated, including tt itself.
+destroy_tokenizer_test(struct tokenizer_test *tt)
+{
+    free(tt->ctx->text);
+    free(tt->ctx);
+    apfl_tokenizer_destroy(tt->tokenizer);
+    free(tt);
+}
+
+static void // Requires the next tokenizer result to be EOF; a token or an error is reported as fatal.
+expect_eof(struct tokenizer_test *tt)
+{
+    switch (apfl_tokenizer_next(tt->tokenizer, false)) {
+    case APFL_PARSE_OK:
+        test_fatalf(tt->t, "Expected EOF but got a token");
+        break;
+    case APFL_PARSE_EOF:
+        break;
+    case APFL_PARSE_ERROR:
+        test_failf(tt->t, "Got an error instead of an EOF");
+        apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), stderr);
+        test_fatal(tt->t);
+        break;
+    }
+}
+
+static bool // Fetches the next token into *tok; returns true when its type matches, in which case the caller must deinit *tok.
+expect_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, struct apfl_token *tok)
+{
+    switch (apfl_tokenizer_next(tt->tokenizer, false)) {
+    case APFL_PARSE_OK:
+        break;
+    case APFL_PARSE_EOF:
+        test_fatalf(tt->t, "Got an EOF instead of a token");
+        break;
+    case APFL_PARSE_ERROR:
+        test_failf(tt->t, "Got an error instead of a token");
+        apfl_error_print(apfl_tokenizer_get_error(tt->tokenizer), stderr);
+        test_fatal(tt->t);
+        break;
+    }
+
+    *tok = apfl_tokenizer_get_token(tt->tokenizer);
+    if (tok->type != type) {
+        test_failf(
+            tt->t,
+            "Got wrong token type %s (wanted %s)",
+            apfl_token_type_name(tok->type),
+            apfl_token_type_name(type)
+        );
+        apfl_token_deinit(tok); // on a type mismatch the token is released here, not by the caller
+
+        return false;
+    }
+
+    if (tok->position.line != line || tok->position.col != col) { // a wrong position is reported, but the token is still returned
+        test_failf(tt->t, "Got token at wrong position %d:%d (wanted %d:%d)", tok->position.line, tok->position.col, line, col);
+    }
+
+    return true;
+}
+
+static void // Expects a token of the given type at line:col and ignores any payload.
+expect_simple_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type)
+{
+    struct apfl_token tok;
+    if (expect_token(tt, line, col, type, &tok)) {
+        apfl_token_deinit(&tok);
+    }
+}
+
+static void // Expects a token of the given type at line:col whose text equals the C string text.
+expect_text_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, const char *text)
+{
+    struct apfl_token tok;
+    if (expect_token(tt, line, col, type, &tok)) {
+        if (apfl_string_cmp(text, tok.text) != 0) {
+            test_failf(tt->t, "Token has wrong content. have=\"" APFL_STR_FMT "\", want=\"%s\"", APFL_STR_FMT_ARGS(tok.text), text);
+        }
+        apfl_token_deinit(&tok);
+    }
+}
+
+static void // Expects a token of the given type at line:col whose number compares equal to num.
+expect_number_token(struct tokenizer_test *tt, int line, int col, enum apfl_token_type type, apfl_number num)
+{
+    struct apfl_token tok;
+    if (expect_token(tt, line, col, type, &tok)) {
+        if (tok.number != num) {
+            test_failf(tt->t, "Token has wrong content. have=%f, want=%f", tok.number, num);
+        }
+        apfl_token_deinit(&tok);
+    }
+}
+
+TEST(empty, t) { // empty input yields EOF immediately
+    struct tokenizer_test *tt = new_tokenizer_test(t, "");
+
+    expect_eof(tt);
+
+    destroy_tokenizer_test(tt);
+}
+
+TEST(simple_variable, t) { // a single name followed by EOF
+    struct tokenizer_test *tt = new_tokenizer_test(t, "hello");
+
+    expect_text_token(tt, 1, 1, APFL_TOK_NAME, "hello");
+    expect_eof(tt);
+
+    destroy_tokenizer_test(tt);
+}
+
+TEST(numbers, t) { // decimal, negative, fractional, hex and octal literals
+    struct tokenizer_test *tt = new_tokenizer_test(t,
+      //          1         2
+      // 12345678901234567890123456789
+        "0 1 -1 1.5 -2.25 666 0xfe 0o15"
+    );
+
+    expect_number_token(tt, 1,  1, APFL_TOK_NUMBER, 0);
+    expect_number_token(tt, 1,  3, APFL_TOK_NUMBER, 1);
+    expect_number_token(tt, 1,  5, APFL_TOK_NUMBER, -1);
+    expect_number_token(tt, 1,  8, APFL_TOK_NUMBER, 1.5);
+    expect_number_token(tt, 1, 12, APFL_TOK_NUMBER, -2.25);
+    expect_number_token(tt, 1, 18, APFL_TOK_NUMBER, 666);
+    expect_number_token(tt, 1, 22, APFL_TOK_NUMBER, 0xfe);
+    expect_number_token(tt, 1, 27, APFL_TOK_NUMBER, 015);
+    expect_eof(tt);
+
+    destroy_tokenizer_test(tt);
+}
+
+TEST(names, t) { // how '-', "->" and '=' split names; the trailing "x+=" is not checked here
+    struct tokenizer_test *tt = new_tokenizer_test(t, "foo bar --->-->-> Δv == a= x12=x+=");
+
+    expect_text_token  (tt, 1,  1, APFL_TOK_NAME, "foo");
+    expect_text_token  (tt, 1,  5, APFL_TOK_NAME, "bar");
+    expect_text_token  (tt, 1,  9, APFL_TOK_NAME, "--");
+    expect_simple_token(tt, 1, 11, APFL_TOK_MAPSTO);
+    expect_text_token  (tt, 1, 13, APFL_TOK_NAME, "-");
+    expect_simple_token(tt, 1, 14, APFL_TOK_MAPSTO);
+    expect_simple_token(tt, 1, 16, APFL_TOK_MAPSTO);
+    expect_text_token  (tt, 1, 19, APFL_TOK_NAME, "Δv");
+    expect_text_token  (tt, 1, 23, APFL_TOK_NAME, "==");
+    expect_text_token  (tt, 1, 26, APFL_TOK_NAME, "a");
+    expect_simple_token(tt, 1, 27, APFL_TOK_ASSIGN);
+    expect_text_token  (tt, 1, 29, APFL_TOK_NAME, "x12");
+    expect_simple_token(tt, 1, 32, APFL_TOK_ASSIGN);
+
+    destroy_tokenizer_test(tt);
+}
+
+TEST(all_tokens, t) { // the rulers give token columns; for an escape sequence the digit sits above its backslash
+    struct tokenizer_test *tt = new_tokenizer_test(t,
+      // 1234567
+        "# test\n"
+      // 1 2345 678901234567
+        "\"abc\" def g-h*=i\n"
+      // 123456789012345678901234567890
+        "1234.5 -10 0x2A 0b101010 0o52\n"
+      // 12345678901 2
+        "'foo ;; , \\\n"
+      // 1234567890123456
+        "@ . ? ~ -> = :=\n"
+      // 123456
+        "({[]})"
+    );
+
+    expect_text_token  (tt, 1,  1, APFL_TOK_COMMENT, " test");
+    expect_simple_token(tt, 1,  7, APFL_TOK_LINEBREAK);
+    expect_text_token  (tt, 2,  1, APFL_TOK_STRING, "abc");
+    expect_text_token  (tt, 2,  7, APFL_TOK_NAME, "def");
+    expect_text_token  (tt, 2, 11, APFL_TOK_NAME, "g-h*=i");
+    expect_simple_token(tt, 2, 17, APFL_TOK_LINEBREAK);
+    expect_number_token(tt, 3,  1, APFL_TOK_NUMBER, 1234.5);
+    expect_number_token(tt, 3,  8, APFL_TOK_NUMBER, -10);
+    expect_number_token(tt, 3, 12, APFL_TOK_NUMBER, 42);
+    expect_number_token(tt, 3, 17, APFL_TOK_NUMBER, 42);
+    expect_number_token(tt, 3, 26, APFL_TOK_NUMBER, 42);
+    expect_simple_token(tt, 3, 30, APFL_TOK_LINEBREAK);
+    expect_simple_token(tt, 4,  1, APFL_TOK_STRINGIFY);
+    expect_text_token  (tt, 4,  2, APFL_TOK_NAME, "foo");
+    expect_simple_token(tt, 4,  6, APFL_TOK_SEMICOLON);
+    expect_simple_token(tt, 4,  7, APFL_TOK_SEMICOLON);
+    expect_simple_token(tt, 4,  9, APFL_TOK_COMMA);
+    expect_simple_token(tt, 4, 11, APFL_TOK_CONTINUE_LINE);
+    expect_simple_token(tt, 4, 12, APFL_TOK_LINEBREAK);
+    expect_simple_token(tt, 5,  1, APFL_TOK_AT);
+    expect_simple_token(tt, 5,  3, APFL_TOK_DOT);
+    expect_simple_token(tt, 5,  5, APFL_TOK_QUESTION_MARK);
+    expect_simple_token(tt, 5,  7, APFL_TOK_EXPAND);
+    expect_simple_token(tt, 5,  9, APFL_TOK_MAPSTO);
+    expect_simple_token(tt, 5, 12, APFL_TOK_ASSIGN);
+    expect_simple_token(tt, 5, 14, APFL_TOK_LOCAL_ASSIGN);
+    expect_simple_token(tt, 5, 16, APFL_TOK_LINEBREAK);
+    expect_simple_token(tt, 6,  1, APFL_TOK_LPAREN);
+    expect_simple_token(tt, 6,  2, APFL_TOK_LBRACE);
+    expect_simple_token(tt, 6,  3, APFL_TOK_LBRACKET);
+    expect_simple_token(tt, 6,  4, APFL_TOK_RBRACKET);
+    expect_simple_token(tt, 6,  5, APFL_TOK_RBRACE);
+    expect_simple_token(tt, 6,  6, APFL_TOK_RPAREN);
+
+    expect_eof(tt);
+
+    destroy_tokenizer_test(tt);
+}
+
+TESTS_BEGIN
+    ADDTEST(empty),
+    ADDTEST(simple_variable),
+    ADDTEST(numbers),
+    ADDTEST(names),
+    ADDTEST(all_tokens),
+TESTS_END
diff --git a/src/value.h b/src/value.h
new file mode 100644
index 0000000..e69de29