From 97f5986781566517547b3c36079f81116835540e Mon Sep 17 00:00:00 2001
From: Laria Carolin Chabowski
Date: Mon, 3 Jul 2023 23:33:19 +0200
Subject: [PATCH] Implement tonumber

---
 src/CMakeLists.txt               |   2 +
 src/builtins.c                   |  91 ++++++++++++++++++++++++++
 src/functional-tests/tonumber.at |  14 ++++
 src/globals.apfl                 |  10 +++
 src/numparse.c                   | 111 +++++++++++++++++++++++++++++++
 src/parsing.h                    |  41 ++++++++++++
 src/tokenizer.c                  |   7 +--
 7 files changed, 270 insertions(+), 6 deletions(-)
 create mode 100644 src/functional-tests/tonumber.at
 create mode 100644 src/numparse.c
 create mode 100644 src/parsing.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c27df5b..f7db914 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -16,6 +16,7 @@ set(commonfiles
     hashmap.c
     io.c
     messages.c
+    numparse.c
     parser.c
     position.c
     resizable.c
@@ -119,6 +120,7 @@ functionaltest("pairs")
 functionaltest("symbols")
 functionaltest("get-optional")
 functionaltest("has-key")
+functionaltest("tonumber")
 
 install(TARGETS apfl DESTINATION lib)
 install(TARGETS apfl-bin DESTINATION bin)
diff --git a/src/builtins.c b/src/builtins.c
index 954224e..f48a559 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -10,6 +10,7 @@
 #include "bytecode.h"
 #include "context.h"
 #include "modules.h"
+#include "parsing.h"
 #include "scope.h"
 
 #define TRY_FORMAT(ctx, x) \
@@ -284,6 +285,95 @@ tostring(apfl_ctx ctx)
     apfl_tostring(ctx, -1);
 }
 
+// Cursor over a string view, used as the byte source for apfl_parse_number.
+struct numparse_data {
+    struct apfl_string_view sv;
+    size_t off; // Index of the next byte to hand out
+};
+
+// Read callback for apfl_parse_number: yields the next byte of the string
+// view in opaque (a struct numparse_data) or RR_EOF once it is exhausted.
+static enum read_result
+numparse_read(void *opaque, unsigned char *b)
+{
+    struct numparse_data *data = opaque;
+    if (data->off >= data->sv.len) {
+        return RR_EOF;
+    }
+
+    *b = data->sv.bytes[data->off];
+    data->off++;
+    return RR_OK;
+}
+
+// Unread callback for apfl_parse_number: steps back over the byte that was
+// read last, so that it counts as not consumed.
+static void
+numparse_unread(void *opaque)
+{
+    struct numparse_data *data = opaque;
+    assert(data->off > 0);
+    data->off--;
+}
+
+// Builtin tonumber: takes a base (2 to 36) and a string and pushes the number
+// the string denotes in that base. Raises an error if the base is out of
+// range or if the string is not a number in its entirety.
+static void
+tonumber(apfl_ctx ctx)
+{
+    if (apfl_len(ctx, 0) != 2) {
+        apfl_raise_const_error(ctx, "tonumber needs exactly 2 arguments");
+    }
+
+    apfl_get_list_member_by_index(ctx, 0, 0);
+    apfl_number base_number = apfl_get_number(ctx, -1);
+    apfl_get_list_member_by_index(ctx, 0, 1);
+    struct apfl_string_view sv = apfl_get_string(ctx, -1);
+    apfl_drop(ctx, 0);
+
+    // Check the range before converting: converting a floating point value
+    // that does not fit into an unsigned is undefined behaviour. The negated
+    // form also rejects NaN.
+    if (!(base_number >= 2 && base_number < 37)) {
+        apfl_raise_const_error(ctx, "base must be between 2 and 36");
+    }
+    unsigned base = (unsigned)base_number;
+
+    bool negative = false;
+    if (sv.len > 0 && sv.bytes[0] == '-') {
+        negative = true;
+        sv = apfl_string_view_offset(sv, 1);
+    }
+
+    // apfl_parse_number succeeds without consuming any digit, so input
+    // without digits ("", "-", ".") must be rejected here.
+    if (sv.len == 0 || (sv.len == 1 && sv.bytes[0] == '.')) {
+        apfl_raise_const_error(ctx, "Can not parse as number");
+    }
+
+    apfl_number number = 0;
+    struct numparse_data data = {
+        .sv = sv,
+        .off = 0,
+    };
+    bool ok = apfl_parse_number(base, numparse_read, numparse_unread, &data, &number);
+    assert(ok); // numparse_read never returns RR_ERR
+    (void)ok; // Keeps builds with NDEBUG free of an unused variable warning
+
+    // Unconsumed bytes mean the string contains something that is not part
+    // of a number in this base.
+    if (data.off != sv.len) {
+        apfl_raise_const_error(ctx, "Can not parse as number");
+    }
+
+    if (negative) {
+        number *= -1;
+    }
+
+    apfl_push_number(ctx, number);
+}
+
 static void
 not(apfl_ctx ctx)
 {
@@ -780,4 +870,5 @@ apfl_builtins(apfl_ctx ctx)
     add_builtin(ctx, "getsym-Some", apfl_sym_some);
     add_builtin(ctx, "get-argv", get_argv);
     add_builtin(ctx, "cmod-searcher", cmod_searcher);
+    add_builtin(ctx, "tonumber", tonumber);
 }
diff --git a/src/functional-tests/tonumber.at b/src/functional-tests/tonumber.at
new file mode 100644
index 0000000..b35427e
--- /dev/null
+++ b/src/functional-tests/tonumber.at
@@ -0,0 +1,14 @@
+===== script =====
+print (tonumber 2 "101010")
+print (tonumber 10 "123.45")
+print (tonumber 16 "2a")
+print (tonumber 16 "2A")
+print (tonumber 16 "-2A")
+print (tonumber 36 "cool")
+===== output =====
+42
+123.45
+42
+42
+-42
+591861
diff --git a/src/globals.apfl b/src/globals.apfl
index e11883b..3d64923 100644
--- a/src/globals.apfl
+++ b/src/globals.apfl
@@ -238,6 +238,15 @@
         ((load-file f))
     }
 
+    tonumber := {
+        x ->
+            tonumber 10 x
+        10 x?(has type 'number) ->
+            x
+        base x ->
+            builtins.tonumber base (tostring x)
+    }
+
     modules := ({
         loaded-modules := [->]
         searchers := []
@@ -309,6 +318,7 @@
         'dump -> dump
         'disasm -> disasm
         'tostring -> tostring
+        'tonumber -> tonumber
         'not -> not
         'len -> len
         'type -> type
diff --git a/src/numparse.c b/src/numparse.c
new file mode 100644
index 0000000..ff29690
--- /dev/null
+++ b/src/numparse.c
@@ -0,0 +1,111 @@
+#include <assert.h>
+
+#include "apfl.h"
+
+#include "parsing.h"
+
+// Maps an ASCII digit or letter (case-insensitive) to its value 0 to 35.
+// Returns -1 for any other byte.
+static int
+byte_to_digit(unsigned char b)
+{
+    switch (b) {
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    case 'a': case 'A': return 10;
+    case 'b': case 'B': return 11;
+    case 'c': case 'C': return 12;
+    case 'd': case 'D': return 13;
+    case 'e': case 'E': return 14;
+    case 'f': case 'F': return 15;
+    case 'g': case 'G': return 16;
+    case 'h': case 'H': return 17;
+    case 'i': case 'I': return 18;
+    case 'j': case 'J': return 19;
+    case 'k': case 'K': return 20;
+    case 'l': case 'L': return 21;
+    case 'm': case 'M': return 22;
+    case 'n': case 'N': return 23;
+    case 'o': case 'O': return 24;
+    case 'p': case 'P': return 25;
+    case 'q': case 'Q': return 26;
+    case 'r': case 'R': return 27;
+    case 's': case 'S': return 28;
+    case 't': case 'T': return 29;
+    case 'u': case 'U': return 30;
+    case 'v': case 'V': return 31;
+    case 'w': case 'W': return 32;
+    case 'x': case 'X': return 33;
+    case 'y': case 'Y': return 34;
+    case 'z': case 'Z': return 35;
+
+    default:
+        return -1;
+    }
+}
+
+// Parses a number in the given base from the bytes that read yields.
+// Parsing stops at EOF or at the first byte that is not part of the number
+// (which is then handed back via unread_last). See parsing.h for details.
+bool
+apfl_parse_number(
+    unsigned base,
+    enum read_result (*read)(void *, unsigned char *),
+    void (*unread_last)(void *),
+    void *opaque,
+    apfl_number *restrict out
+) {
+    assert(2 <= base && base <= 36);
+    *out = 0;
+    apfl_number divisor = 1;
+    bool seen_period = false;
+
+    unsigned char b;
+    for (;;) {
+        switch (read(opaque, &b)) {
+        case RR_OK:
+            break;
+        case RR_ERR:
+            return false;
+        case RR_EOF:
+            goto finalize;
+        }
+
+        if (b == '.') {
+            if (seen_period) {
+                // A second period is not part of this number
+                unread_last(opaque);
+                goto finalize;
+            }
+
+            seen_period = true;
+            continue;
+        }
+
+        int digit = byte_to_digit(b);
+        if (digit < 0 || (unsigned)digit >= base) {
+            unread_last(opaque);
+            goto finalize;
+        }
+
+        // Accumulate all digits as one integer and count the fractional
+        // digits in divisor; the period is applied once at the end.
+        *out *= base;
+        *out += digit;
+        if (seen_period) {
+            divisor *= base;
+        }
+    }
+
+finalize:
+    *out /= divisor;
+    return true;
+}
diff --git a/src/parsing.h b/src/parsing.h
new file mode 100644
index 0000000..91fbf27
--- /dev/null
+++ b/src/parsing.h
@@ -0,0 +1,41 @@
+#ifndef APFL_PARSING_H
+#define APFL_PARSING_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+
+#include "apfl.h"
+
+// Outcome of reading a single byte from an input source.
+enum read_result {
+    RR_OK,
+    RR_ERR,
+    RR_EOF,
+};
+
+// Parses a number in the given base (2 to 36) into *out.
+//
+// Bytes are fetched with read(opaque, &byte). Digits beyond 9 are the letters
+// a to z in either case; a single '.' separates the fractional digits. The
+// first byte that does not belong to the number is handed back by calling
+// unread_last(opaque).
+//
+// Returns false if read reported RR_ERR and true otherwise. Input without
+// any digit is not an error, *out is 0 in that case. A leading sign is not
+// handled.
+bool apfl_parse_number(
+    unsigned base,
+    enum read_result (*read)(void *, unsigned char *),
+    void (*unread_last)(void *),
+    void *opaque,
+    apfl_number *restrict out
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/tokenizer.c b/src/tokenizer.c
index fcf932d..a4657d5 100644
--- a/src/tokenizer.c
+++ b/src/tokenizer.c
@@ -9,6 +9,7 @@
 #include "apfl.h"
 
 #include "alloc.h"
+#include "parsing.h"
 
 #define BUFSIZE 4096
 typedef int buf_offset;
@@ -93,12 +94,6 @@ apfl_tokenizer_get_error(apfl_tokenizer_ptr tokenizer)
     return tokenizer->error;
 }
 
-enum read_result {
-    RR_OK,
-    RR_ERR,
-    RR_EOF,
-};
-
 static enum read_result
 read_byte(apfl_tokenizer_ptr tokenizer, char *byte, bool need)
 {