From 2e7fefc7f7a50e8b6edd23667eeb884cc627872b Mon Sep 17 00:00:00 2001
From: Laria Carolin Chabowski
Date: Tue, 7 Mar 2023 21:51:40 +0100
Subject: [PATCH] Implement some string manipulation functions

---
 Review notes (everything between the "---" line above and the first
 diff header is commentary and is not part of the commit message):

 What the patch adds
   * builtins.c: substring() reads list members 0, 1 and 2 of stack
     value 0 as (string, start, len) and pushes a copy of
     apfl_string_view_substr(sv, start, len).  stringsearch() reads
     (haystack, needle) the same way and pushes the offset returned by
     apfl_string_view_search(), or nil when that offset is negative.
     Both are registered as builtins "substring" and "stringsearch".
   * globals.apfl: identity, is, substr, strsearch and split, built on
     the two raw builtins; substr/strsearch/split/identity/is are added
     to the exported globals.  Negative start/end arguments are
     normalised by adding the string length before the raw builtin is
     called.
   * strings.c: apfl_string_copy() returns true right after
     deinitialising dst when src.len == 0, so ALLOC_BYTES is never
     asked for zero bytes.
   * functional-tests/string-manip.at: expected output for the above.

 Fix applied in this revision
   * strsearch, negative-start clause: the clause binds needle, start
     and haystack, but computed (len s); no `s` is bound there.  It now
     uses (len haystack).  A test line (strsearch "ab" -9 ...) with
     expected output 4 was added, since no existing test reached this
     clause.  Hunk counts and the diffstat below were updated for the
     two extra test lines.

 Open items (not changed here)
   * NOTE(review): substring() casts start and len to size_t without a
     range check.  If apfl_number is a floating-point type, converting
     a negative or out-of-range value is undefined behaviour - confirm
     every caller normalises first, or add a check in the builtin.
   * NOTE(review): substr's negative clauses recurse after adding a
     length.  When that length is 0 (empty string with negative start,
     or start equal to (len s) with negative end) the argument stays
     negative; assuming clauses are tried in order, the same clause
     matches again and the recursion does not appear to terminate.

 Caveats about this copy
   * Indentation was reconstructed and may not match the tree
     byte-for-byte; context lines may need adjusting before applying.
   * The blob ids on the "index" lines for globals.apfl and
     string-manip.at predate the fix above and are stale.

 src/CMakeLists.txt                   |  1 +
 src/builtins.c                       | 31 ++++++++++++++
 src/functional-tests/string-manip.at | 44 +++++++++++++++++++
 src/globals.apfl                     | 64 ++++++++++++++++++++++++++++
 src/strings.c                        |  5 +++
 5 files changed, 145 insertions(+)
 create mode 100644 src/functional-tests/string-manip.at

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 130b2c8..aaa4b3d 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -112,6 +112,7 @@ functionaltest("compare")
 functionaltest("concat")
 functionaltest("join")
 functionaltest("quine")
+functionaltest("string-manip")
 
 install(TARGETS apfl DESTINATION lib)
 install(TARGETS apfl-bin DESTINATION bin)
diff --git a/src/builtins.c b/src/builtins.c
index 77787cd..58355de 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -576,6 +576,35 @@ set_func_name(apfl_ctx ctx)
     apfl_set_func_name(ctx, -2, -1);
 }
 
+static void
+substring(apfl_ctx ctx)
+{
+    apfl_get_list_member_by_index(ctx, 0, 0);
+    struct apfl_string_view sv = apfl_get_string(ctx, -1);
+    apfl_get_list_member_by_index(ctx, 0, 1);
+    apfl_number start = apfl_get_number(ctx, -1);
+    apfl_get_list_member_by_index(ctx, 0, 2);
+    apfl_number len = apfl_get_number(ctx, -1);
+
+    apfl_push_string_view_copy(ctx, apfl_string_view_substr(sv, (size_t)start, (size_t)len));
+}
+
+static void
+stringsearch(apfl_ctx ctx)
+{
+    apfl_get_list_member_by_index(ctx, 0, 0);
+    struct apfl_string_view haystack = apfl_get_string(ctx, -1);
+    apfl_get_list_member_by_index(ctx, 0, 1);
+    struct apfl_string_view needle = apfl_get_string(ctx, -1);
+
+    ptrdiff_t off = apfl_string_view_search(haystack, needle);
+    if (off < 0) {
+        apfl_push_nil(ctx);
+    } else {
+        apfl_push_number(ctx, (apfl_number)off);
+    }
+}
+
 static bool
 iterate_dict_callback(apfl_ctx ctx, void *opaque)
 {
@@ -646,5 +675,7 @@ apfl_builtins(apfl_ctx ctx)
     add_builtin(ctx, "-serialize-bytecode", serialize_bytecode);
     add_builtin(ctx, "-unserialize-bytecode", unserialize_bytecode);
     add_builtin(ctx, "set-func-name", set_func_name);
+    add_builtin(ctx, "substring", substring);
+    add_builtin(ctx, "stringsearch", stringsearch);
     add_builtin(ctx, "iterate-dict", iterate_dict);
 }
diff --git a/src/functional-tests/string-manip.at b/src/functional-tests/string-manip.at
new file mode 100644
index 0000000..6e6c31a
--- /dev/null
+++ b/src/functional-tests/string-manip.at
@@ -0,0 +1,44 @@
+===== script =====
+print (substr 0 3 "foobar")
+print (substr 3 "foobar")
+print (substr 1 3 "foobar")
+print (substr -1 "foobar")
+print (substr 6 "foobar")
+print (substr 10 "foobar")
+
+print (strsearch "a" "aaababcabcde")
+print (strsearch "ab" "aaababcabcde")
+print (strsearch "x" "aaababcabcde")
+print (strsearch "ab" 3 "aaababcabcde")
+print (strsearch "ab" -9 "aaababcabcde")
+
+print (split "::" "foo:bar:::baz::::xxx::::y::z::")
+print (split "::" 3 "foo:bar:::baz::::xxx::::y::z::")
+
+===== output =====
+foo
+bar
+oob
+r
+
+
+0
+2
+nil
+4
+4
+[
+    "foo:bar"
+    ":baz"
+    ""
+    "xxx"
+    ""
+    "y"
+    "z"
+    ""
+]
+[
+    "foo:bar"
+    ":baz"
+    "::xxx::::y::z::"
+]
diff --git a/src/globals.apfl b/src/globals.apfl
index 94b28a4..9bc6ede 100644
--- a/src/globals.apfl
+++ b/src/globals.apfl
@@ -99,6 +99,65 @@
         has pred == y
     }
 
+    identity := { x -> x }
+
+    is := -named 'is (partial has identity)
+
+    -raw-substring := builtins.substring
+    substr := {
+        start s ->
+            substr start (len s) s
+        start?(is < 0) end s ->
+            substr (+ start (len s)) end s
+        start end?(is < 0) s ->
+            substr start (+ end (- (len s) start)) s
+        start end s ->
+            -raw-substring s start end
+    }
+
+    -raw-stringsearch := builtins.stringsearch
+    strsearch := {
+        needle haystack ->
+            -raw-stringsearch haystack needle
+        needle start?(is < 0) haystack ->
+            strsearch needle (+ start (len haystack)) haystack
+        needle start haystack ->
+            off := (strsearch needle (substr start haystack))
+            if (== off nil) { nil } { + start off }
+    }
+
+    split := ({
+        split-aux := { maxlen-reached sep s ->
+            sep-len := len sep
+            parts := []
+            loop {
+                if (maxlen-reached (+ 1 (len parts))) {
+                    parts = [~parts s]
+                    false
+                } {
+                    off := strsearch sep s
+                    if (== nil off) {
+                        parts = [~parts s]
+                        false
+                    } {
+                        parts = [~parts (substr 0 off s)]
+                        s = substr (+ off sep-len) s
+                        true
+                    }
+                }
+            }
+
+            parts
+        }
+
+        split := {
+            sep s ->
+                split-aux {false} sep s
+            sep maxlen s ->
+                split-aux (is >= maxlen) sep s
+        }
+    })
+
     keach := {
         d?(has type 'dict) body ->
             out := nil
@@ -155,9 +214,14 @@
         '-serialize-bytecode -> -serialize-bytecode
         '-unserialize-bytecode -> -unserialize-bytecode
         '& -> &
+        'substr -> substr
+        'strsearch -> strsearch
+        'split -> split
         'partial -> partial
         'compose -> compose
         'has -> has
+        'identity -> identity
+        'is -> is
         '!= -> !=
         '!> -> !>
         '!< -> !<
diff --git a/src/strings.c b/src/strings.c
index 0abd0f9..b2eba29 100644
--- a/src/strings.c
+++ b/src/strings.c
@@ -80,6 +80,11 @@ bool
 apfl_string_copy(struct apfl_allocator allocator, struct apfl_string *dst, struct apfl_string_view src)
 {
     apfl_string_deinit(allocator, dst);
+
+    if (src.len == 0) {
+        return true;
+    }
+
     if ((dst->bytes = ALLOC_BYTES(allocator, src.len)) == NULL) {
         return false;
     }