Add regex module "re"
This uses the PCRE2 library to implement regexes in apfl
This commit is contained in:
parent
97f5986781
commit
63d64b0778
13 changed files with 1049 additions and 43 deletions
|
|
@ -4,6 +4,11 @@ set(CMAKE_C_EXTENSIONS OFF)
|
|||
option(BUILD_SHARED_LIBS "Build dynamic / shared libraries" ON)
|
||||
option(TEST_WITH_VALGRIND_MEMCHECK "Also run tests with valgrind / memcheck" ON)
|
||||
|
||||
# The "re" module is implemented on top of PCRE2 (8-bit code unit library),
# which is located through pkg-config.
# find_package(PkgConfig) is the documented way to load the pkg-config
# support; REQUIRED makes a missing pkg-config binary a clear configure error.
find_package(PkgConfig REQUIRED)
pkg_check_modules(PCRE2 REQUIRED libpcre2-8)
include_directories(${PCRE2_INCLUDE_DIRS})
|
||||
set(commonfiles
|
||||
alloc.c
|
||||
bytecode.c
|
||||
|
|
@ -32,13 +37,17 @@ add_library(apfl
|
|||
builtins.c
|
||||
context.c
|
||||
eval.c
|
||||
modules.c
|
||||
re.c
|
||||
registry.c
|
||||
scope.c
|
||||
symbols.c
|
||||
|
||||
mod_globals.c
|
||||
mod_re.c
|
||||
)
|
||||
target_link_libraries(apfl PUBLIC m)
|
||||
target_link_libraries(apfl PUBLIC ${PCRE2_LIBRARIES})
|
||||
|
||||
add_executable(apfl-bin main.c)
|
||||
target_link_libraries(apfl-bin PUBLIC apfl)
|
||||
|
|
@ -51,11 +60,16 @@ else()
|
|||
export(TARGETS apflc FILE "${CMAKE_BINARY_DIR}/ApflApflcNativeConfig.cmake")
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/mod_globals.c"
|
||||
COMMAND apflc -c apfl_mod_globals "${CMAKE_CURRENT_SOURCE_DIR}/globals.apfl" "${CMAKE_CURRENT_BINARY_DIR}/mod_globals.c"
|
||||
DEPENDS apflc "${CMAKE_CURRENT_SOURCE_DIR}/globals.apfl"
|
||||
)
|
||||
# Register a build rule that runs apflc on an apfl source file to produce a
# C file.
#
#   apflfile  - input file, relative to CMAKE_CURRENT_SOURCE_DIR
#   cfile     - output file, relative to CMAKE_CURRENT_BINARY_DIR
#   cfuncname - value passed to `apflc -c`
#
# Example: apfl_to_c(globals.apfl mod_globals.c apfl_mod_globals)
function(apfl_to_c apflfile cfile cfuncname)
    add_custom_command(
        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${cfile}"
        COMMAND apflc -c "${cfuncname}" "${CMAKE_CURRENT_SOURCE_DIR}/${apflfile}" "${CMAKE_CURRENT_BINARY_DIR}/${cfile}"
        DEPENDS apflc "${CMAKE_CURRENT_SOURCE_DIR}/${apflfile}"
        # Without VERBATIM the escaping of the arguments is platform-dependent.
        VERBATIM
    )
endfunction()
|
||||
|
||||
apfl_to_c(globals.apfl mod_globals.c apfl_mod_globals)
|
||||
apfl_to_c(re.apfl mod_re.c apfl_mod_re)
|
||||
|
||||
add_executable(functional-test-runner functional-test-runner.c)
|
||||
target_link_libraries(functional-test-runner PUBLIC apfl)
|
||||
|
|
@ -121,6 +135,7 @@ functionaltest("symbols")
|
|||
functionaltest("get-optional")
|
||||
functionaltest("has-key")
|
||||
functionaltest("tonumber")
|
||||
functionaltest("re")
|
||||
|
||||
install(TARGETS apfl DESTINATION lib)
|
||||
install(TARGETS apfl-bin DESTINATION bin)
|
||||
|
|
|
|||
16
src/apfl.h
16
src/apfl.h
|
|
@ -687,6 +687,7 @@ enum apfl_result {
|
|||
struct apfl_config {
|
||||
struct apfl_allocator allocator;
|
||||
struct apfl_io_writer output_writer;
|
||||
bool no_standard_modules;
|
||||
};
|
||||
|
||||
apfl_ctx apfl_ctx_new(struct apfl_config);
|
||||
|
|
@ -697,6 +698,8 @@ typedef void (*apfl_panic_callback)(apfl_ctx, void *, enum apfl_result);
|
|||
|
||||
void apfl_ctx_set_panic_callback(apfl_ctx, apfl_panic_callback, void *);
|
||||
|
||||
struct apfl_allocator apfl_get_allocator(apfl_ctx);
|
||||
|
||||
typedef struct apfl_iterative_runner_data *apfl_iterative_runner;
|
||||
|
||||
apfl_iterative_runner apfl_iterative_runner_new(apfl_ctx, struct apfl_source_reader);
|
||||
|
|
@ -741,8 +744,15 @@ void apfl_push_number(apfl_ctx, apfl_number);
|
|||
void apfl_push_string_view_copy(apfl_ctx, struct apfl_string_view);
|
||||
// Push a constant string.
|
||||
void apfl_push_const_string(apfl_ctx, const char *);
|
||||
// Move a string onto the stack as a new string value.
|
||||
// Returns false on error, you should then clean up string manually and throw an
|
||||
// allocation error.
|
||||
bool apfl_move_string_onto_stack(apfl_ctx ctx, struct apfl_string string);
|
||||
// Push a C symbol onto the stack.
|
||||
void apfl_push_csymbol(apfl_ctx, apfl_cfunc, const char *);
|
||||
// Returns the csymbol function or NULL, if the element was no csymbol.
|
||||
// The value is popped from the stack in any case.
|
||||
apfl_cfunc apfl_pop_csymbol(apfl_ctx, apfl_stackidx);
|
||||
// Push a symbol onto the stack. s is a string value that will be popped from the stack.
|
||||
void apfl_push_symbol(apfl_ctx, apfl_stackidx s);
|
||||
// Push an anonymous symbol onto the stack.
|
||||
|
|
@ -804,6 +814,12 @@ void apfl_cfunc_self_getslot(apfl_ctx, apfl_slotidx);
|
|||
void apfl_cfunc_setslot(apfl_ctx, apfl_stackidx cfunc, apfl_slotidx, apfl_stackidx value);
|
||||
void apfl_cfunc_self_setslot(apfl_ctx, apfl_slotidx, apfl_stackidx value);
|
||||
|
||||
typedef void (*apfl_cfunc_defer_callback)(apfl_ctx, void *);
|
||||
|
||||
// Run a callback at the end of a cfunc. Callbacks added with this will be run
|
||||
// in the reverse order upon returning from the cfunc.
|
||||
void apfl_cfunc_defer(apfl_ctx, apfl_cfunc_defer_callback, void *);
|
||||
|
||||
void apfl_push_userdata(apfl_ctx, void *);
|
||||
void *apfl_get_userdata(apfl_ctx, apfl_stackidx);
|
||||
|
||||
|
|
|
|||
148
src/context.c
148
src/context.c
|
|
@ -35,13 +35,46 @@ struct protected_errcallback_data {
|
|||
void (*errcallback)(apfl_ctx, void *);
|
||||
};
|
||||
|
||||
void
|
||||
static void
|
||||
protected_errcallback(apfl_ctx ctx, void *opaque)
|
||||
{
|
||||
struct protected_errcallback_data *data = opaque;
|
||||
data->errcallback(ctx, data->opaque_outer);
|
||||
}
|
||||
|
||||
static void
|
||||
protected_run_deferreds(apfl_ctx ctx, void *opaque)
|
||||
{
|
||||
struct call_stack_entry *cse = opaque;
|
||||
apfl_cfunc_run_deferred(ctx, cse);
|
||||
}
|
||||
|
||||
static void
|
||||
protected_in_error_handling(
|
||||
apfl_ctx ctx,
|
||||
void (*callback)(apfl_ctx, void *),
|
||||
void *opaque,
|
||||
enum apfl_result *result,
|
||||
bool *with_error_on_stack
|
||||
) {
|
||||
switch (apfl_do_protected(ctx, callback, opaque, NULL)) {
|
||||
case APFL_RESULT_OK:
|
||||
break;
|
||||
case APFL_RESULT_ERR:
|
||||
*result = APFL_RESULT_ERRERR;
|
||||
*with_error_on_stack = false;
|
||||
break;
|
||||
case APFL_RESULT_ERRERR:
|
||||
*result = APFL_RESULT_ERRERR;
|
||||
*with_error_on_stack = false;
|
||||
break;
|
||||
case APFL_RESULT_ERR_ALLOC:
|
||||
*result = APFL_RESULT_ERR_ALLOC;
|
||||
*with_error_on_stack = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
enum apfl_result
|
||||
apfl_do_protected(
|
||||
apfl_ctx ctx,
|
||||
|
|
@ -72,22 +105,7 @@ apfl_do_protected(
|
|||
.opaque_outer = opaque,
|
||||
.errcallback = errcallback,
|
||||
};
|
||||
switch (apfl_do_protected(ctx, protected_errcallback, &data, NULL)) {
|
||||
case APFL_RESULT_OK:
|
||||
break;
|
||||
case APFL_RESULT_ERR:
|
||||
result = APFL_RESULT_ERRERR;
|
||||
with_error_on_stack = false;
|
||||
break;
|
||||
case APFL_RESULT_ERRERR:
|
||||
result = APFL_RESULT_ERRERR;
|
||||
with_error_on_stack = false;
|
||||
break;
|
||||
case APFL_RESULT_ERR_ALLOC:
|
||||
result = APFL_RESULT_ERR_ALLOC;
|
||||
with_error_on_stack = false;
|
||||
break;
|
||||
}
|
||||
protected_in_error_handling(ctx, protected_errcallback, &data, &result, &with_error_on_stack);
|
||||
}
|
||||
|
||||
struct apfl_value err;
|
||||
|
|
@ -104,8 +122,12 @@ apfl_do_protected(
|
|||
}
|
||||
|
||||
assert(callstack_len <= ctx->call_stack.cap);
|
||||
for (size_t i = callstack_len; i < ctx->call_stack.len; i++) {
|
||||
apfl_call_stack_entry_deinit(ctx->gc.allocator, &ctx->call_stack.items[i]);
|
||||
for (size_t i = ctx->call_stack.len; i-- > callstack_len; ) {
|
||||
struct call_stack_entry *cse = &ctx->call_stack.items[i];
|
||||
if (cse->type == APFL_CSE_CFUNCTION) {
|
||||
protected_in_error_handling(ctx, protected_run_deferreds, cse, &result, &with_error_on_stack);
|
||||
}
|
||||
apfl_call_stack_entry_deinit(ctx->gc.allocator, cse);
|
||||
}
|
||||
|
||||
bool ok = apfl_resizable_resize(
|
||||
|
|
@ -764,6 +786,12 @@ func_call_stack_entry_deinit(struct apfl_allocator allocator, struct func_call_s
|
|||
FREE_LIST(allocator, cse->matcher_stack.items, cse->matcher_stack.cap);
|
||||
}
|
||||
|
||||
static void
|
||||
cfunc_call_stack_entry_deinit(struct apfl_allocator allocator, struct cfunc_call_stack_entry *cse)
|
||||
{
|
||||
FREE_LIST(allocator, cse->deferred_list, cse->deferred_cap);
|
||||
}
|
||||
|
||||
void
|
||||
apfl_matcher_call_stack_entry_deinit(struct apfl_allocator allocator, struct matcher_call_stack_entry *cse)
|
||||
{
|
||||
|
|
@ -785,6 +813,8 @@ apfl_call_stack_entry_deinit(struct apfl_allocator allocator, struct call_stack_
|
|||
func_call_stack_entry_deinit(allocator, &entry->func);
|
||||
break;
|
||||
case APFL_CSE_CFUNCTION:
|
||||
cfunc_call_stack_entry_deinit(allocator, &entry->cfunc);
|
||||
break;
|
||||
case APFL_CSE_FUNCTION_DISPATCH:
|
||||
break;
|
||||
case APFL_CSE_MATCHER:
|
||||
|
|
@ -816,15 +846,8 @@ init_globals_protected(apfl_ctx ctx, void *opaque)
|
|||
{
|
||||
(void)opaque;
|
||||
|
||||
struct apfl_io_string_reader_data reader = apfl_io_string_reader_create(apfl_mod_globals());
|
||||
struct apfl_io_reader r = apfl_io_string_reader(&reader);
|
||||
apfl_load_bytecode(ctx, r);
|
||||
apfl_list_create(ctx, 0);
|
||||
apfl_call(ctx, -2, -1);
|
||||
apfl_list_create(ctx, 1);
|
||||
apfl_builtins(ctx);
|
||||
apfl_list_append(ctx, -2, -1);
|
||||
apfl_call(ctx, -2, -1);
|
||||
apfl_build_native_and_bytecode_combined_module(ctx, -1, apfl_mod_globals());
|
||||
|
||||
struct apfl_value val = apfl_stack_must_get(ctx, -1);
|
||||
if (val.type != VALUE_DICT) {
|
||||
|
|
@ -846,6 +869,15 @@ init_globals_protected(apfl_ctx ctx, void *opaque)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
init_standard_modules_protected(apfl_ctx ctx, void *opaque)
|
||||
{
|
||||
(void)opaque;
|
||||
|
||||
apfl_push_cfunc(ctx, apfl_module_re, 0);
|
||||
apfl_modules_register(ctx, "re", -1);
|
||||
}
|
||||
|
||||
#define DEBUG_INIT_GLOBALS 1
|
||||
|
||||
#if DEBUG_INIT_GLOBALS
|
||||
|
|
@ -892,6 +924,12 @@ init_globals(apfl_ctx ctx)
|
|||
return apfl_do_protected(ctx, init_globals_protected, NULL, INIT_GLOBALS_ERRCALLBACK) == APFL_RESULT_OK;
|
||||
}
|
||||
|
||||
static bool
|
||||
init_standard_modules(apfl_ctx ctx)
|
||||
{
|
||||
return apfl_do_protected(ctx, init_standard_modules_protected, NULL, INIT_GLOBALS_ERRCALLBACK) == APFL_RESULT_OK;
|
||||
}
|
||||
|
||||
apfl_ctx
|
||||
apfl_ctx_new(struct apfl_config config)
|
||||
{
|
||||
|
|
@ -927,6 +965,10 @@ apfl_ctx_new(struct apfl_config config)
|
|||
goto error;
|
||||
}
|
||||
|
||||
if (!config.no_standard_modules && !init_standard_modules(ctx)) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
return ctx;
|
||||
|
||||
error:
|
||||
|
|
@ -1311,6 +1353,16 @@ apfl_push_csymbol(apfl_ctx ctx, apfl_cfunc id, const char *name)
|
|||
});
|
||||
}
|
||||
|
||||
apfl_cfunc
|
||||
apfl_pop_csymbol(apfl_ctx ctx, apfl_stackidx idx)
|
||||
{
|
||||
struct apfl_value val = apfl_stack_must_pop(ctx, idx);
|
||||
|
||||
return val.type == VALUE_CSYMBOL
|
||||
? val.csymbol.id
|
||||
: NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
push_symbol_inner(apfl_ctx ctx, apfl_stackidx idx)
|
||||
{
|
||||
|
|
@ -2385,3 +2437,45 @@ apfl_load_bytecode(apfl_ctx ctx, struct apfl_io_reader r)
|
|||
load_bytecode_inner(ctx, r);
|
||||
apfl_gc_tmproots_restore(&ctx->gc, tmproots);
|
||||
}
|
||||
|
||||
struct apfl_allocator
|
||||
apfl_get_allocator(apfl_ctx ctx)
|
||||
{
|
||||
return ctx->gc.allocator;
|
||||
}
|
||||
|
||||
void
|
||||
apfl_cfunc_defer(apfl_ctx ctx, apfl_cfunc_defer_callback cb, void *opaque)
|
||||
{
|
||||
struct call_stack_entry *cse = apfl_call_stack_cur_entry(ctx);
|
||||
if (cse == NULL || cse->type != APFL_CSE_CFUNCTION) {
|
||||
apfl_raise_const_error(ctx, "apfl_cfunc_defer must be called from within a cfunc");
|
||||
}
|
||||
|
||||
if (!apfl_resizable_append(
|
||||
ctx->gc.allocator,
|
||||
sizeof(struct cfunc_deferred),
|
||||
(void **)&cse->cfunc.deferred_list,
|
||||
&cse->cfunc.deferred_len,
|
||||
&cse->cfunc.deferred_cap,
|
||||
&(struct cfunc_deferred) {
|
||||
.cb = cb,
|
||||
.opaque = opaque,
|
||||
},
|
||||
1
|
||||
)) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
apfl_cfunc_run_deferred(apfl_ctx ctx, struct call_stack_entry *cse)
|
||||
{
|
||||
assert(cse != NULL);
|
||||
assert(cse->type == APFL_CSE_CFUNCTION);
|
||||
|
||||
for (size_t i = cse->cfunc.deferred_len; i-- > 0; ) {
|
||||
struct cfunc_deferred *deferred = &cse->cfunc.deferred_list[i];
|
||||
deferred->cb(ctx, deferred->opaque);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,8 +64,17 @@ struct func_call_stack_entry {
|
|||
bool matcher_result;
|
||||
};
|
||||
|
||||
struct cfunc_deferred {
|
||||
apfl_cfunc_defer_callback cb;
|
||||
void *opaque;
|
||||
};
|
||||
|
||||
struct cfunc_call_stack_entry {
|
||||
struct cfunction *func;
|
||||
|
||||
struct cfunc_deferred *deferred_list;
|
||||
size_t deferred_len;
|
||||
size_t deferred_cap;
|
||||
};
|
||||
|
||||
enum matcher_mode {
|
||||
|
|
@ -162,6 +171,8 @@ struct apfl_ctx_data {
|
|||
void apfl_matcher_call_stack_entry_deinit(struct apfl_allocator, struct matcher_call_stack_entry *);
|
||||
void apfl_call_stack_entry_deinit(struct apfl_allocator, struct call_stack_entry *);
|
||||
|
||||
void apfl_cfunc_run_deferred(apfl_ctx ctx, struct call_stack_entry *cse);
|
||||
|
||||
struct stack apfl_stack_new(void);
|
||||
|
||||
bool apfl_stack_push(apfl_ctx, struct apfl_value);
|
||||
|
|
@ -176,7 +187,6 @@ bool apfl_stack_drop(apfl_ctx, apfl_stackidx);
|
|||
bool apfl_stack_drop_multi(apfl_ctx ctx, size_t count, apfl_stackidx *indices);
|
||||
void apfl_stack_clear(apfl_ctx);
|
||||
struct apfl_value *apfl_stack_push_placeholder(apfl_ctx);
|
||||
bool apfl_move_string_onto_stack(apfl_ctx, struct apfl_string);
|
||||
|
||||
// Like apfl_tostring, but ensures it's a dynamically allocated string and returns the underlying string.
|
||||
struct apfl_string *apfl_to_dynamic_string(apfl_ctx ctx, apfl_stackidx index);
|
||||
|
|
|
|||
|
|
@ -509,10 +509,14 @@ call_inner(apfl_ctx ctx, size_t tmproots, apfl_stackidx func_index, apfl_stackid
|
|||
.stack = apfl_stack_new(),
|
||||
.cfunc = {
|
||||
.func = func.cfunc,
|
||||
.deferred_list = NULL,
|
||||
.deferred_len = 0,
|
||||
.deferred_cap = 0,
|
||||
},
|
||||
});
|
||||
|
||||
func.cfunc->func(ctx);
|
||||
apfl_cfunc_run_deferred(ctx, apfl_call_stack_cur_entry(ctx));
|
||||
return_from_function(ctx);
|
||||
break;
|
||||
default:
|
||||
|
|
|
|||
69
src/functional-tests/re.at
Normal file
69
src/functional-tests/re.at
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
===== script =====
|
||||
re := import 're
|
||||
|
||||
match := { ~args ->
|
||||
m := re.match ~args
|
||||
if (== nil m) {
|
||||
print "-- no match --"
|
||||
} {
|
||||
print "-- match --"
|
||||
keach m {
|
||||
0 _ ->
|
||||
i s -> print (& i ":" s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match "^f(o*)" "foooo"
|
||||
match "^f(o*)" "FoOoo"
|
||||
match "^f(o*)"::'i "FoOoo"
|
||||
match "^f(o*)" "f"
|
||||
match "^f(o*)" 1 "foooo"
|
||||
|
||||
print ""
|
||||
|
||||
replace := { ~args ->
|
||||
print (re.replace ~args)
|
||||
}
|
||||
|
||||
replace "(x+)" { _ x -> len x } ""
|
||||
replace "(x+)" { _ x -> len x } "xxx"
|
||||
replace "(x+)" { _ x -> len x } "xxxyxyxx"
|
||||
replace "(x+)" { _ x -> len x } 2 "xxxyxyxx"
|
||||
replace "\\[(\\w+)\\]" "<$1>" "[foo] [bar]"
|
||||
|
||||
print ""
|
||||
|
||||
match-all := { ~args ->
|
||||
print "-----"
|
||||
each (re.match-all ~args) { m ->
|
||||
print (& "- [" (join " ; " m) "]")
|
||||
}
|
||||
}
|
||||
|
||||
match-all "f(\\w+)" "afoobar fizz abcdefg f"
|
||||
match-all "^(\\w+)\\s*=\\s*(.*)$"::'m "foo= bar\nbar = 123"
|
||||
|
||||
===== output =====
|
||||
-- match --
|
||||
1:oooo
|
||||
-- no match --
|
||||
-- match --
|
||||
1:oOoo
|
||||
-- match --
|
||||
1:
|
||||
-- no match --
|
||||
|
||||
|
||||
3
|
||||
3y1y2
|
||||
3y1yxx
|
||||
<foo> <bar>
|
||||
|
||||
-----
|
||||
- [foobar ; oobar]
|
||||
- [fizz ; izz]
|
||||
- [fg ; g]
|
||||
-----
|
||||
- [foo= bar ; foo ; bar]
|
||||
- [bar = 123 ; bar ; 123]
|
||||
|
|
@ -115,12 +115,12 @@
|
|||
substr := {
|
||||
start s ->
|
||||
substr start (len s) s
|
||||
start?(is < 0) end s ->
|
||||
substr (+ start (len s)) end s
|
||||
start end?(is < 0) s ->
|
||||
substr start (+ end (- (len s) start)) s
|
||||
start end s ->
|
||||
-raw-substring s start end
|
||||
start?(is < 0) newlen s ->
|
||||
substr (+ start (len s)) newlen s
|
||||
start newlen?(is < 0) s ->
|
||||
substr start (+ newlen (- (len s) start)) s
|
||||
start newlen s ->
|
||||
-raw-substring s start newlen
|
||||
}
|
||||
|
||||
-raw-stringsearch := builtins.stringsearch
|
||||
|
|
@ -259,13 +259,15 @@
|
|||
|
||||
add-searcher { m ->
|
||||
unwrap-some (get-optional m loaded-modules) { mod ->
|
||||
{ Some::mod }
|
||||
Some::{ Some::mod }
|
||||
}
|
||||
}
|
||||
|
||||
add-searcher { m ->
|
||||
unwrap-some (builtins.cmod-searcher m) { loader ->
|
||||
Some::(loader)
|
||||
Some::{
|
||||
Some::(loader)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -301,6 +303,12 @@
|
|||
modules
|
||||
})
|
||||
|
||||
map := {
|
||||
_ [] -> []
|
||||
f [x ~xs] ->
|
||||
[(f x) ~(map f xs)]
|
||||
}
|
||||
|
||||
# Dictionary of exported functions
|
||||
[
|
||||
'if -> if
|
||||
|
|
@ -365,5 +373,6 @@
|
|||
'find-first -> find-first
|
||||
'unwrap-some -> unwrap-some
|
||||
'import -> modules.import
|
||||
'map -> map
|
||||
]
|
||||
}
|
||||
|
|
|
|||
21
src/modules.c
Normal file
21
src/modules.c
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
#include "apfl.h"
|
||||
|
||||
#include "context.h"
|
||||
|
||||
void
|
||||
apfl_build_native_and_bytecode_combined_module(
|
||||
apfl_ctx ctx,
|
||||
apfl_stackidx native,
|
||||
struct apfl_string_view bytecode
|
||||
) {
|
||||
apfl_move_to_top_of_stack(ctx, native);
|
||||
|
||||
struct apfl_io_string_reader_data reader = apfl_io_string_reader_create(bytecode);
|
||||
struct apfl_io_reader r = apfl_io_string_reader(&reader);
|
||||
apfl_load_bytecode(ctx, r);
|
||||
apfl_list_create(ctx, 0);
|
||||
apfl_call(ctx, -2, -1);
|
||||
apfl_list_create(ctx, 1);
|
||||
apfl_list_append(ctx, -1, -3);
|
||||
apfl_call(ctx, -2, -1);
|
||||
}
|
||||
|
|
@ -7,7 +7,17 @@ extern "C" {
|
|||
|
||||
#include "apfl.h"
|
||||
|
||||
void
|
||||
apfl_build_native_and_bytecode_combined_module(
|
||||
apfl_ctx ctx,
|
||||
apfl_stackidx native,
|
||||
struct apfl_string_view bytecode
|
||||
);
|
||||
|
||||
struct apfl_string_view apfl_mod_globals(void);
|
||||
struct apfl_string_view apfl_mod_re(void);
|
||||
|
||||
void apfl_module_re(apfl_ctx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
97
src/re.apfl
Normal file
97
src/re.apfl
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
{ C ->
|
||||
|
||||
toflags := { s ->
|
||||
flags := [{}]
|
||||
add := { f -> flags = [~flags f] }
|
||||
|
||||
for (len s) { i ->
|
||||
{
|
||||
'i -> add C.CASELESS
|
||||
'm -> add C.MULTILINE
|
||||
'u -> add C.UTF
|
||||
'x -> add C.EXTENDED
|
||||
'U -> add C.UNGREEDY
|
||||
_ ->
|
||||
} (substr i 1 s)
|
||||
}
|
||||
}
|
||||
|
||||
with := {
|
||||
regex::flags?(has type 'string) body ->
|
||||
with regex::(toflags flags) body
|
||||
regex::flags body ->
|
||||
r := C.compile regex flags
|
||||
out := body r
|
||||
C.close r
|
||||
out
|
||||
regex body ->
|
||||
with regex::[] body
|
||||
}
|
||||
|
||||
match := {
|
||||
regex off subject ->
|
||||
with regex { r ->
|
||||
C.match r subject off
|
||||
}
|
||||
regex subject ->
|
||||
match regex 0 subject
|
||||
}
|
||||
|
||||
Plain := (symbol)
|
||||
Var := (symbol)
|
||||
|
||||
replacement-to-function := {
|
||||
replacement?(has type 'string) ->
|
||||
m := nil
|
||||
parts = []
|
||||
while { m = match "^(.*?)\\$(\\d)(.*)$" replacement } {
|
||||
[_ head n replacement] = m
|
||||
parts = [~parts Plain::head Var::(tonumber n)]
|
||||
}
|
||||
parts = [~parts Plain::replacement]
|
||||
|
||||
{ ~m ->
|
||||
join "" (map {
|
||||
Plain:s -> s
|
||||
Var:n -> m@n
|
||||
} parts)
|
||||
}
|
||||
replacement -> replacement
|
||||
}
|
||||
|
||||
replace-aux := { regex replacement countcond subject ->
|
||||
with regex { r ->
|
||||
C.replace r replacement countcond subject
|
||||
}
|
||||
}
|
||||
|
||||
replace := {
|
||||
regex replacement count subject ->
|
||||
replace-aux regex (replacement-to-function replacement) (is < count) subject
|
||||
regex replacement subject ->
|
||||
replace-aux regex (replacement-to-function replacement) {true} subject
|
||||
}
|
||||
|
||||
match-all := {
|
||||
regex off subject ->
|
||||
with regex { r ->
|
||||
C.match-all r subject off
|
||||
}
|
||||
regex subject ->
|
||||
match-all regex 0 subject
|
||||
}
|
||||
|
||||
[
|
||||
'match -> match
|
||||
'replace -> replace
|
||||
'match-all -> match-all
|
||||
|
||||
'CASELESS -> C.CASELESS
|
||||
'DOTALL -> C.DOTALL
|
||||
'EXTENDED -> C.EXTENDED
|
||||
'MULTILINE -> C.MULTILINE
|
||||
'NEVER_UTF -> C.NEVER_UTF
|
||||
'UNGREEDY -> C.UNGREEDY
|
||||
'UTF -> C.UTF
|
||||
]
|
||||
}
|
||||
642
src/re.c
Normal file
642
src/re.c
Normal file
|
|
@ -0,0 +1,642 @@
|
|||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "apfl.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "modules.h"
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
|
||||
struct contexts_for_pcre {
|
||||
pcre2_general_context *gcontext;
|
||||
pcre2_compile_context *ccontext;
|
||||
};
|
||||
|
||||
static void
|
||||
onbeforecollect_contexts(void *opaque)
|
||||
{
|
||||
struct contexts_for_pcre *contexts = opaque;
|
||||
|
||||
if (contexts->ccontext != NULL) {
|
||||
pcre2_compile_context_free(contexts->ccontext);
|
||||
}
|
||||
|
||||
if (contexts->gcontext != NULL) {
|
||||
pcre2_general_context_free(contexts->gcontext);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct apfl_native_object_type contexts_type = {
|
||||
.size = sizeof(struct contexts_for_pcre),
|
||||
.onbeforecollect = onbeforecollect_contexts,
|
||||
};
|
||||
|
||||
static void
|
||||
free_code(pcre2_code **codeptr)
|
||||
{
|
||||
if (*codeptr != NULL) {
|
||||
pcre2_code_free(*codeptr);
|
||||
*codeptr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
onbeforecollect_code(void *opaque)
|
||||
{
|
||||
pcre2_code **code = opaque;
|
||||
free_code(code);
|
||||
}
|
||||
|
||||
static const struct apfl_native_object_type code_type = {
|
||||
.size = sizeof(pcre2_code *),
|
||||
.onbeforecollect = onbeforecollect_code,
|
||||
};
|
||||
|
||||
static const size_t extrasize_for_allocsize = ((sizeof(size_t) / _Alignof(max_align_t)) + 1) * _Alignof(max_align_t);
|
||||
|
||||
static void *
|
||||
gcontext_malloc(PCRE2_SIZE size, void *opaque)
|
||||
{
|
||||
apfl_ctx ctx = opaque;
|
||||
char *mem = ALLOC_BYTES(
|
||||
apfl_get_allocator(ctx),
|
||||
extrasize_for_allocsize + size
|
||||
);
|
||||
if (mem == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
*((size_t *)mem) = size;
|
||||
return mem + extrasize_for_allocsize;
|
||||
}
|
||||
|
||||
static void
|
||||
gcontext_free(void *ptr, void *opaque)
|
||||
{
|
||||
if (ptr == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
apfl_ctx ctx = opaque;
|
||||
|
||||
char *mem = ptr;
|
||||
mem -= extrasize_for_allocsize;
|
||||
|
||||
size_t size = *((size_t *)mem);
|
||||
|
||||
FREE_BYTES(apfl_get_allocator(ctx), mem, size + extrasize_for_allocsize);
|
||||
}
|
||||
|
||||
static int contexts_registry_key;
|
||||
|
||||
static APFL_DEFINE_CSYMBOL(sym_caseless, "CASELESS")
|
||||
static APFL_DEFINE_CSYMBOL(sym_dotall, "DOTALL")
|
||||
static APFL_DEFINE_CSYMBOL(sym_extended, "EXTENDED")
|
||||
static APFL_DEFINE_CSYMBOL(sym_multiline, "MULTILINE")
|
||||
static APFL_DEFINE_CSYMBOL(sym_never_utf, "NEVER_UTF")
|
||||
static APFL_DEFINE_CSYMBOL(sym_ungreedy, "UNGREEDY")
|
||||
static APFL_DEFINE_CSYMBOL(sym_utf, "UTF")
|
||||
|
||||
static uint32_t
|
||||
options_from_list(apfl_ctx ctx, apfl_stackidx list)
|
||||
{
|
||||
uint32_t options = 0;
|
||||
|
||||
apfl_move_to_top_of_stack(ctx, list);
|
||||
|
||||
if (apfl_get_type(ctx, -1) != APFL_VALUE_LIST) {
|
||||
apfl_raise_const_error(ctx, "Expected an options list");
|
||||
}
|
||||
|
||||
size_t len = apfl_len(ctx, -1);
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
apfl_get_list_member_by_index(ctx, -1, i);
|
||||
apfl_cfunc sym = apfl_pop_csymbol(ctx, -1);
|
||||
if (sym == sym_caseless) {
|
||||
options |= PCRE2_CASELESS;
|
||||
} else if (sym == sym_dotall) {
|
||||
options |= PCRE2_DOTALL;
|
||||
} else if (sym == sym_extended) {
|
||||
options |= PCRE2_EXTENDED;
|
||||
} else if (sym == sym_multiline) {
|
||||
options |= PCRE2_MULTILINE;
|
||||
} else if (sym == sym_never_utf) {
|
||||
options |= PCRE2_NEVER_UTF;
|
||||
} else if (sym == sym_ungreedy) {
|
||||
options |= PCRE2_UNGREEDY;
|
||||
} else if (sym == sym_utf) {
|
||||
options |= PCRE2_UTF;
|
||||
}
|
||||
}
|
||||
|
||||
apfl_drop(ctx, -1);
|
||||
|
||||
return options;
|
||||
}
|
||||
|
||||
static struct contexts_for_pcre *
|
||||
get_contexts(apfl_ctx ctx)
|
||||
{
|
||||
if (!apfl_registry_try_get(ctx, &contexts_registry_key, 0)) {
|
||||
apfl_raise_const_error(ctx, "Module not initialized correctly");
|
||||
}
|
||||
|
||||
struct contexts_for_pcre *contexts = apfl_get_native_object(ctx, &contexts_type, -1);
|
||||
|
||||
// We can safely drop the stack element and still have a valid pointer to
|
||||
// the contexts, because it's still kept alive by the registry entry.
|
||||
apfl_drop(ctx, -1);
|
||||
|
||||
if (contexts->gcontext == NULL || contexts->ccontext == NULL) {
|
||||
apfl_raise_const_error(ctx, "Module not initialized correctly");
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
#define BUFSIZE 200
|
||||
|
||||
noreturn static void
|
||||
raise_pcre2_error(apfl_ctx ctx, int errorcode)
|
||||
{
|
||||
struct apfl_allocator allocator = apfl_get_allocator(ctx);
|
||||
|
||||
unsigned char *buf = ALLOC_BYTES(allocator, BUFSIZE);
|
||||
if (buf == NULL) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
int len = pcre2_get_error_message(errorcode, buf, BUFSIZE);
|
||||
switch (len) {
|
||||
case PCRE2_ERROR_BADDATA:
|
||||
FREE_BYTES(allocator, buf, BUFSIZE);
|
||||
apfl_raise_const_error(ctx, "Unknown PCRE error");
|
||||
break;
|
||||
case PCRE2_ERROR_NOMEMORY:
|
||||
FREE_BYTES(allocator, buf, BUFSIZE);
|
||||
apfl_raise_const_error(ctx, "PCRE error does not fit");
|
||||
break;
|
||||
default:
|
||||
if (!apfl_move_string_onto_stack(ctx, (struct apfl_string) {
|
||||
.bytes = buf,
|
||||
.len = len,
|
||||
.cap = BUFSIZE,
|
||||
})) {
|
||||
FREE_BYTES(allocator, buf, BUFSIZE);
|
||||
apfl_raise_alloc_error(ctx);
|
||||
} else {
|
||||
apfl_raise_error(ctx, -1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
compile(apfl_ctx ctx)
|
||||
{
|
||||
if (apfl_len(ctx, 0) != 2) {
|
||||
apfl_raise_const_error(ctx, "compile needs 2 arguments");
|
||||
}
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 0);
|
||||
struct apfl_string_view s = apfl_get_string(ctx, -1);
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 1);
|
||||
uint32_t options = options_from_list(ctx, -1);
|
||||
|
||||
apfl_drop(ctx, 0);
|
||||
|
||||
struct contexts_for_pcre *contexts = get_contexts(ctx);
|
||||
|
||||
int errorcode;
|
||||
PCRE2_SIZE erroroffset;
|
||||
pcre2_code *code = pcre2_compile(
|
||||
s.bytes,
|
||||
s.len,
|
||||
options,
|
||||
&errorcode,
|
||||
&erroroffset,
|
||||
contexts->ccontext
|
||||
);
|
||||
|
||||
if (code == NULL) {
|
||||
raise_pcre2_error(ctx, errorcode);
|
||||
}
|
||||
|
||||
pcre2_code **codeptr = apfl_push_native_object(ctx, &code_type);
|
||||
*codeptr = code;
|
||||
}
|
||||
|
||||
static void
|
||||
close_code(apfl_ctx ctx)
|
||||
{
|
||||
if (apfl_len(ctx, 0) != 1) {
|
||||
apfl_raise_const_error(ctx, "close needs exatly one argument");
|
||||
}
|
||||
apfl_get_list_member_by_index(ctx, 0, 0);
|
||||
apfl_drop(ctx, 0);
|
||||
pcre2_code **codeptr = apfl_get_native_object(ctx, &code_type, -1);
|
||||
free_code(codeptr);
|
||||
apfl_drop(ctx, -1);
|
||||
}
|
||||
|
||||
static void
|
||||
cleanup_match_data(apfl_ctx ctx, void *opaque)
|
||||
{
|
||||
(void)ctx;
|
||||
pcre2_match_data *md = opaque;
|
||||
pcre2_match_data_free(md);
|
||||
}
|
||||
|
||||
static pcre2_code *
|
||||
get_unclosed_code(apfl_ctx ctx, apfl_stackidx index)
|
||||
{
|
||||
pcre2_code **codeptr = apfl_get_native_object(ctx, &code_type, index);
|
||||
if (codeptr == NULL) {
|
||||
apfl_raise_const_error(ctx, "pcre2 code already closed");
|
||||
}
|
||||
|
||||
return *codeptr;
|
||||
}
|
||||
|
||||
static pcre2_match_data *
|
||||
create_md(apfl_ctx ctx, pcre2_code *code)
|
||||
{
|
||||
struct contexts_for_pcre *contexts = get_contexts(ctx);
|
||||
pcre2_match_data *md = pcre2_match_data_create_from_pattern(code, contexts->gcontext);
|
||||
if (md == NULL) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
apfl_cfunc_defer(ctx, cleanup_match_data, md);
|
||||
|
||||
return md;
|
||||
}
|
||||
|
||||
static void
|
||||
advance_utf8_rune(struct apfl_string_view sv, size_t *offset)
|
||||
{
|
||||
while (*offset < sv.len && sv.bytes[*offset] & 0xC0) {
|
||||
(*offset)++;
|
||||
}
|
||||
}
|
||||
|
||||
struct iter_match_patterninfo {
|
||||
bool is_utf8;
|
||||
bool crlf_is_newline;
|
||||
};
|
||||
|
||||
static struct iter_match_patterninfo
|
||||
prepare_iter_match(pcre2_code *code)
|
||||
{
|
||||
uint32_t bits;
|
||||
struct iter_match_patterninfo out;
|
||||
|
||||
(void)pcre2_pattern_info(code, PCRE2_INFO_ALLOPTIONS, &bits);
|
||||
out.is_utf8 = (bits & PCRE2_UTF) != 0;
|
||||
|
||||
(void)pcre2_pattern_info(code, PCRE2_INFO_NEWLINE, &bits);
|
||||
out.crlf_is_newline = bits == PCRE2_NEWLINE_ANY
|
||||
|| bits == PCRE2_NEWLINE_CRLF
|
||||
|| bits == PCRE2_NEWLINE_ANYCRLF;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
static int
|
||||
iter_match(
|
||||
apfl_ctx ctx,
|
||||
struct apfl_string_view subject,
|
||||
pcre2_code *code,
|
||||
pcre2_match_data *md,
|
||||
PCRE2_SIZE offset,
|
||||
struct iter_match_patterninfo patterninfo,
|
||||
size_t i
|
||||
) {
|
||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
|
||||
PCRE2_SIZE offset_unadjusted;
|
||||
|
||||
if (i > 0) {
|
||||
offset_unadjusted = ovector[1];
|
||||
}
|
||||
again:;
|
||||
|
||||
uint32_t options = 0;
|
||||
bool last_match_was_empty = false;
|
||||
if (i > 0) {
|
||||
// Handle subsequent runs. Pretty much copied from pcre2demo.c
|
||||
|
||||
offset = offset_unadjusted;
|
||||
|
||||
if (ovector[0] == offset_unadjusted) {
|
||||
last_match_was_empty = true;
|
||||
if (ovector[0] == subject.len) {
|
||||
return -1;
|
||||
}
|
||||
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
} else {
|
||||
PCRE2_SIZE startchar = pcre2_get_startchar(md);
|
||||
if (offset <= startchar) {
|
||||
if (startchar >= subject.len) {
|
||||
return -1;
|
||||
}
|
||||
offset = startchar + 1;
|
||||
|
||||
if (patterninfo.is_utf8) {
|
||||
advance_utf8_rune(subject, &offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int rc = pcre2_match(
|
||||
code,
|
||||
subject.bytes,
|
||||
subject.len,
|
||||
(PCRE2_SIZE)offset,
|
||||
options,
|
||||
md,
|
||||
NULL
|
||||
);
|
||||
|
||||
if (rc > 0) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
switch(rc) {
|
||||
case 0:
|
||||
// Happens if the ovector was not large enough, which should never
|
||||
// happen, as we've created the match data from the pattern.
|
||||
apfl_raise_const_error(ctx, "ovector too small. This should not have happened :(");
|
||||
return -1;
|
||||
case PCRE2_ERROR_NOMATCH:
|
||||
if (i == 0 || !last_match_was_empty) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
PCRE2_SIZE new_offset = ovector[1] + 1;
|
||||
if (
|
||||
patterninfo.crlf_is_newline
|
||||
&& offset + 1 < subject.len
|
||||
&& subject.bytes[offset] == '\r'
|
||||
&& subject.bytes[offset+1] == '\n'
|
||||
) {
|
||||
new_offset++;
|
||||
} else if (patterninfo.is_utf8) {
|
||||
advance_utf8_rune(subject, &new_offset);
|
||||
}
|
||||
|
||||
offset_unadjusted = new_offset;
|
||||
|
||||
goto again;
|
||||
default:
|
||||
raise_pcre2_error(ctx, rc);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
build_matches_list(
|
||||
apfl_ctx ctx,
|
||||
struct apfl_string_view sv,
|
||||
int rc,
|
||||
PCRE2_SIZE *ovector
|
||||
) {
|
||||
apfl_list_create(ctx, rc);
|
||||
|
||||
for (int i = 0; i < rc; i++) {
|
||||
size_t a = (size_t)ovector[i*2];
|
||||
size_t b = (size_t)ovector[i*2+1];
|
||||
|
||||
if (a > b) {
|
||||
apfl_push_const_string(ctx, "");
|
||||
} else {
|
||||
size_t newlen = b - a;
|
||||
apfl_push_string_view_copy(
|
||||
ctx,
|
||||
apfl_string_view_substr(sv, a, newlen)
|
||||
);
|
||||
}
|
||||
|
||||
apfl_list_append(ctx, -2, -1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
match(apfl_ctx ctx)
|
||||
{
|
||||
if (apfl_len(ctx, 0) != 3) {
|
||||
apfl_raise_const_error(ctx, "match expects exactly 3 arguments");
|
||||
}
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 0);
|
||||
pcre2_code *code = get_unclosed_code(ctx, -1);
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 1);
|
||||
struct apfl_string_view subject = apfl_get_string(ctx, -1);
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 2);
|
||||
PCRE2_SIZE offset = (PCRE2_SIZE)apfl_get_number(ctx, -1);
|
||||
|
||||
apfl_drop(ctx, 0);
|
||||
|
||||
pcre2_match_data *md = create_md(ctx, code);
|
||||
|
||||
struct iter_match_patterninfo patterninfo = prepare_iter_match(code);
|
||||
int rc = iter_match(ctx, subject, code, md, offset, patterninfo, 0);
|
||||
if (rc < 0) {
|
||||
apfl_push_nil(ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
|
||||
assert(ovector[0] <= ovector[1]);
|
||||
|
||||
build_matches_list(ctx, subject, rc, ovector);
|
||||
}
|
||||
|
||||
static void
|
||||
cleanup_string_builder(apfl_ctx ctx, void *opaque)
|
||||
{
|
||||
struct apfl_string_builder *sb = opaque;
|
||||
apfl_string_builder_deinit(sb);
|
||||
FREE_OBJ(apfl_get_allocator(ctx), sb);
|
||||
}
|
||||
|
||||
static struct apfl_string_builder *
|
||||
create_string_builder(apfl_ctx ctx)
|
||||
{
|
||||
struct apfl_allocator allocator = apfl_get_allocator(ctx);
|
||||
struct apfl_string_builder *sb = ALLOC_OBJ(allocator, struct apfl_string_builder);
|
||||
if (sb == NULL) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
*sb = apfl_string_builder_init(allocator);
|
||||
apfl_cfunc_defer(ctx, cleanup_string_builder, sb);
|
||||
return sb;
|
||||
}
|
||||
|
||||
static bool
|
||||
do_countcheck(apfl_ctx ctx, apfl_stackidx func, size_t i)
|
||||
{
|
||||
apfl_copy(ctx, func);
|
||||
apfl_list_create(ctx, 1);
|
||||
apfl_push_number(ctx, (apfl_number)i);
|
||||
apfl_list_append(ctx, -2, -1);
|
||||
apfl_call(ctx, -2, -1);
|
||||
return apfl_is_truthy(ctx, -1);
|
||||
}
|
||||
|
||||
static void
|
||||
replace(apfl_ctx ctx)
|
||||
{
|
||||
if (apfl_len(ctx, 0) != 4) {
|
||||
apfl_raise_const_error(ctx, "replace expects exactly 4 arguments");
|
||||
}
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 0);
|
||||
pcre2_code *code = get_unclosed_code(ctx, -1);
|
||||
apfl_get_list_member_by_index(ctx, 0, 1);
|
||||
const apfl_stackidx replace = 1;
|
||||
apfl_get_list_member_by_index(ctx, 0, 2);
|
||||
const apfl_stackidx countcheck = 2;
|
||||
apfl_get_list_member_by_index(ctx, 0, 3);
|
||||
struct apfl_string_view subject = apfl_get_string(ctx, -1);
|
||||
apfl_drop(ctx, 0);
|
||||
|
||||
struct apfl_string_builder *sb = create_string_builder(ctx);
|
||||
|
||||
pcre2_match_data *md = create_md(ctx, code);
|
||||
PCRE2_SIZE offset = 0;
|
||||
PCRE2_SIZE old_offset;
|
||||
struct iter_match_patterninfo patterninfo = prepare_iter_match(code);
|
||||
for (size_t i = 0; do_countcheck(ctx, countcheck, i); i++) {
|
||||
old_offset = offset;
|
||||
int rc = iter_match(ctx, subject, code, md, offset, patterninfo, i);
|
||||
|
||||
if (rc < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
|
||||
|
||||
if (!apfl_string_builder_append(sb, apfl_string_view_substr(
|
||||
subject,
|
||||
old_offset,
|
||||
ovector[0] - old_offset
|
||||
))) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
|
||||
apfl_copy(ctx, replace);
|
||||
build_matches_list(ctx, subject, rc, ovector);
|
||||
apfl_call(ctx, -2, -1);
|
||||
apfl_tostring(ctx, -1);
|
||||
struct apfl_string_view replacement = apfl_get_string(ctx, -1);
|
||||
|
||||
if (!apfl_string_builder_append(sb, replacement)) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
|
||||
offset = ovector[1];
|
||||
}
|
||||
|
||||
if (!apfl_string_builder_append(sb, apfl_string_view_substr(
|
||||
subject,
|
||||
offset,
|
||||
subject.len - offset
|
||||
))) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
|
||||
struct apfl_string str = apfl_string_builder_move_string(sb);
|
||||
if (!apfl_move_string_onto_stack(ctx, str)) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
match_all(apfl_ctx ctx)
|
||||
{
|
||||
if (apfl_len(ctx, 0) != 3) {
|
||||
apfl_raise_const_error(ctx, "match-all expects exactly 3 arguments");
|
||||
}
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 0);
|
||||
pcre2_code *code = get_unclosed_code(ctx, -1);
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 1);
|
||||
struct apfl_string_view subject = apfl_get_string(ctx, -1);
|
||||
|
||||
apfl_get_list_member_by_index(ctx, 0, 2);
|
||||
PCRE2_SIZE offset = (PCRE2_SIZE)apfl_get_number(ctx, -1);
|
||||
|
||||
apfl_drop(ctx, 0);
|
||||
|
||||
apfl_list_create(ctx, 0);
|
||||
|
||||
pcre2_match_data *md = create_md(ctx, code);
|
||||
|
||||
struct iter_match_patterninfo patterninfo = prepare_iter_match(code);
|
||||
int rc;
|
||||
for (
|
||||
int i = 0;
|
||||
(rc = iter_match(ctx, subject, code, md, offset, patterninfo, i)) >= 0;
|
||||
i++
|
||||
) {
|
||||
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
|
||||
build_matches_list(ctx, subject, rc, ovector);
|
||||
apfl_list_append(ctx, -2, -1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
add_sym_to_mod(apfl_ctx ctx, apfl_cfunc sym, const char *name)
|
||||
{
|
||||
sym(ctx);
|
||||
apfl_push_const_string(ctx, name);
|
||||
apfl_dict_set(ctx, -3, -1, -2);
|
||||
}
|
||||
|
||||
static void
|
||||
add_func_to_mod(apfl_ctx ctx, apfl_cfunc func, const char *name)
|
||||
{
|
||||
apfl_push_cfunc(ctx, func, 0);
|
||||
apfl_push_const_string(ctx, name);
|
||||
apfl_set_func_name(ctx, -2, -1);
|
||||
apfl_push_const_string(ctx, name);
|
||||
apfl_dict_set(ctx, -3, -1, -2);
|
||||
}
|
||||
|
||||
void
|
||||
apfl_module_re(apfl_ctx ctx)
|
||||
{
|
||||
struct contexts_for_pcre *contexts = apfl_push_native_object(ctx, &contexts_type);
|
||||
contexts->gcontext = NULL;
|
||||
contexts->ccontext = NULL;
|
||||
|
||||
if ((contexts->gcontext = pcre2_general_context_create(gcontext_malloc, gcontext_free, ctx)) == NULL) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
if ((contexts->ccontext = pcre2_compile_context_create(contexts->gcontext)) == NULL) {
|
||||
apfl_raise_alloc_error(ctx);
|
||||
}
|
||||
|
||||
apfl_registry_set(ctx, &contexts_registry_key, 0, -1);
|
||||
|
||||
apfl_dict_create(ctx);
|
||||
|
||||
add_sym_to_mod(ctx, sym_caseless, "CASELESS");
|
||||
add_sym_to_mod(ctx, sym_dotall, "DOTALL");
|
||||
add_sym_to_mod(ctx, sym_extended, "EXTENDED");
|
||||
add_sym_to_mod(ctx, sym_multiline, "MULTILINE");
|
||||
add_sym_to_mod(ctx, sym_never_utf, "NEVER_UTF");
|
||||
add_sym_to_mod(ctx, sym_ungreedy, "UNGREEDY");
|
||||
add_sym_to_mod(ctx, sym_utf, "UTF");
|
||||
|
||||
add_func_to_mod(ctx, compile, "compile");
|
||||
add_func_to_mod(ctx, close_code, "close");
|
||||
add_func_to_mod(ctx, match, "match");
|
||||
add_func_to_mod(ctx, replace, "replace");
|
||||
add_func_to_mod(ctx, match_all, "match-all");
|
||||
|
||||
apfl_build_native_and_bytecode_combined_module(ctx, -1, apfl_mod_re());
|
||||
}
|
||||
1
webpage/.gitignore
vendored
1
webpage/.gitignore
vendored
|
|
@ -1,2 +1,3 @@
|
|||
build/
|
||||
build-native/
|
||||
deps/
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
PCRE2VER=10.42
|
||||
|
||||
cd playground
|
||||
rm -rf build-native
|
||||
mkdir build-native
|
||||
|
|
@ -7,10 +10,25 @@ cd build-native
|
|||
cmake ../../../CMakeLists.txt
|
||||
make -j"$(nproc)" apflc
|
||||
cd ..
|
||||
|
||||
rm -rf deps
|
||||
mkdir deps
|
||||
cd deps
|
||||
curl -L -o pcre2.tar.bz2 "https://github.com/PCRE2Project/pcre2/releases/download/pcre2-${PCRE2VER}/pcre2-${PCRE2VER}.tar.bz2"
|
||||
tar xjf pcre2.tar.bz2
|
||||
cd "pcre2-${PCRE2VER}"
|
||||
mkdir build
|
||||
cd build
|
||||
emcmake cmake -DCMAKE_INSTALL_PREFIX="/home/laria/src/apfl/webpage/playground/deps/prefix" ../CMakeLists.txt
|
||||
emmake make -j"$(nproc)" pcre2-8-static
|
||||
emmake make -j"$(nproc)" install
|
||||
|
||||
cd ../../..
|
||||
|
||||
rm -rf build
|
||||
mkdir build
|
||||
cd build
|
||||
emcmake cmake -DCMAKE_C_FLAGS="-O2" -DBUILD_SHARED_LIBS=NO -DApflApflcNative_DIR="$(pwd)/../build-native/" ../../../CMakeLists.txt
|
||||
emcmake cmake -DCMAKE_C_FLAGS="-O2" -DBUILD_SHARED_LIBS=NO -DApflApflcNative_DIR="$(pwd)/../build-native/" -DCMAKE_PREFIX_PATH="/home/laria/src/apfl/webpage/playground/deps/prefix" ../../../CMakeLists.txt
|
||||
emmake make -j"$(nproc)" apfl
|
||||
cd ..
|
||||
emcc -sASYNCIFY -O3 -oplayground.js playground.c build/src/libapfl.a
|
||||
emcc -sASYNCIFY `PKG_CONFIG_PATH="/home/laria/src/apfl/webpage/playground/deps/prefix/lib/pkgconfig" pkg-config --static --cflags --libs libpcre2-8` -O3 -oplayground.js playground.c build/src/libapfl.a
|
||||
|
|
|
|||
Loading…
Reference in a new issue