Add regex module "re"

This uses the PCRE2 library to implement regexes in apfl
This commit is contained in:
Laria 2023-07-03 23:39:29 +02:00
parent 97f5986781
commit 63d64b0778
13 changed files with 1049 additions and 43 deletions

View file

@ -4,6 +4,11 @@ set(CMAKE_C_EXTENSIONS OFF)
option(BUILD_SHARED_LIBS "Build dynamic / shared libraries" ON)
option(TEST_WITH_VALGRIND_MEMCHECK "Also run tests with valgrind / memcheck" ON)
# Locate PCRE2 (8-bit code unit flavour) through pkg-config.
# find_package(PkgConfig REQUIRED) stops configuration with a clear message
# when the pkg-config executable itself is missing, instead of failing later
# inside pkg_check_modules.
find_package(PkgConfig REQUIRED)
pkg_check_modules(PCRE2 REQUIRED libpcre2-8)
# Directory-scoped on purpose: the apfl target that needs these headers is
# only defined further down in this file.
include_directories(${PCRE2_INCLUDE_DIRS})
set(commonfiles
alloc.c
bytecode.c
@ -32,13 +37,17 @@ add_library(apfl
builtins.c
context.c
eval.c
modules.c
re.c
registry.c
scope.c
symbols.c
mod_globals.c
mod_re.c
)
# Link libapfl against libm and the libraries pkg-config reported for PCRE2.
target_link_libraries(apfl
    PUBLIC
        m
        ${PCRE2_LIBRARIES}
)
add_executable(apfl-bin main.c)
target_link_libraries(apfl-bin PUBLIC apfl)
@ -51,11 +60,16 @@ else()
export(TARGETS apflc FILE "${CMAKE_BINARY_DIR}/ApflApflcNativeConfig.cmake")
endif()
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/mod_globals.c"
COMMAND apflc -c apfl_mod_globals "${CMAKE_CURRENT_SOURCE_DIR}/globals.apfl" "${CMAKE_CURRENT_BINARY_DIR}/mod_globals.c"
DEPENDS apflc "${CMAKE_CURRENT_SOURCE_DIR}/globals.apfl"
)
# apfl_to_c(<apflfile> <cfile> <cfuncname>)
#
# Adds a build rule that runs the apflc tool on <apflfile> (relative to the
# current source directory) and writes the result to <cfile> in the current
# binary directory. <cfuncname> is handed to apflc's -c option.
# The rule re-runs when either apflc or the .apfl input changes.
#
# Example: apfl_to_c(re.apfl mod_re.c apfl_mod_re)
function(apfl_to_c apflfile cfile cfuncname)
    add_custom_command(
        OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${cfile}"
        COMMAND apflc -c "${cfuncname}" "${CMAKE_CURRENT_SOURCE_DIR}/${apflfile}" "${CMAKE_CURRENT_BINARY_DIR}/${cfile}"
        DEPENDS apflc "${CMAKE_CURRENT_SOURCE_DIR}/${apflfile}"
        # Make argument escaping identical on every platform/generator.
        VERBATIM
    )
endfunction()
apfl_to_c(globals.apfl mod_globals.c apfl_mod_globals)
apfl_to_c(re.apfl mod_re.c apfl_mod_re)
add_executable(functional-test-runner functional-test-runner.c)
target_link_libraries(functional-test-runner PUBLIC apfl)
@ -121,6 +135,7 @@ functionaltest("symbols")
functionaltest("get-optional")
functionaltest("has-key")
functionaltest("tonumber")
functionaltest("re")
install(TARGETS apfl DESTINATION lib)
install(TARGETS apfl-bin DESTINATION bin)

View file

@ -687,6 +687,7 @@ enum apfl_result {
// Configuration handed to apfl_ctx_new().
struct apfl_config {
struct apfl_allocator allocator;
struct apfl_io_writer output_writer;
// If true, apfl_ctx_new() does not register the standard modules
// (such as the "re" regex module).
bool no_standard_modules;
};
apfl_ctx apfl_ctx_new(struct apfl_config);
@ -697,6 +698,8 @@ typedef void (*apfl_panic_callback)(apfl_ctx, void *, enum apfl_result);
void apfl_ctx_set_panic_callback(apfl_ctx, apfl_panic_callback, void *);
struct apfl_allocator apfl_get_allocator(apfl_ctx);
typedef struct apfl_iterative_runner_data *apfl_iterative_runner;
apfl_iterative_runner apfl_iterative_runner_new(apfl_ctx, struct apfl_source_reader);
@ -741,8 +744,15 @@ void apfl_push_number(apfl_ctx, apfl_number);
void apfl_push_string_view_copy(apfl_ctx, struct apfl_string_view);
// Push a constant string.
void apfl_push_const_string(apfl_ctx, const char *);
// Move a string onto the stack as a new string value.
// Returns false on error, you should then clean up string manually and throw an
// allocation error.
bool apfl_move_string_onto_stack(apfl_ctx ctx, struct apfl_string string);
// Push a C symbol onto the stack.
void apfl_push_csymbol(apfl_ctx, apfl_cfunc, const char *);
// Returns the csymbol function or NULL, if the element was no csymbol.
// The value is popped from the stack in any case.
apfl_cfunc apfl_pop_csymbol(apfl_ctx, apfl_stackidx);
// Push a symbol onto the stack. s is a string value that will be popped from the stack.
void apfl_push_symbol(apfl_ctx, apfl_stackidx s);
// Push an anonymous symbol onto the stack.
@ -804,6 +814,12 @@ void apfl_cfunc_self_getslot(apfl_ctx, apfl_slotidx);
void apfl_cfunc_setslot(apfl_ctx, apfl_stackidx cfunc, apfl_slotidx, apfl_stackidx value);
void apfl_cfunc_self_setslot(apfl_ctx, apfl_slotidx, apfl_stackidx value);
// Signature of a callback registered with apfl_cfunc_defer. The void pointer
// is the opaque value that was passed when the callback was registered.
typedef void (*apfl_cfunc_defer_callback)(apfl_ctx, void *);
// Run a callback at the end of a cfunc. Callbacks added with this will be run
// in the reverse order upon returning from the cfunc. They are also run when
// the cfunc is abandoned because an error unwinds the call stack.
// Must be called from within a cfunc; an error is raised otherwise. An
// allocation error is raised if the callback cannot be recorded.
void apfl_cfunc_defer(apfl_ctx, apfl_cfunc_defer_callback, void *);
void apfl_push_userdata(apfl_ctx, void *);
void *apfl_get_userdata(apfl_ctx, apfl_stackidx);

View file

@ -35,13 +35,46 @@ struct protected_errcallback_data {
void (*errcallback)(apfl_ctx, void *);
};
void
// Trampoline with the plain (ctx, opaque) callback shape: unpacks the
// protected_errcallback_data and forwards to the wrapped error callback.
static void
protected_errcallback(apfl_ctx ctx, void *opaque)
{
    struct protected_errcallback_data *wrapped = opaque;
    void *outer = wrapped->opaque_outer;

    wrapped->errcallback(ctx, outer);
}
// Callback adapter: `opaque` is the call stack entry whose deferred
// callbacks should be run.
static void
protected_run_deferreds(apfl_ctx ctx, void *opaque)
{
    apfl_cfunc_run_deferred(ctx, (struct call_stack_entry *)opaque);
}
// Runs `callback` under apfl_do_protected while an error is already being
// handled. If the callback itself fails, the overall result is downgraded:
// allocation failures become APFL_RESULT_ERR_ALLOC, every other failure
// becomes APFL_RESULT_ERRERR, and *with_error_on_stack is cleared.
// On success neither output is touched.
static void
protected_in_error_handling(
    apfl_ctx ctx,
    void (*callback)(apfl_ctx, void *),
    void *opaque,
    enum apfl_result *result,
    bool *with_error_on_stack
) {
    enum apfl_result inner = apfl_do_protected(ctx, callback, opaque, NULL);

    switch (inner) {
    case APFL_RESULT_OK:
        break;
    case APFL_RESULT_ERR:
    case APFL_RESULT_ERRERR:
        *result = APFL_RESULT_ERRERR;
        *with_error_on_stack = false;
        break;
    case APFL_RESULT_ERR_ALLOC:
        *result = APFL_RESULT_ERR_ALLOC;
        *with_error_on_stack = false;
        break;
    }
}
enum apfl_result
apfl_do_protected(
apfl_ctx ctx,
@ -72,22 +105,7 @@ apfl_do_protected(
.opaque_outer = opaque,
.errcallback = errcallback,
};
switch (apfl_do_protected(ctx, protected_errcallback, &data, NULL)) {
case APFL_RESULT_OK:
break;
case APFL_RESULT_ERR:
result = APFL_RESULT_ERRERR;
with_error_on_stack = false;
break;
case APFL_RESULT_ERRERR:
result = APFL_RESULT_ERRERR;
with_error_on_stack = false;
break;
case APFL_RESULT_ERR_ALLOC:
result = APFL_RESULT_ERR_ALLOC;
with_error_on_stack = false;
break;
}
protected_in_error_handling(ctx, protected_errcallback, &data, &result, &with_error_on_stack);
}
struct apfl_value err;
@ -104,8 +122,12 @@ apfl_do_protected(
}
assert(callstack_len <= ctx->call_stack.cap);
for (size_t i = callstack_len; i < ctx->call_stack.len; i++) {
apfl_call_stack_entry_deinit(ctx->gc.allocator, &ctx->call_stack.items[i]);
for (size_t i = ctx->call_stack.len; i-- > callstack_len; ) {
struct call_stack_entry *cse = &ctx->call_stack.items[i];
if (cse->type == APFL_CSE_CFUNCTION) {
protected_in_error_handling(ctx, protected_run_deferreds, cse, &result, &with_error_on_stack);
}
apfl_call_stack_entry_deinit(ctx->gc.allocator, cse);
}
bool ok = apfl_resizable_resize(
@ -764,6 +786,12 @@ func_call_stack_entry_deinit(struct apfl_allocator allocator, struct func_call_s
FREE_LIST(allocator, cse->matcher_stack.items, cse->matcher_stack.cap);
}
// Releases the deferred-callback array owned by a cfunc call stack entry.
// Only the array storage is freed here; the callbacks themselves are not
// invoked by this function.
static void
cfunc_call_stack_entry_deinit(struct apfl_allocator allocator, struct cfunc_call_stack_entry *cse)
{
FREE_LIST(allocator, cse->deferred_list, cse->deferred_cap);
}
void
apfl_matcher_call_stack_entry_deinit(struct apfl_allocator allocator, struct matcher_call_stack_entry *cse)
{
@ -785,6 +813,8 @@ apfl_call_stack_entry_deinit(struct apfl_allocator allocator, struct call_stack_
func_call_stack_entry_deinit(allocator, &entry->func);
break;
case APFL_CSE_CFUNCTION:
cfunc_call_stack_entry_deinit(allocator, &entry->cfunc);
break;
case APFL_CSE_FUNCTION_DISPATCH:
break;
case APFL_CSE_MATCHER:
@ -816,15 +846,8 @@ init_globals_protected(apfl_ctx ctx, void *opaque)
{
(void)opaque;
struct apfl_io_string_reader_data reader = apfl_io_string_reader_create(apfl_mod_globals());
struct apfl_io_reader r = apfl_io_string_reader(&reader);
apfl_load_bytecode(ctx, r);
apfl_list_create(ctx, 0);
apfl_call(ctx, -2, -1);
apfl_list_create(ctx, 1);
apfl_builtins(ctx);
apfl_list_append(ctx, -2, -1);
apfl_call(ctx, -2, -1);
apfl_build_native_and_bytecode_combined_module(ctx, -1, apfl_mod_globals());
struct apfl_value val = apfl_stack_must_get(ctx, -1);
if (val.type != VALUE_DICT) {
@ -846,6 +869,15 @@ init_globals_protected(apfl_ctx ctx, void *opaque)
}
}
// Registers the standard modules. Has the callback shape expected by
// apfl_do_protected; `opaque` is unused.
// Currently only the "re" module is registered, using apfl_module_re as its
// native entry point.
static void
init_standard_modules_protected(apfl_ctx ctx, void *opaque)
{
(void)opaque;
apfl_push_cfunc(ctx, apfl_module_re, 0);
apfl_modules_register(ctx, "re", -1);
}
#define DEBUG_INIT_GLOBALS 1
#if DEBUG_INIT_GLOBALS
@ -892,6 +924,12 @@ init_globals(apfl_ctx ctx)
return apfl_do_protected(ctx, init_globals_protected, NULL, INIT_GLOBALS_ERRCALLBACK) == APFL_RESULT_OK;
}
// Registers the standard modules in protected mode.
// Returns true only if the protected call finished with APFL_RESULT_OK.
static bool
init_standard_modules(apfl_ctx ctx)
{
    enum apfl_result outcome = apfl_do_protected(
        ctx,
        init_standard_modules_protected,
        NULL,
        INIT_GLOBALS_ERRCALLBACK
    );
    return outcome == APFL_RESULT_OK;
}
apfl_ctx
apfl_ctx_new(struct apfl_config config)
{
@ -927,6 +965,10 @@ apfl_ctx_new(struct apfl_config config)
goto error;
}
if (!config.no_standard_modules && !init_standard_modules(ctx)) {
goto error;
}
return ctx;
error:
@ -1311,6 +1353,16 @@ apfl_push_csymbol(apfl_ctx ctx, apfl_cfunc id, const char *name)
});
}
// Pops the value at `idx` and returns its csymbol function, or NULL if the
// popped value is not a csymbol. The value is removed from the stack in
// either case.
apfl_cfunc
apfl_pop_csymbol(apfl_ctx ctx, apfl_stackidx idx)
{
    struct apfl_value popped = apfl_stack_must_pop(ctx, idx);

    if (popped.type != VALUE_CSYMBOL) {
        return NULL;
    }
    return popped.csymbol.id;
}
static void
push_symbol_inner(apfl_ctx ctx, apfl_stackidx idx)
{
@ -2385,3 +2437,45 @@ apfl_load_bytecode(apfl_ctx ctx, struct apfl_io_reader r)
load_bytecode_inner(ctx, r);
apfl_gc_tmproots_restore(&ctx->gc, tmproots);
}
// Returns the allocator stored in the context's garbage collector state
// (ctx->gc.allocator), so native code can allocate with the same allocator.
struct apfl_allocator
apfl_get_allocator(apfl_ctx ctx)
{
return ctx->gc.allocator;
}
// Records (cb, opaque) on the current cfunc call stack entry so that it is
// run when that cfunc is left. Raises a const error when the current call
// stack entry is not a cfunc, and an allocation error when the deferred list
// cannot grow.
void
apfl_cfunc_defer(apfl_ctx ctx, apfl_cfunc_defer_callback cb, void *opaque)
{
    struct call_stack_entry *entry = apfl_call_stack_cur_entry(ctx);
    bool in_cfunc = entry != NULL && entry->type == APFL_CSE_CFUNCTION;
    if (!in_cfunc) {
        apfl_raise_const_error(ctx, "apfl_cfunc_defer must be called from within a cfunc");
    }

    struct cfunc_deferred item = {
        .cb = cb,
        .opaque = opaque,
    };

    bool appended = apfl_resizable_append(
        ctx->gc.allocator,
        sizeof(struct cfunc_deferred),
        (void **)&entry->cfunc.deferred_list,
        &entry->cfunc.deferred_len,
        &entry->cfunc.deferred_cap,
        &item,
        1
    );
    if (!appended) {
        apfl_raise_alloc_error(ctx);
    }
}
// Runs the deferred callbacks of a cfunc call stack entry, most recently
// registered first.
//
// Each entry is removed from the list *before* its callback is invoked.
// Should a callback raise, the error unwinding code runs the deferreds of
// this entry again; popping first guarantees that callbacks which already
// ran (or are currently running) are not invoked a second time, which would
// otherwise lead to double frees in cleanup callbacks.
// The entry is also copied by value, so the callback stays valid even if the
// list storage is changed while the callback is executing.
//
// The list storage itself is not released here; that is left to the call
// stack entry's deinit.
void
apfl_cfunc_run_deferred(apfl_ctx ctx, struct call_stack_entry *cse)
{
    assert(cse != NULL);
    assert(cse->type == APFL_CSE_CFUNCTION);

    while (cse->cfunc.deferred_len > 0) {
        cse->cfunc.deferred_len--;
        struct cfunc_deferred deferred = cse->cfunc.deferred_list[cse->cfunc.deferred_len];
        deferred.cb(ctx, deferred.opaque);
    }
}

View file

@ -64,8 +64,17 @@ struct func_call_stack_entry {
bool matcher_result;
};
// One deferred callback of a cfunc call: the function to run and the opaque
// pointer that is passed back to it.
struct cfunc_deferred {
apfl_cfunc_defer_callback cb;
void *opaque;
};
// Call stack data specific to a running cfunc.
struct cfunc_call_stack_entry {
struct cfunction *func;
// Resizable array of deferred callbacks (see apfl_cfunc_defer):
// deferred_len entries are in use, deferred_cap are allocated.
struct cfunc_deferred *deferred_list;
size_t deferred_len;
size_t deferred_cap;
};
enum matcher_mode {
@ -162,6 +171,8 @@ struct apfl_ctx_data {
void apfl_matcher_call_stack_entry_deinit(struct apfl_allocator, struct matcher_call_stack_entry *);
void apfl_call_stack_entry_deinit(struct apfl_allocator, struct call_stack_entry *);
void apfl_cfunc_run_deferred(apfl_ctx ctx, struct call_stack_entry *cse);
struct stack apfl_stack_new(void);
bool apfl_stack_push(apfl_ctx, struct apfl_value);
@ -176,7 +187,6 @@ bool apfl_stack_drop(apfl_ctx, apfl_stackidx);
bool apfl_stack_drop_multi(apfl_ctx ctx, size_t count, apfl_stackidx *indices);
void apfl_stack_clear(apfl_ctx);
struct apfl_value *apfl_stack_push_placeholder(apfl_ctx);
bool apfl_move_string_onto_stack(apfl_ctx, struct apfl_string);
// Like apfl_tostring, but ensures it's a dynamically allocated string and returns the underlying string.
struct apfl_string *apfl_to_dynamic_string(apfl_ctx ctx, apfl_stackidx index);

View file

@ -509,10 +509,14 @@ call_inner(apfl_ctx ctx, size_t tmproots, apfl_stackidx func_index, apfl_stackid
.stack = apfl_stack_new(),
.cfunc = {
.func = func.cfunc,
.deferred_list = NULL,
.deferred_len = 0,
.deferred_cap = 0,
},
});
func.cfunc->func(ctx);
apfl_cfunc_run_deferred(ctx, apfl_call_stack_cur_entry(ctx));
return_from_function(ctx);
break;
default:

View file

@ -0,0 +1,69 @@
===== script =====
re := import 're
match := { ~args ->
m := re.match ~args
if (== nil m) {
print "-- no match --"
} {
print "-- match --"
keach m {
0 _ ->
i s -> print (& i ":" s)
}
}
}
match "^f(o*)" "foooo"
match "^f(o*)" "FoOoo"
match "^f(o*)"::'i "FoOoo"
match "^f(o*)" "f"
match "^f(o*)" 1 "foooo"
print ""
replace := { ~args ->
print (re.replace ~args)
}
replace "(x+)" { _ x -> len x } ""
replace "(x+)" { _ x -> len x } "xxx"
replace "(x+)" { _ x -> len x } "xxxyxyxx"
replace "(x+)" { _ x -> len x } 2 "xxxyxyxx"
replace "\\[(\\w+)\\]" "<$1>" "[foo] [bar]"
print ""
match-all := { ~args ->
print "-----"
each (re.match-all ~args) { m ->
print (& "- [" (join " ; " m) "]")
}
}
match-all "f(\\w+)" "afoobar fizz abcdefg f"
match-all "^(\\w+)\\s*=\\s*(.*)$"::'m "foo= bar\nbar = 123"
===== output =====
-- match --
1:oooo
-- no match --
-- match --
1:oOoo
-- match --
1:
-- no match --
3
3y1y2
3y1yxx
<foo> <bar>
-----
- [foobar ; oobar]
- [fizz ; izz]
- [fg ; g]
-----
- [foo= bar ; foo ; bar]
- [bar = 123 ; bar ; 123]

View file

@ -115,12 +115,12 @@
substr := {
start s ->
substr start (len s) s
start?(is < 0) end s ->
substr (+ start (len s)) end s
start end?(is < 0) s ->
substr start (+ end (- (len s) start)) s
start end s ->
-raw-substring s start end
start?(is < 0) newlen s ->
substr (+ start (len s)) newlen s
start newlen?(is < 0) s ->
substr start (+ newlen (- (len s) start)) s
start newlen s ->
-raw-substring s start newlen
}
-raw-stringsearch := builtins.stringsearch
@ -259,13 +259,15 @@
add-searcher { m ->
unwrap-some (get-optional m loaded-modules) { mod ->
{ Some::mod }
Some::{ Some::mod }
}
}
add-searcher { m ->
unwrap-some (builtins.cmod-searcher m) { loader ->
Some::(loader)
Some::{
Some::(loader)
}
}
}
@ -301,6 +303,12 @@
modules
})
# map f list: builds a new list containing (f x) for every element x of the
# given list, in the same order. An empty list maps to an empty list.
map := {
_ [] -> []
f [x ~xs] ->
[(f x) ~(map f xs)]
}
# Dictionary of exported functions
[
'if -> if
@ -365,5 +373,6 @@
'find-first -> find-first
'unwrap-some -> unwrap-some
'import -> modules.import
'map -> map
]
}

21
src/modules.c Normal file
View file

@ -0,0 +1,21 @@
#include "apfl.h"
#include "context.h"
// Combines a natively built module value with a module compiled to bytecode.
//
// The value at stack index `native` is moved to the top of the stack. The
// bytecode is then loaded and called with an empty argument list; whatever
// that call leaves on the stack is called in turn with a one-element list
// holding the native value.
// NOTE(review): the indices below assume apfl_call consumes the function and
// argument list and leaves exactly one result, and that apfl_list_append
// removes the appended value from the stack — confirm against their docs.
void
apfl_build_native_and_bytecode_combined_module(
apfl_ctx ctx,
apfl_stackidx native,
struct apfl_string_view bytecode
) {
apfl_move_to_top_of_stack(ctx, native);
struct apfl_io_string_reader_data reader = apfl_io_string_reader_create(bytecode);
struct apfl_io_reader r = apfl_io_string_reader(&reader);
apfl_load_bytecode(ctx, r);
// First call: run the loaded bytecode without arguments.
apfl_list_create(ctx, 0);
apfl_call(ctx, -2, -1);
// Second call: pass the native value (now at -3) as the only argument.
apfl_list_create(ctx, 1);
apfl_list_append(ctx, -1, -3);
apfl_call(ctx, -2, -1);
}

View file

@ -7,7 +7,17 @@ extern "C" {
#include "apfl.h"
// Builds a module out of a native value on the stack (at index `native`) and
// a bytecode blob: the bytecode is loaded and run, and its result is called
// with the native value as the single argument.
void
apfl_build_native_and_bytecode_combined_module(
apfl_ctx ctx,
apfl_stackidx native,
struct apfl_string_view bytecode
);
struct apfl_string_view apfl_mod_globals(void);
struct apfl_string_view apfl_mod_re(void);
void apfl_module_re(apfl_ctx);
#ifdef __cplusplus
}

97
src/re.apfl Normal file
View file

@ -0,0 +1,97 @@
# apfl half of the "re" module. This function receives the native module
# dictionary C (compile/close/match/replace/match-all plus the option
# symbols) and returns the dictionary of functions exported to users.
{ C ->
# Translates a string of single-character flags (i, m, u, x, U) into the
# matching C.* option symbols; other characters are ignored.
toflags := { s ->
flags := [{}]
add := { f -> flags = [~flags f] }
for (len s) { i ->
{
'i -> add C.CASELESS
'm -> add C.MULTILINE
'u -> add C.UTF
'x -> add C.EXTENDED
'U -> add C.UNGREEDY
_ ->
} (substr i 1 s)
}
}
# with regex body: compiles regex (optionally tagged as regex::flags, where
# flags is a flag string or a list of options), calls body with the compiled
# regex, closes the compiled regex and evaluates to body's result.
with := {
regex::flags?(has type 'string) body ->
with regex::(toflags flags) body
regex::flags body ->
r := C.compile regex flags
out := body r
C.close r
out
regex body ->
with regex::[] body
}
# match regex [off] subject: first match in subject, starting at offset off
# (default 0). The test suite shows nil being returned when nothing matches.
match := {
regex off subject ->
with regex { r ->
C.match r subject off
}
regex subject ->
match regex 0 subject
}
# Tags for the parsed pieces of a replacement template.
Plain := (symbol)
Var := (symbol)
# Turns a replacement into a function of the match groups. A string is
# treated as a template in which $<digit> refers to a capture group; any
# other value is returned unchanged (it is expected to be callable already).
replacement-to-function := {
replacement?(has type 'string) ->
m := nil
parts = []
while { m = match "^(.*?)\\$(\\d)(.*)$" replacement } {
[_ head n replacement] = m
parts = [~parts Plain::head Var::(tonumber n)]
}
parts = [~parts Plain::replacement]
{ ~m ->
join "" (map {
Plain:s -> s
Var:n -> m@n
} parts)
}
replacement -> replacement
}
# Shared implementation of replace: compiles the regex and delegates to the
# native C.replace.
replace-aux := { regex replacement countcond subject ->
with regex { r ->
C.replace r replacement countcond subject
}
}
# replace regex replacement [count] subject: replaces matches in subject.
# With count, the native side is given a predicate that is true while the
# number of replacements done so far is below count.
replace := {
regex replacement count subject ->
replace-aux regex (replacement-to-function replacement) (is < count) subject
regex replacement subject ->
replace-aux regex (replacement-to-function replacement) {true} subject
}
# match-all regex [off] subject: list of all matches in subject, starting
# at offset off (default 0).
match-all := {
regex off subject ->
with regex { r ->
C.match-all r subject off
}
regex subject ->
match-all regex 0 subject
}
# Dictionary of exported functions and option symbols
[
'match -> match
'replace -> replace
'match-all -> match-all
'CASELESS -> C.CASELESS
'DOTALL -> C.DOTALL
'EXTENDED -> C.EXTENDED
'MULTILINE -> C.MULTILINE
'NEVER_UTF -> C.NEVER_UTF
'UNGREEDY -> C.UNGREEDY
'UTF -> C.UTF
]
}

642
src/re.c Normal file
View file

@ -0,0 +1,642 @@
#include <assert.h>
#include <stddef.h>
#include "apfl.h"
#include "alloc.h"
#include "modules.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
// The PCRE2 contexts shared by the module. Either pointer may be NULL while
// the module is only partially initialized.
struct contexts_for_pcre {
pcre2_general_context *gcontext;
pcre2_compile_context *ccontext;
};
static void
onbeforecollect_contexts(void *opaque)
{
struct contexts_for_pcre *contexts = opaque;
if (contexts->ccontext != NULL) {
pcre2_compile_context_free(contexts->ccontext);
}
if (contexts->gcontext != NULL) {
pcre2_general_context_free(contexts->gcontext);
}
}
// Native object type descriptor for the module's contexts_for_pcre value.
static const struct apfl_native_object_type contexts_type = {
.size = sizeof(struct contexts_for_pcre),
.onbeforecollect = onbeforecollect_contexts,
};
// Frees the compiled pattern stored in *codeptr (if any) and resets the slot
// to NULL, so calling this repeatedly on the same slot is harmless.
static void
free_code(pcre2_code **codeptr)
{
    pcre2_code *compiled = *codeptr;
    if (compiled == NULL) {
        return;
    }

    pcre2_code_free(compiled);
    *codeptr = NULL;
}
static void
onbeforecollect_code(void *opaque)
{
pcre2_code **code = opaque;
free_code(code);
}
// Native object type descriptor for a compiled pattern. The object's payload
// is a single pcre2_code pointer; NULL means the pattern has been closed.
static const struct apfl_native_object_type code_type = {
.size = sizeof(pcre2_code *),
.onbeforecollect = onbeforecollect_code,
};
// Size of the header that gcontext_malloc places in front of every block to
// remember the requested size. It is a multiple of _Alignof(max_align_t)
// that is strictly larger than sizeof(size_t) rounded down, so the pointer
// handed to PCRE2 keeps maximum alignment.
static const size_t extrasize_for_allocsize = ((sizeof(size_t) / _Alignof(max_align_t)) + 1) * _Alignof(max_align_t);
// malloc replacement handed to PCRE2: allocates through the apfl allocator
// of the context passed as `opaque`. The requested size is stored in a
// header in front of the returned pointer so gcontext_free can report it
// back to the allocator. Returns NULL if the allocation fails.
static void *
gcontext_malloc(PCRE2_SIZE size, void *opaque)
{
    apfl_ctx ctx = opaque;

    char *block = ALLOC_BYTES(
        apfl_get_allocator(ctx),
        extrasize_for_allocsize + size
    );
    if (block == NULL) {
        return NULL;
    }

    size_t *header = (size_t *)block;
    *header = size;

    char *payload = block + extrasize_for_allocsize;
    return payload;
}
// free replacement handed to PCRE2, counterpart of gcontext_malloc: steps
// back to the size header in front of `ptr` and returns the whole block
// (header plus payload) to the apfl allocator. NULL is ignored.
static void
gcontext_free(void *ptr, void *opaque)
{
    if (ptr == NULL) {
        return;
    }

    apfl_ctx ctx = opaque;
    char *block = (char *)ptr - extrasize_for_allocsize;
    size_t payload_size = *((size_t *)block);

    FREE_BYTES(apfl_get_allocator(ctx), block, payload_size + extrasize_for_allocsize);
}
static int contexts_registry_key;
static APFL_DEFINE_CSYMBOL(sym_caseless, "CASELESS")
static APFL_DEFINE_CSYMBOL(sym_dotall, "DOTALL")
static APFL_DEFINE_CSYMBOL(sym_extended, "EXTENDED")
static APFL_DEFINE_CSYMBOL(sym_multiline, "MULTILINE")
static APFL_DEFINE_CSYMBOL(sym_never_utf, "NEVER_UTF")
static APFL_DEFINE_CSYMBOL(sym_ungreedy, "UNGREEDY")
static APFL_DEFINE_CSYMBOL(sym_utf, "UTF")
// Converts the list at stack index `list` into a PCRE2 option bit mask.
// Every list member that is one of this module's option csymbols sets the
// corresponding PCRE2_* bit; all other members are ignored. The list is
// removed from the stack. Raises an error if the value is not a list.
static uint32_t
options_from_list(apfl_ctx ctx, apfl_stackidx list)
{
    static const struct {
        apfl_cfunc sym;
        uint32_t option;
    } known_options[] = {
        { sym_caseless, PCRE2_CASELESS },
        { sym_dotall, PCRE2_DOTALL },
        { sym_extended, PCRE2_EXTENDED },
        { sym_multiline, PCRE2_MULTILINE },
        { sym_never_utf, PCRE2_NEVER_UTF },
        { sym_ungreedy, PCRE2_UNGREEDY },
        { sym_utf, PCRE2_UTF },
    };
    const size_t known_count = sizeof(known_options) / sizeof(known_options[0]);

    apfl_move_to_top_of_stack(ctx, list);
    if (apfl_get_type(ctx, -1) != APFL_VALUE_LIST) {
        apfl_raise_const_error(ctx, "Expected an options list");
    }

    uint32_t mask = 0;
    size_t count = apfl_len(ctx, -1);
    for (size_t member = 0; member < count; member++) {
        apfl_get_list_member_by_index(ctx, -1, member);
        apfl_cfunc sym = apfl_pop_csymbol(ctx, -1);

        for (size_t k = 0; k < known_count; k++) {
            if (known_options[k].sym == sym) {
                mask |= known_options[k].option;
                break;
            }
        }
    }

    apfl_drop(ctx, -1);
    return mask;
}
// Fetches the module's PCRE2 contexts from the registry.
// Raises "Module not initialized correctly" if the registry entry is missing
// or if either context pointer is still NULL. The stack is left unchanged on
// success.
static struct contexts_for_pcre *
get_contexts(apfl_ctx ctx)
{
if (!apfl_registry_try_get(ctx, &contexts_registry_key, 0)) {
apfl_raise_const_error(ctx, "Module not initialized correctly");
}
struct contexts_for_pcre *contexts = apfl_get_native_object(ctx, &contexts_type, -1);
// We can safely drop the stack element and still have a valid pointer to
// the contexts, because it's still kept alive by the registry entry.
apfl_drop(ctx, -1);
if (contexts->gcontext == NULL || contexts->ccontext == NULL) {
apfl_raise_const_error(ctx, "Module not initialized correctly");
}
return contexts;
}
// Size of the buffer that receives the PCRE2 error message text.
#define BUFSIZE 200
// Raises an apfl error carrying the PCRE2 message for `errorcode`.
// Never returns. Falls back to a constant message if PCRE2 does not know the
// code or the message does not fit into BUFSIZE bytes, and raises an
// allocation error if the message buffer or string value cannot be created.
noreturn static void
raise_pcre2_error(apfl_ctx ctx, int errorcode)
{
struct apfl_allocator allocator = apfl_get_allocator(ctx);
unsigned char *buf = ALLOC_BYTES(allocator, BUFSIZE);
if (buf == NULL) {
apfl_raise_alloc_error(ctx);
}
int len = pcre2_get_error_message(errorcode, buf, BUFSIZE);
switch (len) {
case PCRE2_ERROR_BADDATA:
// Free the buffer before raising: raising does not return here.
FREE_BYTES(allocator, buf, BUFSIZE);
apfl_raise_const_error(ctx, "Unknown PCRE error");
break;
case PCRE2_ERROR_NOMEMORY:
FREE_BYTES(allocator, buf, BUFSIZE);
apfl_raise_const_error(ctx, "PCRE error does not fit");
break;
default:
// Hand the buffer over to a new string value; on failure the buffer is
// still ours and has to be freed manually.
if (!apfl_move_string_onto_stack(ctx, (struct apfl_string) {
.bytes = buf,
.len = len,
.cap = BUFSIZE,
})) {
FREE_BYTES(allocator, buf, BUFSIZE);
apfl_raise_alloc_error(ctx);
} else {
apfl_raise_error(ctx, -1);
}
}
}
// cfunc `compile regex options`: compiles the pattern string with the
// options from the given list and leaves a native object holding the
// compiled pattern on top of the stack. Raises the PCRE2 error message if
// compilation fails.
static void
compile(apfl_ctx ctx)
{
    if (apfl_len(ctx, 0) != 2) {
        apfl_raise_const_error(ctx, "compile needs 2 arguments");
    }
    apfl_get_list_member_by_index(ctx, 0, 0);
    struct apfl_string_view s = apfl_get_string(ctx, -1);
    apfl_get_list_member_by_index(ctx, 0, 1);
    uint32_t options = options_from_list(ctx, -1);
    apfl_drop(ctx, 0);

    struct contexts_for_pcre *contexts = get_contexts(ctx);

    // Create the holder object *before* compiling. If creating it fails, no
    // compiled pattern exists yet that could leak; if compiling fails, the
    // holder just contains NULL, which its collect hook handles.
    pcre2_code **codeptr = apfl_push_native_object(ctx, &code_type);
    *codeptr = NULL;

    int errorcode;
    PCRE2_SIZE erroroffset; // Required by pcre2_compile, not reported further.
    *codeptr = pcre2_compile(
        s.bytes,
        s.len,
        options,
        &errorcode,
        &erroroffset,
        contexts->ccontext
    );
    if (*codeptr == NULL) {
        raise_pcre2_error(ctx, errorcode);
    }
}
// cfunc `close code`: frees the compiled pattern held by the given native
// object right away instead of waiting for garbage collection. Closing an
// already closed pattern is a no-op.
static void
close_code(apfl_ctx ctx)
{
    if (apfl_len(ctx, 0) != 1) {
        apfl_raise_const_error(ctx, "close needs exactly one argument");
    }
    apfl_get_list_member_by_index(ctx, 0, 0);
    apfl_drop(ctx, 0);
    pcre2_code **codeptr = apfl_get_native_object(ctx, &code_type, -1);
    free_code(codeptr);
    apfl_drop(ctx, -1);
}
// Deferred callback: frees the pcre2_match_data passed as `opaque`.
static void
cleanup_match_data(apfl_ctx ctx, void *opaque)
{
    (void)ctx;
    pcre2_match_data_free((pcre2_match_data *)opaque);
}
// Returns the compiled pattern stored in the native object at `index`.
// Raises an error if the pattern has already been closed: closing resets the
// stored pointer to NULL, so it is the pointed-to value that has to be
// checked, not just the slot pointer.
static pcre2_code *
get_unclosed_code(apfl_ctx ctx, apfl_stackidx index)
{
    pcre2_code **codeptr = apfl_get_native_object(ctx, &code_type, index);
    if (codeptr == NULL || *codeptr == NULL) {
        apfl_raise_const_error(ctx, "pcre2 code already closed");
    }
    return *codeptr;
}
// Creates match data sized for `code` and registers a deferred callback that
// frees it when the calling cfunc is left. Raises an allocation error if
// PCRE2 cannot create the match data.
// NOTE(review): if apfl_cfunc_defer itself raises, md has not been
// registered yet and appears to leak — confirm.
static pcre2_match_data *
create_md(apfl_ctx ctx, pcre2_code *code)
{
struct contexts_for_pcre *contexts = get_contexts(ctx);
pcre2_match_data *md = pcre2_match_data_create_from_pattern(code, contexts->gcontext);
if (md == NULL) {
apfl_raise_alloc_error(ctx);
}
apfl_cfunc_defer(ctx, cleanup_match_data, md);
return md;
}
// Advances *offset past any UTF-8 continuation bytes so that it ends up on
// the first byte of a character (or at the end of the string view).
//
// A continuation byte has the bit pattern 10xxxxxx, i.e.
// (byte & 0xC0) == 0x80. Testing only for (byte & 0xC0) != 0 would also skip
// lead bytes and every ASCII byte >= 0x40.
static void
advance_utf8_rune(struct apfl_string_view sv, size_t *offset)
{
    while (*offset < sv.len && (sv.bytes[*offset] & 0xC0) == 0x80) {
        (*offset)++;
    }
}
// Properties of a compiled pattern that iter_match needs when it has to step
// forward after an empty match.
struct iter_match_patterninfo {
// The pattern was compiled with PCRE2_UTF.
bool is_utf8;
// The pattern's newline convention treats CRLF as a newline
// (PCRE2_NEWLINE_ANY, _CRLF or _ANYCRLF).
bool crlf_is_newline;
};
// Queries the compiled pattern for the information iter_match needs: whether
// it is a UTF pattern and whether CRLF counts as a newline.
//
// Both query results start out as 0 and use separate variables, so a failing
// pcre2_pattern_info call yields `false` for the affected flag instead of
// reading an indeterminate value or the result of the other query.
static struct iter_match_patterninfo
prepare_iter_match(pcre2_code *code)
{
    uint32_t alloptions = 0;
    uint32_t newline = 0;
    struct iter_match_patterninfo out;

    (void)pcre2_pattern_info(code, PCRE2_INFO_ALLOPTIONS, &alloptions);
    out.is_utf8 = (alloptions & PCRE2_UTF) != 0;

    (void)pcre2_pattern_info(code, PCRE2_INFO_NEWLINE, &newline);
    out.crlf_is_newline = newline == PCRE2_NEWLINE_ANY
        || newline == PCRE2_NEWLINE_CRLF
        || newline == PCRE2_NEWLINE_ANYCRLF;

    return out;
}
// Performs the i-th match attempt of an iteration over all matches.
//
// For i == 0 the search starts at `offset`. For i > 0 `offset` is ignored
// and the search continues from the end of the previous match, which is read
// from md's ovector; md must therefore be the same match data across calls.
//
// Returns the positive return code of pcre2_match on a match, or -1 when
// there is no (further) match. Raises an apfl error for any other PCRE2
// error.
static int
iter_match(
apfl_ctx ctx,
struct apfl_string_view subject,
pcre2_code *code,
pcre2_match_data *md,
PCRE2_SIZE offset,
struct iter_match_patterninfo patterninfo,
size_t i
) {
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
// Only meaningful for i > 0 (or after the NOMATCH retry below sets it).
PCRE2_SIZE offset_unadjusted;
if (i > 0) {
offset_unadjusted = ovector[1];
}
again:;
uint32_t options = 0;
bool last_match_was_empty = false;
if (i > 0) {
// Handle subsequent runs. Pretty much copied from pcre2demo.c
offset = offset_unadjusted;
if (ovector[0] == offset_unadjusted) {
// Previous match was empty: retry at the same position, but anchored
// and not allowing another empty match there.
last_match_was_empty = true;
if (ovector[0] == subject.len) {
return -1;
}
options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
} else {
// If the previous match started at or after the point where we would
// continue, step past its start character to guarantee progress.
PCRE2_SIZE startchar = pcre2_get_startchar(md);
if (offset <= startchar) {
if (startchar >= subject.len) {
return -1;
}
offset = startchar + 1;
if (patterninfo.is_utf8) {
advance_utf8_rune(subject, &offset);
}
}
}
}
int rc = pcre2_match(
code,
subject.bytes,
subject.len,
(PCRE2_SIZE)offset,
options,
md,
NULL
);
if (rc > 0) {
return rc;
}
switch(rc) {
case 0:
// Happens if the ovector was not large enough, which should never
// happen, as we've created the match data from the pattern.
apfl_raise_const_error(ctx, "ovector too small. This should not have happened :(");
return -1;
case PCRE2_ERROR_NOMATCH:
if (i == 0 || !last_match_was_empty) {
return -1;
}
// The anchored retry after an empty match failed: advance by one
// character (two bytes for CRLF when that is a newline) and search
// again normally.
// NOTE(review): this reads ovector[1] after a failed match and assumes it
// still holds the previous match's end — confirm against the PCRE2 docs.
PCRE2_SIZE new_offset = ovector[1] + 1;
if (
patterninfo.crlf_is_newline
&& offset + 1 < subject.len
&& subject.bytes[offset] == '\r'
&& subject.bytes[offset+1] == '\n'
) {
new_offset++;
} else if (patterninfo.is_utf8) {
advance_utf8_rune(subject, &new_offset);
}
offset_unadjusted = new_offset;
goto again;
default:
raise_pcre2_error(ctx, rc);
return -1;
}
}
// Pushes a list with `rc` strings onto the stack: element 0 is the whole
// match, the following elements are the capture groups, taken from the
// offset pairs in `ovector`.
//
// Groups that did not take part in the match have both offsets set to
// PCRE2_UNSET; those, and pairs whose start lies behind their end, become
// the empty string instead of being used as substring offsets.
static void
build_matches_list(
    apfl_ctx ctx,
    struct apfl_string_view sv,
    int rc,
    PCRE2_SIZE *ovector
) {
    apfl_list_create(ctx, rc);
    for (int i = 0; i < rc; i++) {
        PCRE2_SIZE start = ovector[i*2];
        PCRE2_SIZE end = ovector[i*2+1];

        bool unset = start == PCRE2_UNSET || end == PCRE2_UNSET;
        if (unset || start > end) {
            apfl_push_const_string(ctx, "");
        } else {
            size_t a = (size_t)start;
            size_t newlen = (size_t)(end - start);
            apfl_push_string_view_copy(
                ctx,
                apfl_string_view_substr(sv, a, newlen)
            );
        }
        apfl_list_append(ctx, -2, -1);
    }
}
// cfunc `match code subject offset`: runs a single match starting at the
// given offset. Leaves nil on the stack if nothing matched, otherwise a list
// of the matched strings (whole match first, then the capture groups).
static void
match(apfl_ctx ctx)
{
if (apfl_len(ctx, 0) != 3) {
apfl_raise_const_error(ctx, "match expects exactly 3 arguments");
}
apfl_get_list_member_by_index(ctx, 0, 0);
pcre2_code *code = get_unclosed_code(ctx, -1);
apfl_get_list_member_by_index(ctx, 0, 1);
struct apfl_string_view subject = apfl_get_string(ctx, -1);
apfl_get_list_member_by_index(ctx, 0, 2);
// NOTE(review): the number is converted to PCRE2_SIZE without a range
// check; presumably callers pass a non-negative offset — confirm.
PCRE2_SIZE offset = (PCRE2_SIZE)apfl_get_number(ctx, -1);
apfl_drop(ctx, 0);
// The match data is freed by a deferred callback when this cfunc is left.
pcre2_match_data *md = create_md(ctx, code);
struct iter_match_patterninfo patterninfo = prepare_iter_match(code);
int rc = iter_match(ctx, subject, code, md, offset, patterninfo, 0);
if (rc < 0) {
apfl_push_nil(ctx);
return;
}
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
assert(ovector[0] <= ovector[1]);
build_matches_list(ctx, subject, rc, ovector);
}
// Deferred callback: tears down the heap-allocated string builder passed as
// `opaque` and then frees the builder object itself.
static void
cleanup_string_builder(apfl_ctx ctx, void *opaque)
{
    struct apfl_string_builder *builder = opaque;

    apfl_string_builder_deinit(builder);
    FREE_OBJ(apfl_get_allocator(ctx), builder);
}
// Allocates and initializes a string builder and registers a deferred
// callback that deinitializes and frees it when the calling cfunc is left.
// Raises an allocation error if the builder object cannot be allocated.
// NOTE(review): if apfl_cfunc_defer itself raises, sb has not been
// registered yet and appears to leak — confirm.
static struct apfl_string_builder *
create_string_builder(apfl_ctx ctx)
{
struct apfl_allocator allocator = apfl_get_allocator(ctx);
struct apfl_string_builder *sb = ALLOC_OBJ(allocator, struct apfl_string_builder);
if (sb == NULL) {
apfl_raise_alloc_error(ctx);
}
*sb = apfl_string_builder_init(allocator);
apfl_cfunc_defer(ctx, cleanup_string_builder, sb);
return sb;
}
// Calls a copy of the function at stack index `func` with the single
// argument `i` and returns whether the result is truthy.
// NOTE(review): whether apfl_is_truthy removes the result from the stack is
// not visible here; if it does not, every call leaves one value behind.
static bool
do_countcheck(apfl_ctx ctx, apfl_stackidx func, size_t i)
{
apfl_copy(ctx, func);
apfl_list_create(ctx, 1);
apfl_push_number(ctx, (apfl_number)i);
apfl_list_append(ctx, -2, -1);
apfl_call(ctx, -2, -1);
return apfl_is_truthy(ctx, -1);
}
// cfunc `replace code replacement countcheck subject`: builds a new string
// in which matches of `code` in `subject` are replaced.
//
// Before each match attempt `countcheck` is called with the number of
// replacements done so far; replacing stops when it returns a falsy value or
// when there is no further match. For every match, `replacement` is called
// with the list of matched strings and its result (converted to a string) is
// inserted. The resulting string is left on top of the stack.
static void
replace(apfl_ctx ctx)
{
if (apfl_len(ctx, 0) != 4) {
apfl_raise_const_error(ctx, "replace expects exactly 4 arguments");
}
// After the argument list (index 0) is dropped below, the unpacked arguments
// sit at absolute stack indices 0..3; `replace` and `countcheck` name the
// indices the two functions will have from then on.
apfl_get_list_member_by_index(ctx, 0, 0);
pcre2_code *code = get_unclosed_code(ctx, -1);
apfl_get_list_member_by_index(ctx, 0, 1);
const apfl_stackidx replace = 1;
apfl_get_list_member_by_index(ctx, 0, 2);
const apfl_stackidx countcheck = 2;
apfl_get_list_member_by_index(ctx, 0, 3);
struct apfl_string_view subject = apfl_get_string(ctx, -1);
apfl_drop(ctx, 0);
// Both helpers register deferred cleanups, so sb and md are released when
// this cfunc is left, including through a raised error.
struct apfl_string_builder *sb = create_string_builder(ctx);
pcre2_match_data *md = create_md(ctx, code);
PCRE2_SIZE offset = 0;
PCRE2_SIZE old_offset;
struct iter_match_patterninfo patterninfo = prepare_iter_match(code);
for (size_t i = 0; do_countcheck(ctx, countcheck, i); i++) {
old_offset = offset;
int rc = iter_match(ctx, subject, code, md, offset, patterninfo, i);
if (rc < 0) {
break;
}
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
// Copy the unmatched text between the previous match and this one.
if (!apfl_string_builder_append(sb, apfl_string_view_substr(
subject,
old_offset,
ovector[0] - old_offset
))) {
apfl_raise_alloc_error(ctx);
}
// Call the replacement function with the list of matched strings.
apfl_copy(ctx, replace);
build_matches_list(ctx, subject, rc, ovector);
apfl_call(ctx, -2, -1);
apfl_tostring(ctx, -1);
struct apfl_string_view replacement = apfl_get_string(ctx, -1);
if (!apfl_string_builder_append(sb, replacement)) {
apfl_raise_alloc_error(ctx);
}
// NOTE(review): the replacement string does not appear to be dropped here,
// so the stack presumably grows by one value per replacement — confirm.
offset = ovector[1];
}
// Copy whatever follows the last replaced match.
if (!apfl_string_builder_append(sb, apfl_string_view_substr(
subject,
offset,
subject.len - offset
))) {
apfl_raise_alloc_error(ctx);
}
struct apfl_string str = apfl_string_builder_move_string(sb);
// NOTE(review): apfl_move_string_onto_stack's contract asks the caller to
// clean up the string when it returns false; str is not freed here before
// raising — looks like a leak on allocation failure, confirm.
if (!apfl_move_string_onto_stack(ctx, str)) {
apfl_raise_alloc_error(ctx);
}
}
// cfunc `match-all code subject offset`: collects every match in `subject`,
// starting the search at `offset`. Leaves a list on the stack that contains
// one list of matched strings per match.
static void
match_all(apfl_ctx ctx)
{
    if (apfl_len(ctx, 0) != 3) {
        apfl_raise_const_error(ctx, "match-all expects exactly 3 arguments");
    }

    apfl_get_list_member_by_index(ctx, 0, 0);
    pcre2_code *code = get_unclosed_code(ctx, -1);

    apfl_get_list_member_by_index(ctx, 0, 1);
    struct apfl_string_view subject = apfl_get_string(ctx, -1);

    apfl_get_list_member_by_index(ctx, 0, 2);
    PCRE2_SIZE offset = (PCRE2_SIZE)apfl_get_number(ctx, -1);

    apfl_drop(ctx, 0);

    // Result list; every match appends one sub-list to it.
    apfl_list_create(ctx, 0);

    pcre2_match_data *md = create_md(ctx, code);
    struct iter_match_patterninfo patterninfo = prepare_iter_match(code);

    int round = 0;
    for (;;) {
        int rc = iter_match(ctx, subject, code, md, offset, patterninfo, round);
        if (rc < 0) {
            break;
        }

        PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
        build_matches_list(ctx, subject, rc, ovector);
        apfl_list_append(ctx, -2, -1);

        round++;
    }
}
// Adds an option symbol to the module dictionary on top of the stack.
// Calling `sym` pushes the symbol value; it is then stored in the dictionary
// under the key `name`.
static void
add_sym_to_mod(apfl_ctx ctx, apfl_cfunc sym, const char *name)
{
sym(ctx);
apfl_push_const_string(ctx, name);
// Stack here: dict (-3), symbol (-2), key (-1).
apfl_dict_set(ctx, -3, -1, -2);
}
// Adds a native function to the module dictionary on top of the stack.
// The cfunc is created, given `name` as its function name and stored in the
// dictionary under the key `name`.
static void
add_func_to_mod(apfl_ctx ctx, apfl_cfunc func, const char *name)
{
apfl_push_cfunc(ctx, func, 0);
apfl_push_const_string(ctx, name);
apfl_set_func_name(ctx, -2, -1);
apfl_push_const_string(ctx, name);
// Stack here: dict (-3), cfunc (-2), key (-1).
apfl_dict_set(ctx, -3, -1, -2);
}
// Native entry point of the "re" module.
//
// Creates the PCRE2 general and compile contexts (allocating through the
// apfl allocator), stores them in the registry, builds the dictionary of
// native functions and option symbols, and finally combines that dictionary
// with the module's bytecode part (apfl_mod_re).
void
apfl_module_re(apfl_ctx ctx)
{
struct contexts_for_pcre *contexts = apfl_push_native_object(ctx, &contexts_type);
// Start from NULL so the collect hook only frees what was actually created
// if one of the steps below raises.
contexts->gcontext = NULL;
contexts->ccontext = NULL;
if ((contexts->gcontext = pcre2_general_context_create(gcontext_malloc, gcontext_free, ctx)) == NULL) {
apfl_raise_alloc_error(ctx);
}
if ((contexts->ccontext = pcre2_compile_context_create(contexts->gcontext)) == NULL) {
apfl_raise_alloc_error(ctx);
}
apfl_registry_set(ctx, &contexts_registry_key, 0, -1);
// Native half of the module: option symbols and functions.
apfl_dict_create(ctx);
add_sym_to_mod(ctx, sym_caseless, "CASELESS");
add_sym_to_mod(ctx, sym_dotall, "DOTALL");
add_sym_to_mod(ctx, sym_extended, "EXTENDED");
add_sym_to_mod(ctx, sym_multiline, "MULTILINE");
add_sym_to_mod(ctx, sym_never_utf, "NEVER_UTF");
add_sym_to_mod(ctx, sym_ungreedy, "UNGREEDY");
add_sym_to_mod(ctx, sym_utf, "UTF");
add_func_to_mod(ctx, compile, "compile");
add_func_to_mod(ctx, close_code, "close");
add_func_to_mod(ctx, match, "match");
add_func_to_mod(ctx, replace, "replace");
add_func_to_mod(ctx, match_all, "match-all");
apfl_build_native_and_bytecode_combined_module(ctx, -1, apfl_mod_re());
}

1
webpage/.gitignore vendored
View file

@ -1,2 +1,3 @@
build/
build-native/
deps/

View file

@ -1,5 +1,8 @@
#!/bin/sh
set -e
PCRE2VER=10.42
cd playground
rm -rf build-native
mkdir build-native
@ -7,10 +10,25 @@ cd build-native
cmake ../../../CMakeLists.txt
make -j"$(nproc)" apflc
cd ..
# Download PCRE2 and build it with Emscripten into deps/prefix.
rm -rf deps
mkdir deps
cd deps
# Install prefix derived from the current directory, so the script does not
# depend on one particular checkout location.
DEPS_PREFIX="$(pwd)/prefix"
# -f: fail on HTTP errors instead of saving the error page as the tarball.
curl -fL -o pcre2.tar.bz2 "https://github.com/PCRE2Project/pcre2/releases/download/pcre2-${PCRE2VER}/pcre2-${PCRE2VER}.tar.bz2"
tar xjf pcre2.tar.bz2
cd "pcre2-${PCRE2VER}"
mkdir build
cd build
emcmake cmake -DCMAKE_INSTALL_PREFIX="${DEPS_PREFIX}" ../CMakeLists.txt
emmake make -j"$(nproc)" pcre2-8-static
emmake make -j"$(nproc)" install
cd ../../..
rm -rf build
mkdir build
cd build
emcmake cmake -DCMAKE_C_FLAGS="-O2" -DBUILD_SHARED_LIBS=NO -DApflApflcNative_DIR="$(pwd)/../build-native/" ../../../CMakeLists.txt
# Point CMake at the PCRE2 build in deps/prefix, relative to this build dir.
emcmake cmake -DCMAKE_C_FLAGS="-O2" -DBUILD_SHARED_LIBS=NO -DApflApflcNative_DIR="$(pwd)/../build-native/" -DCMAKE_PREFIX_PATH="$(pwd)/../deps/prefix" ../../../CMakeLists.txt
emmake make -j"$(nproc)" apfl
cd ..
emcc -sASYNCIFY -O3 -oplayground.js playground.c build/src/libapfl.a
# Link the playground; PCRE2 flags come from the pkg-config files installed
# into deps/prefix (path relative to the current playground directory).
emcc -sASYNCIFY $(PKG_CONFIG_PATH="$(pwd)/deps/prefix/lib/pkgconfig" pkg-config --static --cflags --libs libpcre2-8) -O3 -oplayground.js playground.c build/src/libapfl.a