tokenizer: Add backtick-delimited strings
This is useful when writing strings that contain a lot of backslashes, e.g. in regular expressions.
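For example, the following two calls (taken verbatim from the updated regex examples further down in this diff) are equivalent; the comments are added here only for illustration:

replace "\\[(\\w+)\\]" "<$1>" "[foo] [bar]"   # double-quoted: every backslash must be doubled
replace `\[(\w+)\]` "<$1>" "[foo] [bar]"      # backtick-delimited: the pattern is written literally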
parent 85bafd0ef9
commit aa6346eafa

6 changed files with 121 additions and 11 deletions
@@ -75,6 +75,8 @@ The following escape sequences are recognized:
Also, a `'` followed by a variable name evaluates to a string with the variable name as its content. `'foo-bar` and `"foo-bar"` are equivalent.

Additionally, a string can be delimited by backticks (`` ` ``). Inside the backticks, `\`-escapes are not interpreted. To include a literal backtick, double it. For example, ``` `foo``bar\baz` ``` and ``` "foo`bar\\baz" ``` are equivalent.

### Lists

A list is created by `[]` with optional expressions inside the brackets. The expressions can be separated by commas (`,`), newlines, semicolons (`;`), or simply whitespace.
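As a quick sketch of those separator rules (illustrative, with arbitrary number literals, not part of this change), each of the following creates the same three-element list:

[1, 2, 3]
[1; 2; 3]
[1 2 3]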
@@ -8,6 +8,8 @@ scope: source.apfl
contexts:
  main:
    - match: '`'
      push: backtick_string
    - match: '"'
      push: string
    - match: "#.*$"

@@ -35,3 +37,9 @@ contexts:
      scope: constant.character.escape.apfl
    - match: '"'
      pop: true
  backtick_string:
    - meta_scope: string.quoted.backtick.apfl
    - match: "``"
      scope: constant.character.escape.apfl
    - match: '`(?!`)'
      pop: true
@@ -14,11 +14,11 @@ match := { ~args ->
    }
}

match "^f(o*)" "foooo"
match "^f(o*)" "FoOoo"
match "^f(o*)"::'i "FoOoo"
match "^f(o*)" "f"
match "^f(o*)" 1 "foooo"
match `^f(o*)` "foooo"
match `^f(o*)` "FoOoo"
match `^f(o*)`::'i "FoOoo"
match `^f(o*)` "f"
match `^f(o*)` 1 "foooo"

print ""

@@ -30,7 +30,7 @@ replace "(x+)" { _ x -> len x } ""
replace "(x+)" { _ x -> len x } "xxx"
replace "(x+)" { _ x -> len x } "xxxyxyxx"
replace "(x+)" { _ x -> len x } 2 "xxxyxyxx"
replace "\\[(\\w+)\\]" "<$1>" "[foo] [bar]"
replace `\[(\w+)\]` "<$1>" "[foo] [bar]"

print ""

@@ -41,8 +41,8 @@ match-all := { ~args ->
    }
}

match-all "f(\\w+)" "afoobar fizz abcdefg f"
match-all "^(\\w+)\\s*=\\s*(.*)$"::'m "foo= bar\nbar = 123"
match-all `f(\w+)` "afoobar fizz abcdefg f"
match-all `^(\w+)\s*=\s*(.*)$`::'m "foo= bar\nbar = 123"

===== output =====
-- match --
@@ -44,7 +44,7 @@
replacement?(has type 'string) ->
    m := nil
    parts = []
    while { m = match "^(.*?)\\$(\\d)(.*)$" replacement } {
    while { m = match `^(.*?)\$(\d)(.*)$` replacement } {
        [_ head n replacement] = m
        parts = [~parts Plain::head Var::(tonumber n)]
    }
@@ -165,6 +165,7 @@ yield_simple_token(
static enum apfl_parse_result comment(apfl_tokenizer_ptr);
static enum apfl_parse_result colon(apfl_tokenizer_ptr);
static enum apfl_parse_result string(apfl_tokenizer_ptr);
static enum apfl_parse_result backtick_string(apfl_tokenizer_ptr);
static enum apfl_parse_result maybe_name(apfl_tokenizer_ptr, bool, unsigned char);
static enum apfl_parse_result number(apfl_tokenizer_ptr, unsigned, struct apfl_position, bool);
static enum apfl_parse_result zero(apfl_tokenizer_ptr, struct apfl_position, bool);

@@ -276,6 +277,8 @@ apfl_tokenizer_next(apfl_tokenizer_ptr tokenizer, bool need)
        return yield_simple_token(tokenizer, APFL_TOK_QUESTION_MARK, tokenizer->position);
    case '\'':
        return yield_simple_token(tokenizer, APFL_TOK_STRINGIFY, tokenizer->position);
    case '`':
        return backtick_string(tokenizer);
    case '#':
        return comment(tokenizer);
    case ':':
@@ -533,6 +536,78 @@ string(apfl_tokenizer_ptr tokenizer)
    return out;
}

static enum apfl_parse_result
inner_backtick_string(apfl_tokenizer_ptr tokenizer, struct apfl_string_builder *text)
{
    struct apfl_position pos = tokenizer->position;

    unsigned char byte;

    for (;;) {
        switch (read_byte(tokenizer, &byte, true)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            tokenizer->error = (struct apfl_error) { .type = APFL_ERR_UNEXPECTED_EOF };
            return APFL_PARSE_ERROR;
        }

        if (byte != '`') {
            if (!apfl_string_builder_append_byte(text, byte)) {
                tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
                return APFL_PARSE_ERROR;
            }
            continue;
        }

        switch (read_byte(tokenizer, &byte, true)) {
        case RR_OK:
            break;
        case RR_ERR:
            return APFL_PARSE_ERROR;
        case RR_EOF:
            tokenizer->next_mode = NM_EOF;
            goto finalize;
        }

        if (byte == '`') {
            if (!apfl_string_builder_append_byte(text, '`')) {
                tokenizer->error = apfl_error_simple(APFL_ERR_MALLOC_FAILED);
                return APFL_PARSE_ERROR;
            }
            continue;
        }

        unread_byte(tokenizer);

        goto finalize;
    }

finalize:
    tokenizer->token = (struct apfl_token) {
        .type = APFL_TOK_STRING,
        .position = pos,
        .text = apfl_string_builder_move_string(text),
    };
    return APFL_PARSE_OK;
}

static enum apfl_parse_result
backtick_string(apfl_tokenizer_ptr tokenizer)
{
    struct apfl_string_builder text = apfl_string_builder_init(tokenizer->allocator);

    enum apfl_parse_result out = inner_backtick_string(tokenizer, &text);

    apfl_string_builder_deinit(&text);

    return out;
}


static enum apfl_parse_result
finalize_maybe_name(
    apfl_tokenizer_ptr tokenizer,
@@ -612,6 +687,7 @@ maybe_name_inner(
    case '#':
    case ':':
    case '"':
    case '`':
    case ' ':
    case '\r':
    case '\t':
@@ -265,8 +265,10 @@ TEST(all_tokens, t) {
        "@ . ? ~ -> = :=\n"
        // 1234567
        "({[]})\n"
        // 1234567
        ": :: :="
        // 12345678
        ": :: :=\n"
        // 1234567890
        "`foo``bar`"
    );

    expect_text_token (tt, 1, 1, APFL_TOK_COMMENT, " test");

@@ -306,6 +308,8 @@ TEST(all_tokens, t) {
    expect_simple_token(tt, 7, 1, APFL_TOK_COLON);
    expect_simple_token(tt, 7, 3, APFL_TOK_DOUBLE_COLON);
    expect_simple_token(tt, 7, 6, APFL_TOK_LOCAL_ASSIGN);
    expect_simple_token(tt, 7, 8, APFL_TOK_LINEBREAK);
    expect_text_token (tt, 8, 1, APFL_TOK_STRING, "foo`bar");

    expect_eof(tt);
@@ -325,6 +329,25 @@ TEST(strings_with_binary_data, t) {
    destroy_tokenizer_test(tt);
}

TEST(backtick_strings, t) {
    struct tokenizer_test *tt = new_tokenizer_test(
        t,
        // 1234567890123456789012345 6789 1
        "`foo`bar``baz```` `ab``c\"d'e\nf`"
    );

    expect_text_token (tt, 1, 1, APFL_TOK_STRING, "foo");
    expect_text_token (tt, 1, 6, APFL_TOK_NAME, "bar");
    expect_text_token (tt, 1, 9, APFL_TOK_STRING, "");
    expect_text_token (tt, 1, 11, APFL_TOK_NAME, "baz");
    expect_text_token (tt, 1, 14, APFL_TOK_STRING, "`");
    expect_text_token (tt, 1, 19, APFL_TOK_STRING, "ab`c\"d'e\nf");

    expect_eof(tt);

    destroy_tokenizer_test(tt);
}

TEST(err_invalid_bytes, t) {
    struct tokenizer_test *tt = new_tokenizer_test(t, "\x05" "foo\x01_bar\x7F" "baz");
    expect_error(tt, APFL_ERR_UNEXPECTED_BYTE);

@@ -345,5 +368,6 @@ TESTS_BEGIN
    ADDTEST(assignment),
    ADDTEST(all_tokens),
    ADDTEST(strings_with_binary_data),
    ADDTEST(backtick_strings),
    ADDTEST(err_invalid_bytes),
TESTS_END