Tokenizer: Disallow ASCII control characters outside strings
This commit is contained in:
parent
4eea93ff97
commit
6439f4f8ce
4 changed files with 66 additions and 2 deletions
|
|
@ -142,6 +142,7 @@ enum apfl_error_type {
|
|||
APFL_ERR_INPUT_ERROR,
|
||||
APFL_ERR_UNEXPECTED_EOF,
|
||||
APFL_ERR_EXPECTED_EQ_AFTER_COLON,
|
||||
APFL_ERR_UNEXPECTED_BYTE,
|
||||
APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER,
|
||||
APFL_ERR_EXPECTED_DIGIT,
|
||||
APFL_ERR_EXPECTED_HEX_IN_HEX_ESCAPE,
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@ apfl_error_type_name(enum apfl_error_type type)
|
|||
return "APFL_ERR_UNEXPECTED_EOF";
|
||||
case APFL_ERR_EXPECTED_EQ_AFTER_COLON:
|
||||
return "APFL_ERR_EXPECTED_EQ_AFTER_COLON";
|
||||
case APFL_ERR_UNEXPECTED_BYTE:
|
||||
return "APFL_ERR_UNEXPECTED_BYTE";
|
||||
case APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER:
|
||||
return "APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER";
|
||||
case APFL_ERR_EXPECTED_DIGIT:
|
||||
|
|
@ -67,6 +69,9 @@ apfl_error_print(struct apfl_error error, FILE *file)
|
|||
case APFL_ERR_EXPECTED_EQ_AFTER_COLON:
|
||||
fprintf(file, "Expected '=' after ':' at " POSFMT "\n", POSARGS);
|
||||
return;
|
||||
case APFL_ERR_UNEXPECTED_BYTE:
|
||||
fprintf(file, "Unexpected byte '%c' (0x%X) at " POSFMT "\n", error.byte, (unsigned)error.byte, POSARGS);
|
||||
return;
|
||||
case APFL_ERR_UNEXPECTED_BYTE_IN_NUMBER:
|
||||
fprintf(file, "Unexpected byte '%c' while parsing number at " POSFMT "\n", error.byte, POSARGS);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -158,6 +158,12 @@ static enum apfl_parse_result string(apfl_tokenizer_ptr);
|
|||
static enum apfl_parse_result maybe_name(apfl_tokenizer_ptr, bool, char);
|
||||
static enum apfl_parse_result number(apfl_tokenizer_ptr, bool, struct apfl_position, char, bool);
|
||||
|
||||
// Reports whether `byte` is an ASCII control character: DEL (0x7F) or
// anything below the space character (0x20).
static bool
is_control_byte(unsigned char byte)
{
    if (byte == 0x7F) {
        return true;
    }

    return byte <= 0x1F;
}
|
||||
|
||||
enum apfl_parse_result
|
||||
apfl_tokenizer_next(apfl_tokenizer_ptr tokenizer, bool need)
|
||||
{
|
||||
|
|
@ -233,10 +239,19 @@ apfl_tokenizer_next(apfl_tokenizer_ptr tokenizer, bool need)
|
|||
// Skip whitespace
|
||||
break;
|
||||
default:
|
||||
if (isdigit(byte))
|
||||
if (is_control_byte(byte)) {
|
||||
// Disallow ASCII control characters here
|
||||
tokenizer->error = (struct apfl_error) {
|
||||
.type = APFL_ERR_UNEXPECTED_BYTE,
|
||||
.position = tokenizer->position,
|
||||
.byte = byte,
|
||||
};
|
||||
return APFL_PARSE_ERROR;
|
||||
} else if (isdigit(byte)) {
|
||||
return number(tokenizer, need, tokenizer->position, byte, false);
|
||||
else
|
||||
} else {
|
||||
return maybe_name(tokenizer, need, byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -689,6 +704,12 @@ maybe_name_inner(
|
|||
|
||||
break;
|
||||
default:
|
||||
if (is_control_byte(byte)) {
|
||||
// Disallow ASCII control characters in names
|
||||
unread_byte(tokenizer, last_pos);
|
||||
return finalize_maybe_name(tokenizer, text, pos);
|
||||
}
|
||||
|
||||
if (isdigit(byte) && last_byte == '-') {
|
||||
text->len--; // This removes the '-' from the end of text
|
||||
|
||||
|
|
|
|||
|
|
@ -147,6 +147,30 @@ expect_number_token(struct tokenizer_test *tt, int line, int col, enum apfl_toke
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
expect_error(struct tokenizer_test *tt, enum apfl_error_type want)
|
||||
{
|
||||
struct apfl_token tok;
|
||||
|
||||
switch (apfl_tokenizer_next(tt->tokenizer, false)) {
|
||||
case APFL_PARSE_OK:
|
||||
tok = apfl_tokenizer_get_token(tt->tokenizer);
|
||||
test_failf(tt->t, "Expected error, got token of type %s instead", apfl_token_type_name(tok.type));
|
||||
apfl_token_deinit(&tok);
|
||||
return;
|
||||
case APFL_PARSE_EOF:
|
||||
test_fatalf(tt->t, "Got an EOF instead of a token");
|
||||
break;
|
||||
case APFL_PARSE_ERROR:
|
||||
break;
|
||||
}
|
||||
|
||||
struct apfl_error have = apfl_tokenizer_get_error(tt->tokenizer);
|
||||
if (have.type != want) {
|
||||
test_failf(tt->t, "Expected error of type %s, got %s instead", apfl_error_type_name(want), apfl_error_type_name(have.type));
|
||||
}
|
||||
}
|
||||
|
||||
TEST(empty, t) {
|
||||
struct tokenizer_test *tt = new_tokenizer_test(t, "");
|
||||
|
||||
|
|
@ -283,6 +307,18 @@ TEST(strings_with_binary_data, t) {
|
|||
destroy_tokenizer_test(tt);
|
||||
}
|
||||
|
||||
TEST(err_invalid_bytes, t) {
|
||||
struct tokenizer_test *tt = new_tokenizer_test(t, "\x05" "foo\x01_bar\x7F" "baz");
|
||||
expect_error(tt, APFL_ERR_UNEXPECTED_BYTE);
|
||||
expect_text_token(tt, 1, 2, APFL_TOK_NAME, "foo");
|
||||
expect_error(tt, APFL_ERR_UNEXPECTED_BYTE);
|
||||
expect_text_token(tt, 1, 6, APFL_TOK_NAME, "_bar");
|
||||
expect_error(tt, APFL_ERR_UNEXPECTED_BYTE);
|
||||
expect_text_token(tt, 1, 11, APFL_TOK_NAME, "baz");
|
||||
expect_eof(tt);
|
||||
destroy_tokenizer_test(tt);
|
||||
}
|
||||
|
||||
TESTS_BEGIN
|
||||
ADDTEST(empty),
|
||||
ADDTEST(simple_variable),
|
||||
|
|
@ -291,4 +327,5 @@ TESTS_BEGIN
|
|||
ADDTEST(assignment),
|
||||
ADDTEST(all_tokens),
|
||||
ADDTEST(strings_with_binary_data),
|
||||
ADDTEST(err_invalid_bytes),
|
||||
TESTS_END
|
||||
|
|
|
|||
Loading…
Reference in a new issue