LibRegex: Fix greedy/reluctant modifiers in PosixExtendedParser

Also fixes the issue with assertions causing early termination when
they fail.
This commit is contained in:
AnotherTest 2020-11-19 18:57:39 +03:30 committed by Andreas Kling
parent 45e5661296
commit 92ea9ed4a5
Notes: sideshowbarker 2024-07-19 01:14:53 +09:00
5 changed files with 42 additions and 26 deletions

View file

@ -188,26 +188,26 @@ ALWAYS_INLINE ExecutionResult OpCode_ForkStay::execute(const MatchInput&, MatchS
// Executes the begin-of-line check against the current match position.
// NOTE(review): this text is a rendered diff without +/- markers; each pair of
// adjacent `return` statements below appears to be a removed line followed by
// its replacement (`Failed` -> `Failed_ExecuteLowPrioForks`), so the body is
// not meant to be read as one compilable function.
// Per the commit message, the change addresses assertions causing early
// termination when they fail.
ALWAYS_INLINE ExecutionResult OpCode_CheckBegin::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
{
// Position 0 while MatchNotBeginOfLine is set: the check does not hold.
if (0 == state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
return ExecutionResult::Failed;
return ExecutionResult::Failed_ExecuteLowPrioForks;
// The check holds when:
//  - position is 0 and MatchNotBeginOfLine is clear, or
//  - position is non-zero and MatchNotBeginOfLine is set, or
//  - position is 0 and the Global flag is set.
if ((0 == state.string_position && !(input.regex_options & AllFlags::MatchNotBeginOfLine))
|| (0 != state.string_position && (input.regex_options & AllFlags::MatchNotBeginOfLine))
|| (0 == state.string_position && (input.regex_options & AllFlags::Global)))
return ExecutionResult::Continue;
// Any other combination of position and flags is reported as a failure.
return ExecutionResult::Failed;
return ExecutionResult::Failed_ExecuteLowPrioForks;
}
// Executes the end-of-line check against the current match position.
// NOTE(review): this text is a rendered diff without +/- markers; each pair of
// adjacent `return` statements below appears to be a removed line followed by
// its replacement (`Failed` -> `Failed_ExecuteLowPrioForks`, and
// `Succeeded` -> `Continue`), so the body is not meant to be read as one
// compilable function.
// Per the commit message, the change addresses assertions causing early
// termination when they fail.
ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(const MatchInput& input, MatchState& state, MatchOutput&) const
{
// Position equals the input length while MatchNotEndOfLine is set: the check does not hold.
if (state.string_position == input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine))
return ExecutionResult::Failed;
return ExecutionResult::Failed_ExecuteLowPrioForks;
// The check holds when:
//  - position equals the input length and MatchNotEndOfLine is clear, or
//  - position is before the input length and either MatchNotEndOfLine or
//    MatchNotBeginOfLine is set.
if ((state.string_position == input.view.length() && !(input.regex_options & AllFlags::MatchNotEndOfLine))
|| (state.string_position != input.view.length() && (input.regex_options & AllFlags::MatchNotEndOfLine || input.regex_options & AllFlags::MatchNotBeginOfLine)))
return ExecutionResult::Succeeded;
return ExecutionResult::Continue;
// Any other combination of position and flags is reported as a failure.
return ExecutionResult::Failed;
return ExecutionResult::Failed_ExecuteLowPrioForks;
}
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(const MatchInput& input, MatchState& state, MatchOutput& output) const

View file

@ -204,7 +204,7 @@ public:
void insert_bytecode_alternation(ByteCode&& left, ByteCode&& right)
{
// FORKSTAY _ALT
// FORKJUMP _ALT
// REGEXP ALT1
// JUMP _END
// LABEL _ALT
@ -266,12 +266,12 @@ public:
{
// LABEL _START = -bytecode_to_repeat.size()
// REGEXP
// FORKJUMP _START (FORKSTAY -> Greedy)
// FORKSTAY _START (FORKJUMP -> Greedy)
if (greedy)
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
else
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
else
bytecode_to_repeat.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
bytecode_to_repeat.empend(-(bytecode_to_repeat.size() + 1)); // Jump to the _START label
}
@ -279,7 +279,7 @@ public:
void insert_bytecode_repetition_any(ByteCode& bytecode_to_repeat, bool greedy)
{
// LABEL _START
// FORKSTAY _END (FORKJUMP -> Greedy)
// FORKJUMP _END (FORKSTAY -> Greedy)
// REGEXP
// JUMP _START
// LABEL _END
@ -288,9 +288,9 @@ public:
ByteCode bytecode;
if (greedy)
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
else
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
else
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
bytecode.empend(bytecode_to_repeat.size() + 2); // Jump to the _END label
@ -306,15 +306,15 @@ public:
void insert_bytecode_repetition_zero_or_one(ByteCode& bytecode_to_repeat, bool greedy)
{
// FORKSTAY _END (FORKJUMP -> Greedy)
// FORKJUMP _END (FORKSTAY -> Greedy)
// REGEXP
// LABEL _END
ByteCode bytecode;
if (greedy)
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
else
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkStay));
else
bytecode.empend(static_cast<ByteCodeValueType>(OpCodeId::ForkJump));
bytecode.empend(bytecode_to_repeat.size()); // Jump to the _END label

View file

@ -202,23 +202,23 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco
} else if (match(TokenType::Plus)) {
consume();
bool greedy = match(TokenType::Questionmark);
if (greedy)
bool nongreedy = match(TokenType::Questionmark);
if (nongreedy)
consume();
// Note: don't touch match_length_minimum, it's already correct
bytecode_to_repeat.insert_bytecode_repetition_min_one(bytecode_to_repeat, greedy);
bytecode_to_repeat.insert_bytecode_repetition_min_one(bytecode_to_repeat, !nongreedy);
return !has_error();
} else if (match(TokenType::Asterisk)) {
consume();
match_length_minimum = 0;
bool greedy = match(TokenType::Questionmark);
if (greedy)
bool nongreedy = match(TokenType::Questionmark);
if (nongreedy)
consume();
bytecode_to_repeat.insert_bytecode_repetition_any(bytecode_to_repeat, greedy);
bytecode_to_repeat.insert_bytecode_repetition_any(bytecode_to_repeat, !nongreedy);
return !has_error();
@ -226,11 +226,11 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_repetition_symbol(ByteCode& byteco
consume();
match_length_minimum = 0;
bool greedy = match(TokenType::Questionmark);
if (greedy)
bool nongreedy = match(TokenType::Questionmark);
if (nongreedy)
consume();
bytecode_to_repeat.insert_bytecode_repetition_zero_or_one(bytecode_to_repeat, greedy);
bytecode_to_repeat.insert_bytecode_repetition_zero_or_one(bytecode_to_repeat, !nongreedy);
return !has_error();
}

View file

@ -121,9 +121,15 @@ protected:
class PosixExtendedParser final : public Parser {
public:
explicit PosixExtendedParser(Lexer& lexer)
: Parser(lexer) {};
: Parser(lexer)
{
}
PosixExtendedParser(Lexer& lexer, Optional<typename ParserTraits<PosixExtendedParser>::OptionsType> regex_options)
: Parser(lexer, regex_options.value_or({})) {};
: Parser(lexer, regex_options.value_or({}))
{
}
~PosixExtendedParser() = default;
private:

View file

@ -334,6 +334,16 @@ TEST_CASE(match_all_character_class)
EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]);
}
TEST_CASE(match_character_class_with_assertion)
{
    // A run of alphabetic characters followed by an end-of-line assertion,
    // matched against an all-alphabetic subject: expect success with one match.
    Regex<PosixExtended> pattern("[[:alpha:]]+$");
    String subject = "abcdef";

    RegexResult outcome = match(subject, pattern);

    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.count, 1u);
}
TEST_CASE(example_for_git_commit)
{
Regex<PosixExtended> re("^.*$");