LibRegex+LibJS: Avoid searching for more than one match in JS RegExps

All of JS's regular expression APIs only want a single match, so avoid
trying to produce more (which will be discarded anyway).
This commit is contained in:
Ali Mohammad Pur 2022-02-04 19:29:26 +03:30 committed by Andreas Kling
parent 4c506f91fe
commit 2b028f6faa
Notes: sideshowbarker 2024-07-17 19:47:24 +09:00
5 changed files with 28 additions and 5 deletions

View file

@ -990,3 +990,15 @@ TEST_CASE(negative_lookahead)
EXPECT_EQ(re.match(":foobar").success, true);
}
}
// Regression test for AllFlags::SingleMatch: matching must stop after the first
// match has been acquired, even though the Global flag is also set.
TEST_CASE(single_match_flag)
{
    // SingleMatch lives in the generic AllFlags enum, so it has to be converted
    // to the ECMA262-specific flag type before it can be combined with Global.
    auto const single_match = static_cast<ECMAScriptFlags>(regex::AllFlags::SingleMatch);
    Regex<ECMA262> pattern("[\\u0008-\\uffff]"sv, ECMAScriptFlags::Global | single_match);

    // The character class spans U+0008..U+FFFF, which covers 'A', 'B' and 'C';
    // only the leading "A" is expected to be reported.
    auto outcome = pattern.match("ABC");

    EXPECT_EQ(outcome.success, true);
    EXPECT_EQ(outcome.matches.size(), 1u);
    EXPECT_EQ(outcome.matches.first().view.to_string(), "A"sv);
}

View file

@ -81,10 +81,11 @@ enum __RegexAllFlags {
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
__Regex_Internal_Stateful = __Regex_Global << 14, // Internal flag; enables stateful matches.
__Regex_Internal_BrowserExtended = __Regex_Global << 15, // Internal flag; enable browser-specific ECMA262 extensions.
__Regex_Internal_ConsiderNewline = __Regex_Global << 16, // Internal flag; allow matchers to consider newlines as line separators.
__Regex_Last = __Regex_SkipTrimEmptyMatches
__Regex_SingleMatch = __Regex_Global << 14, // Stop after acquiring a single match.
__Regex_Internal_Stateful = __Regex_Global << 15, // Internal flag; enables stateful matches.
__Regex_Internal_BrowserExtended = __Regex_Global << 16, // Internal flag; enable browser-specific ECMA262 extensions.
__Regex_Internal_ConsiderNewline = __Regex_Global << 17, // Internal flag; allow matchers to consider newlines as line separators.
__Regex_Last = __Regex_SingleMatch
};
// Values for the cflags parameter to the regcomp() function:

View file

@ -25,7 +25,12 @@ class RegExpObject : public Object {
public:
// JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
// FIXME: Enable 'BrowserExtended' only if in a browser context.
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags { (regex::ECMAScriptFlags)regex::AllFlags::Global | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches | regex::ECMAScriptFlags::BrowserExtended };
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags {
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
| (regex::ECMAScriptFlags)regex::AllFlags::Global
| (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
| regex::ECMAScriptFlags::BrowserExtended
};
static RegExpObject* create(GlobalObject&);
static RegExpObject* create(GlobalObject&, Regex<ECMA262> regex, String pattern, String flags);

View file

@ -183,6 +183,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
continue_search = false;
auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch);
for (auto const& view : views) {
if (lines_to_skip != 0) {
++input.line;
@ -276,6 +278,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
bool has_zero_length = state.string_position == view_index;
view_index = state.string_position - (has_zero_length ? 0 : 1);
if (single_match_only)
break;
continue;
}
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {

View file

@ -33,6 +33,7 @@ enum class AllFlags {
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
Internal_BrowserExtended = __Regex_Internal_BrowserExtended, // Only for ECMA262, Enable the behaviors defined in section B.1.4. of the ECMA262 spec.
Internal_ConsiderNewline = __Regex_Internal_ConsiderNewline, // Only for ECMA262, Allow multiline matches to consider newlines as line boundaries.