mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-09-29 08:11:13 +00:00
LibRegex+LibJS: Avoid searching for more than one match in JS RegExps
All of JS's regular expression APIs only want a single match, so avoid trying to produce more (which will be discarded anyway).
This commit is contained in:
parent
4c506f91fe
commit
2b028f6faa
Notes:
sideshowbarker
2024-07-17 19:47:24 +09:00
Author: https://github.com/alimpfard Commit: https://github.com/SerenityOS/serenity/commit/2b028f6faa3 Pull-request: https://github.com/SerenityOS/serenity/pull/12287
|
@ -990,3 +990,15 @@ TEST_CASE(negative_lookahead)
|
||||||
EXPECT_EQ(re.match(":foobar").success, true);
|
EXPECT_EQ(re.match(":foobar").success, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(single_match_flag)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
// Ensure that only a single match is produced and nothing past that.
// Every character of "ABC" lies inside [\u0008-\uffff], so the pattern could match at three positions;
// with SingleMatch set alongside Global, only the first one ("A") is expected in the result.
|
||||||
|
Regex<ECMA262> re("[\\u0008-\\uffff]"sv, ECMAScriptFlags::Global | (ECMAScriptFlags)regex::AllFlags::SingleMatch);
|
||||||
|
auto result = re.match("ABC");
|
||||||
|
EXPECT_EQ(result.success, true);
|
||||||
|
EXPECT_EQ(result.matches.size(), 1u);
|
||||||
|
EXPECT_EQ(result.matches.first().view.to_string(), "A"sv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -81,10 +81,11 @@ enum __RegexAllFlags {
|
||||||
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
|
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||||
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
||||||
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
|
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
|
||||||
__Regex_Internal_Stateful = __Regex_Global << 14, // Internal flag; enables stateful matches.
|
__Regex_SingleMatch = __Regex_Global << 14, // Stop after acquiring a single match.
|
||||||
__Regex_Internal_BrowserExtended = __Regex_Global << 15, // Internal flag; enable browser-specific ECMA262 extensions.
|
__Regex_Internal_Stateful = __Regex_Global << 15, // Internal flag; enables stateful matches.
|
||||||
__Regex_Internal_ConsiderNewline = __Regex_Global << 16, // Internal flag; allow matchers to consider newlines as line separators.
|
__Regex_Internal_BrowserExtended = __Regex_Global << 16, // Internal flag; enable browser-specific ECMA262 extensions.
|
||||||
__Regex_Last = __Regex_SkipTrimEmptyMatches
|
__Regex_Internal_ConsiderNewline = __Regex_Global << 17, // Internal flag; allow matchers to consider newlines as line separators.
|
||||||
|
__Regex_Last = __Regex_SingleMatch
|
||||||
};
|
};
|
||||||
|
|
||||||
// Values for the cflags parameter to the regcomp() function:
|
// Values for the cflags parameter to the regcomp() function:
|
||||||
|
|
|
@ -25,7 +25,12 @@ class RegExpObject : public Object {
|
||||||
public:
|
public:
|
||||||
// JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
|
// JS regexps are all 'global' by default as per our definition, but the "global" flag enables "stateful".
|
||||||
// FIXME: Enable 'BrowserExtended' only if in a browser context.
|
// FIXME: Enable 'BrowserExtended' only if in a browser context.
|
||||||
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags { (regex::ECMAScriptFlags)regex::AllFlags::Global | (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches | regex::ECMAScriptFlags::BrowserExtended };
|
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags {
|
||||||
|
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
||||||
|
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
||||||
|
| (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
|
||||||
|
| regex::ECMAScriptFlags::BrowserExtended
|
||||||
|
};
|
||||||
|
|
||||||
static RegExpObject* create(GlobalObject&);
|
static RegExpObject* create(GlobalObject&);
|
||||||
static RegExpObject* create(GlobalObject&, Regex<ECMA262> regex, String pattern, String flags);
|
static RegExpObject* create(GlobalObject&, Regex<ECMA262> regex, String pattern, String flags);
|
||||||
|
|
|
@ -183,6 +183,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
||||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||||
continue_search = false;
|
continue_search = false;
|
||||||
|
|
||||||
|
auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch);
|
||||||
|
|
||||||
for (auto const& view : views) {
|
for (auto const& view : views) {
|
||||||
if (lines_to_skip != 0) {
|
if (lines_to_skip != 0) {
|
||||||
++input.line;
|
++input.line;
|
||||||
|
@ -276,6 +278,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
||||||
|
|
||||||
bool has_zero_length = state.string_position == view_index;
|
bool has_zero_length = state.string_position == view_index;
|
||||||
view_index = state.string_position - (has_zero_length ? 0 : 1);
|
view_index = state.string_position - (has_zero_length ? 0 : 1);
|
||||||
|
if (single_match_only)
|
||||||
|
break;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
|
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
|
||||||
|
|
|
@ -33,6 +33,7 @@ enum class AllFlags {
|
||||||
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
|
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||||
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
||||||
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
||||||
|
SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
|
||||||
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
||||||
Internal_BrowserExtended = __Regex_Internal_BrowserExtended, // Only for ECMA262, Enable the behaviors defined in section B.1.4. of the ECMA262 spec.
|
Internal_BrowserExtended = __Regex_Internal_BrowserExtended, // Only for ECMA262, Enable the behaviors defined in section B.1.4. of the ECMA262 spec.
|
||||||
Internal_ConsiderNewline = __Regex_Internal_ConsiderNewline, // Only for ECMA262, Allow multiline matches to consider newlines as line boundaries.
|
Internal_ConsiderNewline = __Regex_Internal_ConsiderNewline, // Only for ECMA262, Allow multiline matches to consider newlines as line boundaries.
|
||||||
|
|
Loading…
Reference in a new issue