LibURL: Allow inputs containing only whitespace

The check:

```
    if (start_index >= end_index)
        return {};
```

which was intended to prevent an out-of-bounds access when trimming
whitespace from the start and end of the input, was also preventing
valid URLs (inputs containing only whitespace) from being parsed.

Instead, prevent start_index from ever getting above end_index in the
first place, and don't treat empty inputs as an error.

Fixes one WPT test on:

https://wpt.live/url/url-constructor.any.html
This commit is contained in:
Shannon Booth 2024-08-06 02:00:52 +12:00 committed by Tim Ledbetter
parent 4f5af3e90e
commit d6af5bf5eb
Notes: github-actions[bot] 2024-08-05 16:22:19 +00:00
3 changed files with 30 additions and 16 deletions

View file

@ -108,6 +108,16 @@ port => '9000'
pathname => '/path'
search => '?query'
hash => '#frag'
new URL(' \t', 'http://ladybird.org/foo/bar')
protocol => 'http:'
username => ''
password => ''
host => 'ladybird.org'
hostname => 'ladybird.org'
port => ''
pathname => '/foo/bar'
search => ''
hash => ''
=========================================
URL.parse('ftp://serenityos.org:21', undefined)
protocol => 'ftp:'
@ -219,3 +229,13 @@ port => '9000'
pathname => '/path'
search => '?query'
hash => '#frag'
URL.parse(' \t', 'http://ladybird.org/foo/bar')
protocol => 'http:'
username => ''
password => ''
host => 'ladybird.org'
hostname => 'ladybird.org'
port => ''
pathname => '/foo/bar'
search => ''
hash => ''

View file

@ -32,6 +32,7 @@
{ input: 'file://a%C2%ADb/p' },
{ input: 'http://user%20name:pa%40ss%3Aword@www.ladybird.org' },
{ input: 'h\tt\nt\rp://h\to\ns\rt:9\t0\n0\r0/p\ta\nt\rh?q\tu\ne\rry#f\tr\na\rg' },
{ input: ' \t', base: 'http://ladybird.org/foo/bar' },
];
for (url of urls) {

View file

@ -808,29 +808,22 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
// 2. If input contains any leading or trailing C0 control or space, invalid-URL-unit validation error.
// 3. Remove any leading and trailing C0 control or space from input.
bool has_validation_error = false;
for (size_t i = 0; i < raw_input.length(); ++i) {
u8 ch = raw_input[i];
if (is_ascii_c0_control_or_space(ch)) {
++start_index;
has_validation_error = true;
} else {
for (; start_index < raw_input.length(); ++start_index) {
if (!is_ascii_c0_control_or_space(raw_input[start_index]))
break;
}
has_validation_error = true;
}
for (ssize_t i = raw_input.length() - 1; i >= 0; --i) {
u8 ch = raw_input[i];
if (is_ascii_c0_control_or_space(ch)) {
--end_index;
has_validation_error = true;
} else {
for (; end_index > start_index; --end_index) {
if (!is_ascii_c0_control_or_space(raw_input[end_index - 1]))
break;
}
has_validation_error = true;
}
if (has_validation_error)
report_validation_error();
}
if (start_index >= end_index)
return {};
ByteString processed_input = raw_input.substring_view(start_index, end_index - start_index);