LibURL+LibWeb: Pass a mutable reference URL to URL parser

If given, the spec expects the input URL to be manipulated on the fly
as it is being parsed, and may ignore any errors thrown by the URL
parser.

Previously, we were not exactly following the specs assumption here
which resulted in us needed to make awkward copies of the URL in these
situations.

For most cases this is not an issue. But it does cause problems for
situations where URL parsing would result in a failure (which is
ignored by the caller), and the URL is _partially_ updated
while parsing.

Such a situation can occur when setting the host of an href alongside a
port number which is not valid. It is expected that this situation will
result in the host being updates - but not the port number.

Adjust the URL parser API so that it mutates the URL given (if any), and
adjust the callers accordingly.

Fixes two tests on https://wpt.live/url/url-setters-a-area.window.html
This commit is contained in:
Shannon Booth 2024-08-13 19:18:50 +12:00 committed by Andreas Kling
parent 00f75648e5
commit ff71d8f2c9
Notes: github-actions[bot] 2024-08-13 12:39:08 +00:00
7 changed files with 44 additions and 65 deletions

View file

@ -0,0 +1,2 @@
http://example.net/path
http://example.com/path

View file

@ -0,0 +1,10 @@
<a id="id" href="http://example.net/path">
<script src="../include.js"></script>
<script>
test(() => {
let a = document.getElementById('id');
println(a.href);
a.host = "example.com:65536";
println(a.href);
})
</script>

View file

@ -817,7 +817,7 @@ String Parser::percent_encode_after_encoding(TextCodec::Encoder& encoder, String
}
// https://url.spec.whatwg.org/#concept-basic-url-parser
URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override, Optional<StringView> encoding)
URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, URL* url, Optional<State> state_override, Optional<StringView> encoding)
{
dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsing '{}'", raw_input);
@ -825,9 +825,10 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
size_t end_index = raw_input.length();
// 1. If url is not given:
if (!url.has_value()) {
auto url_buffer = URL();
if (!url) {
// 1. Set url to a new URL.
url = URL();
url = &url_buffer;
// 2. If input contains any leading or trailing C0 control or space, invalid-URL-unit validation error.
// 3. Remove any leading and trailing C0 control or space from input.
@ -1762,7 +1763,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, Opt
dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsed URL to be '{}'.", url->serialize());
// 10. Return url.
return url.release_value();
return *url;
}
}

View file

@ -58,7 +58,7 @@ public:
}
// https://url.spec.whatwg.org/#concept-basic-url-parser
static URL basic_parse(StringView input, Optional<URL> const& base_url = {}, Optional<URL> url = {}, Optional<State> state_override = {}, Optional<StringView> encoding = {});
static URL basic_parse(StringView input, Optional<URL> const& base_url = {}, URL* url = nullptr, Optional<State> state_override = {}, Optional<StringView> encoding = {});
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);

View file

@ -237,9 +237,7 @@ WebIDL::ExceptionOr<void> DOMURL::set_protocol(String const& protocol)
// The protocol setter steps are to basic URL parse the given value, followed by U+003A (:), with thiss URL as
// url and scheme start state as state override.
auto result_url = URL::Parser::basic_parse(TRY_OR_THROW_OOM(vm, String::formatted("{}:", protocol)), {}, m_url, URL::Parser::State::SchemeStart);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(TRY_OR_THROW_OOM(vm, String::formatted("{}:", protocol)), {}, &m_url, URL::Parser::State::SchemeStart);
return {};
}
@ -307,9 +305,7 @@ void DOMURL::set_host(String const& host)
return;
// 2. Basic URL parse the given value with thiss URL as url and host state as state override.
auto result_url = URL::Parser::basic_parse(host, {}, m_url, URL::Parser::State::Host);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(host, {}, &m_url, URL::Parser::State::Host);
}
// https://url.spec.whatwg.org/#dom-url-hostname
@ -333,9 +329,7 @@ void DOMURL::set_hostname(String const& hostname)
return;
// 2. Basic URL parse the given value with thiss URL as url and hostname state as state override.
auto result_url = URL::Parser::basic_parse(hostname, {}, m_url, URL::Parser::State::Hostname);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(hostname, {}, &m_url, URL::Parser::State::Hostname);
}
// https://url.spec.whatwg.org/#dom-url-port
@ -364,9 +358,7 @@ void DOMURL::set_port(String const& port)
}
// 3. Otherwise, basic URL parse the given value with thiss URL as url and port state as state override.
else {
auto result_url = URL::Parser::basic_parse(port, {}, m_url, URL::Parser::State::Port);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(port, {}, &m_url, URL::Parser::State::Port);
}
}
@ -386,13 +378,10 @@ void DOMURL::set_pathname(String const& pathname)
return;
// 2. Empty thiss URLs path.
auto url = m_url; // We copy the URL here to follow other browser's behavior of reverting the path change if the parse failed.
url.set_paths({});
m_url.set_paths({});
// 3. Basic URL parse the given value with thiss URL as url and path start state as state override.
auto result_url = URL::Parser::basic_parse(pathname, {}, move(url), URL::Parser::State::PathStart);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(pathname, {}, &m_url, URL::Parser::State::PathStart);
}
// https://url.spec.whatwg.org/#dom-url-search
@ -434,17 +423,13 @@ void DOMURL::set_search(String const& search)
auto input = search_as_string_view.substring_view(search_as_string_view.starts_with('?'));
// 4. Set urls query to the empty string.
auto url_copy = url; // We copy the URL here to follow other browser's behavior of reverting the search change if the parse failed.
url_copy.set_query(String {});
url.set_query(String {});
// 5. Basic URL parse input with url as url and query state as state override.
auto result_url = URL::Parser::basic_parse(input, {}, move(url_copy), URL::Parser::State::Query);
if (result_url.is_valid()) {
m_url = move(result_url);
(void)URL::Parser::basic_parse(input, {}, &url, URL::Parser::State::Query);
// 6. Set thiss query objects list to the result of parsing input.
m_query->m_list = url_decode(input);
}
// 6. Set thiss query objects list to the result of parsing input.
m_query->m_list = url_decode(input);
}
// https://url.spec.whatwg.org/#dom-url-searchparams
@ -487,13 +472,10 @@ void DOMURL::set_hash(String const& hash)
auto input = hash_as_string_view.substring_view(hash_as_string_view.starts_with('#'));
// 3. Set thiss URLs fragment to the empty string.
auto url = m_url; // We copy the URL here to follow other browser's behavior of reverting the hash change if the parse failed.
url.set_fragment(String {});
m_url.set_fragment(String {});
// 4. Basic URL parse input with thiss URL as url and fragment state as state override.
auto result_url = URL::Parser::basic_parse(input, {}, move(url), URL::Parser::State::Fragment);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment);
}
// https://url.spec.whatwg.org/#concept-url-origin

View file

@ -78,9 +78,7 @@ void HTMLHyperlinkElementUtils::set_protocol(StringView protocol)
return;
// 3. Basic URL parse the given value, followed by ":", with this element's url as url and scheme start state as state override.
auto result_url = URL::Parser::basic_parse(MUST(String::formatted("{}:", protocol)), {}, m_url, URL::Parser::State::SchemeStart);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(MUST(String::formatted("{}:", protocol)), {}, &m_url.value(), URL::Parser::State::SchemeStart);
// 4. Update href.
update_href();
@ -192,9 +190,7 @@ void HTMLHyperlinkElementUtils::set_host(StringView host)
return;
// 4. Basic URL parse the given value, with url as url and host state as state override.
auto result_url = URL::Parser::basic_parse(host, {}, url, URL::Parser::State::Host);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(host, {}, &url.value(), URL::Parser::State::Host);
// 5. Update href.
update_href();
@ -228,9 +224,7 @@ void HTMLHyperlinkElementUtils::set_hostname(StringView hostname)
return;
// 4. Basic URL parse the given value, with url as url and hostname state as state override.
auto result_url = URL::Parser::basic_parse(hostname, {}, m_url, URL::Parser::State::Hostname);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(hostname, {}, &url.value(), URL::Parser::State::Hostname);
// 5. Update href.
update_href();
@ -268,11 +262,10 @@ void HTMLHyperlinkElementUtils::set_port(StringView port)
// 4. If the given value is the empty string, then set url's port to null.
if (port.is_empty()) {
m_url->set_port({});
} else {
// 5. Otherwise, basic URL parse the given value, with url as url and port state as state override.
auto result_url = URL::Parser::basic_parse(port, {}, m_url, URL::Parser::State::Port);
if (result_url.is_valid())
m_url = move(result_url);
}
// 5. Otherwise, basic URL parse the given value, with url as url and port state as state override.
else {
(void)URL::Parser::basic_parse(port, {}, &m_url.value(), URL::Parser::State::Port);
}
// 6. Update href.
@ -308,13 +301,10 @@ void HTMLHyperlinkElementUtils::set_pathname(StringView pathname)
return;
// 4. Set url's path to the empty list.
auto url = m_url; // We copy the URL here to follow other browser's behavior of reverting the path change if the parse failed.
url->set_paths({});
m_url->set_paths({});
// 5. Basic URL parse the given value, with url as url and path start state as state override.
auto result_url = URL::Parser::basic_parse(pathname, {}, move(url), URL::Parser::State::PathStart);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(pathname, {}, &m_url.value(), URL::Parser::State::PathStart);
// 6. Update href.
update_href();
@ -355,13 +345,10 @@ void HTMLHyperlinkElementUtils::set_search(StringView search)
auto input = search.substring_view(search.starts_with('?'));
// 2. Set url's query to the empty string.
auto url_copy = m_url; // We copy the URL here to follow other browser's behavior of reverting the search change if the parse failed.
url_copy->set_query(String {});
m_url->set_query(String {});
// 3. Basic URL parse input, with null, this element's node document's document's character encoding, url as url, and query state as state override.
auto result_url = URL::Parser::basic_parse(input, {}, move(url_copy), URL::Parser::State::Query);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(input, {}, &m_url.value(), URL::Parser::State::Query);
}
// 6. Update href.
@ -403,13 +390,10 @@ void HTMLHyperlinkElementUtils::set_hash(StringView hash)
auto input = hash.substring_view(hash.starts_with('#'));
// 2. Set url's fragment to the empty string.
auto url_copy = m_url; // We copy the URL here to follow other browser's behavior of reverting the hash change if the parse failed.
url_copy->set_fragment(String {});
m_url->set_fragment(String {});
// 3. Basic URL parse input, with url as url and fragment state as state override.
auto result_url = URL::Parser::basic_parse(input, {}, move(url_copy), URL::Parser::State::Fragment);
if (result_url.is_valid())
m_url = move(result_url);
(void)URL::Parser::basic_parse(input, {}, &m_url.value(), URL::Parser::State::Fragment);
}
// 6. Update href.

View file

@ -333,7 +333,7 @@ WebIDL::ExceptionOr<void> Location::set_hash(String const& value)
copy_url.set_fragment(String {});
// 6. Basic URL parse input, with copyURL as url and fragment state as state override.
copy_url = URL::Parser::basic_parse(input, {}, copy_url, URL::Parser::State::Fragment);
(void)URL::Parser::basic_parse(input, {}, &copy_url, URL::Parser::State::Fragment);
// 7. If copyURL's fragment is this's url's fragment, then return.
if (copy_url.fragment() == this->url().fragment())