From ff71d8f2c97441bff5975c117a7e7c8820e33e44 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Tue, 13 Aug 2024 19:18:50 +1200 Subject: [PATCH] LibURL+LibWeb: Pass a mutable reference URL to URL parser If given, the spec expects the input URL to be manipulated on the fly as it is being parsed, and may ignore any errors thrown by the URL parser. Previously, we were not exactly following the specs assumption here which resulted in us needed to make awkward copies of the URL in these situations. For most cases this is not an issue. But it does cause problems for situations where URL parsing would result in a failure (which is ignored by the caller), and the URL is _partially_ updated while parsing. Such a situation can occur when setting the host of an href alongside a port number which is not valid. It is expected that this situation will result in the host being updates - but not the port number. Adjust the URL parser API so that it mutates the URL given (if any), and adjust the callers accordingly. Fixes two tests on https://wpt.live/url/url-setters-a-area.window.html --- ...f-with-valid-hostname-but-invalid-port.txt | 2 + ...-with-valid-hostname-but-invalid-port.html | 10 +++++ Userland/Libraries/LibURL/Parser.cpp | 9 ++-- Userland/Libraries/LibURL/Parser.h | 2 +- Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp | 42 ++++++------------- .../LibWeb/HTML/HTMLHyperlinkElementUtils.cpp | 42 ++++++------------- Userland/Libraries/LibWeb/HTML/Location.cpp | 2 +- 7 files changed, 44 insertions(+), 65 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/URL/set-href-with-valid-hostname-but-invalid-port.txt create mode 100644 Tests/LibWeb/Text/input/URL/set-href-with-valid-hostname-but-invalid-port.html diff --git a/Tests/LibWeb/Text/expected/URL/set-href-with-valid-hostname-but-invalid-port.txt b/Tests/LibWeb/Text/expected/URL/set-href-with-valid-hostname-but-invalid-port.txt new file mode 100644 index 00000000000..892b6b988aa --- /dev/null +++ b/Tests/LibWeb/Text/expected/URL/set-href-with-valid-hostname-but-invalid-port.txt @@ -0,0 +1,2 @@ + http://example.net/path +http://example.com/path diff --git a/Tests/LibWeb/Text/input/URL/set-href-with-valid-hostname-but-invalid-port.html b/Tests/LibWeb/Text/input/URL/set-href-with-valid-hostname-but-invalid-port.html new file mode 100644 index 00000000000..e50b4f7d7c3 --- /dev/null +++ b/Tests/LibWeb/Text/input/URL/set-href-with-valid-hostname-but-invalid-port.html @@ -0,0 +1,10 @@ + + + diff --git a/Userland/Libraries/LibURL/Parser.cpp b/Userland/Libraries/LibURL/Parser.cpp index 3c2f3c4a3eb..f05e7861d27 100644 --- a/Userland/Libraries/LibURL/Parser.cpp +++ b/Userland/Libraries/LibURL/Parser.cpp @@ -817,7 +817,7 @@ String Parser::percent_encode_after_encoding(TextCodec::Encoder& encoder, String } // https://url.spec.whatwg.org/#concept-basic-url-parser -URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Optional url, Optional state_override, Optional encoding) +URL Parser::basic_parse(StringView raw_input, Optional const& base_url, URL* url, Optional state_override, Optional encoding) { dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsing '{}'", raw_input); @@ -825,9 +825,10 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt size_t end_index = raw_input.length(); // 1. If url is not given: - if (!url.has_value()) { + auto url_buffer = URL(); + if (!url) { // 1. Set url to a new URL. - url = URL(); + url = &url_buffer; // 2. If input contains any leading or trailing C0 control or space, invalid-URL-unit validation error. // 3. Remove any leading and trailing C0 control or space from input. @@ -1762,7 +1763,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsed URL to be '{}'.", url->serialize()); // 10. Return url. - return url.release_value(); + return *url; } } diff --git a/Userland/Libraries/LibURL/Parser.h b/Userland/Libraries/LibURL/Parser.h index c52f9e6d84a..b605993247a 100644 --- a/Userland/Libraries/LibURL/Parser.h +++ b/Userland/Libraries/LibURL/Parser.h @@ -58,7 +58,7 @@ public: } // https://url.spec.whatwg.org/#concept-basic-url-parser - static URL basic_parse(StringView input, Optional const& base_url = {}, Optional url = {}, Optional state_override = {}, Optional encoding = {}); + static URL basic_parse(StringView input, Optional const& base_url = {}, URL* url = nullptr, Optional state_override = {}, Optional encoding = {}); // https://url.spec.whatwg.org/#string-percent-encode-after-encoding static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false); diff --git a/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp b/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp index 0d1fddeb9bb..0e494a94f88 100644 --- a/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp +++ b/Userland/Libraries/LibWeb/DOMURL/DOMURL.cpp @@ -237,9 +237,7 @@ WebIDL::ExceptionOr DOMURL::set_protocol(String const& protocol) // The protocol setter steps are to basic URL parse the given value, followed by U+003A (:), with this’s URL as // url and scheme start state as state override. - auto result_url = URL::Parser::basic_parse(TRY_OR_THROW_OOM(vm, String::formatted("{}:", protocol)), {}, m_url, URL::Parser::State::SchemeStart); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(TRY_OR_THROW_OOM(vm, String::formatted("{}:", protocol)), {}, &m_url, URL::Parser::State::SchemeStart); return {}; } @@ -307,9 +305,7 @@ void DOMURL::set_host(String const& host) return; // 2. Basic URL parse the given value with this’s URL as url and host state as state override. - auto result_url = URL::Parser::basic_parse(host, {}, m_url, URL::Parser::State::Host); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(host, {}, &m_url, URL::Parser::State::Host); } // https://url.spec.whatwg.org/#dom-url-hostname @@ -333,9 +329,7 @@ void DOMURL::set_hostname(String const& hostname) return; // 2. Basic URL parse the given value with this’s URL as url and hostname state as state override. - auto result_url = URL::Parser::basic_parse(hostname, {}, m_url, URL::Parser::State::Hostname); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(hostname, {}, &m_url, URL::Parser::State::Hostname); } // https://url.spec.whatwg.org/#dom-url-port @@ -364,9 +358,7 @@ void DOMURL::set_port(String const& port) } // 3. Otherwise, basic URL parse the given value with this’s URL as url and port state as state override. else { - auto result_url = URL::Parser::basic_parse(port, {}, m_url, URL::Parser::State::Port); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(port, {}, &m_url, URL::Parser::State::Port); } } @@ -386,13 +378,10 @@ void DOMURL::set_pathname(String const& pathname) return; // 2. Empty this’s URL’s path. - auto url = m_url; // We copy the URL here to follow other browser's behavior of reverting the path change if the parse failed. - url.set_paths({}); + m_url.set_paths({}); // 3. Basic URL parse the given value with this’s URL as url and path start state as state override. - auto result_url = URL::Parser::basic_parse(pathname, {}, move(url), URL::Parser::State::PathStart); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(pathname, {}, &m_url, URL::Parser::State::PathStart); } // https://url.spec.whatwg.org/#dom-url-search @@ -434,17 +423,13 @@ void DOMURL::set_search(String const& search) auto input = search_as_string_view.substring_view(search_as_string_view.starts_with('?')); // 4. Set url’s query to the empty string. - auto url_copy = url; // We copy the URL here to follow other browser's behavior of reverting the search change if the parse failed. - url_copy.set_query(String {}); + url.set_query(String {}); // 5. Basic URL parse input with url as url and query state as state override. - auto result_url = URL::Parser::basic_parse(input, {}, move(url_copy), URL::Parser::State::Query); - if (result_url.is_valid()) { - m_url = move(result_url); + (void)URL::Parser::basic_parse(input, {}, &url, URL::Parser::State::Query); - // 6. Set this’s query object’s list to the result of parsing input. - m_query->m_list = url_decode(input); - } + // 6. Set this’s query object’s list to the result of parsing input. + m_query->m_list = url_decode(input); } // https://url.spec.whatwg.org/#dom-url-searchparams @@ -487,13 +472,10 @@ void DOMURL::set_hash(String const& hash) auto input = hash_as_string_view.substring_view(hash_as_string_view.starts_with('#')); // 3. Set this’s URL’s fragment to the empty string. - auto url = m_url; // We copy the URL here to follow other browser's behavior of reverting the hash change if the parse failed. - url.set_fragment(String {}); + m_url.set_fragment(String {}); // 4. Basic URL parse input with this’s URL as url and fragment state as state override. - auto result_url = URL::Parser::basic_parse(input, {}, move(url), URL::Parser::State::Fragment); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment); } // https://url.spec.whatwg.org/#concept-url-origin diff --git a/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp b/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp index ad4ac0890cb..badb1ae304c 100644 --- a/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp +++ b/Userland/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp @@ -78,9 +78,7 @@ void HTMLHyperlinkElementUtils::set_protocol(StringView protocol) return; // 3. Basic URL parse the given value, followed by ":", with this element's url as url and scheme start state as state override. - auto result_url = URL::Parser::basic_parse(MUST(String::formatted("{}:", protocol)), {}, m_url, URL::Parser::State::SchemeStart); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(MUST(String::formatted("{}:", protocol)), {}, &m_url.value(), URL::Parser::State::SchemeStart); // 4. Update href. update_href(); @@ -192,9 +190,7 @@ void HTMLHyperlinkElementUtils::set_host(StringView host) return; // 4. Basic URL parse the given value, with url as url and host state as state override. - auto result_url = URL::Parser::basic_parse(host, {}, url, URL::Parser::State::Host); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(host, {}, &url.value(), URL::Parser::State::Host); // 5. Update href. update_href(); @@ -228,9 +224,7 @@ void HTMLHyperlinkElementUtils::set_hostname(StringView hostname) return; // 4. Basic URL parse the given value, with url as url and hostname state as state override. - auto result_url = URL::Parser::basic_parse(hostname, {}, m_url, URL::Parser::State::Hostname); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(hostname, {}, &url.value(), URL::Parser::State::Hostname); // 5. Update href. update_href(); @@ -268,11 +262,10 @@ void HTMLHyperlinkElementUtils::set_port(StringView port) // 4. If the given value is the empty string, then set url's port to null. if (port.is_empty()) { m_url->set_port({}); - } else { - // 5. Otherwise, basic URL parse the given value, with url as url and port state as state override. - auto result_url = URL::Parser::basic_parse(port, {}, m_url, URL::Parser::State::Port); - if (result_url.is_valid()) - m_url = move(result_url); + } + // 5. Otherwise, basic URL parse the given value, with url as url and port state as state override. + else { + (void)URL::Parser::basic_parse(port, {}, &m_url.value(), URL::Parser::State::Port); } // 6. Update href. @@ -308,13 +301,10 @@ void HTMLHyperlinkElementUtils::set_pathname(StringView pathname) return; // 4. Set url's path to the empty list. - auto url = m_url; // We copy the URL here to follow other browser's behavior of reverting the path change if the parse failed. - url->set_paths({}); + m_url->set_paths({}); // 5. Basic URL parse the given value, with url as url and path start state as state override. - auto result_url = URL::Parser::basic_parse(pathname, {}, move(url), URL::Parser::State::PathStart); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(pathname, {}, &m_url.value(), URL::Parser::State::PathStart); // 6. Update href. update_href(); @@ -355,13 +345,10 @@ void HTMLHyperlinkElementUtils::set_search(StringView search) auto input = search.substring_view(search.starts_with('?')); // 2. Set url's query to the empty string. - auto url_copy = m_url; // We copy the URL here to follow other browser's behavior of reverting the search change if the parse failed. - url_copy->set_query(String {}); + m_url->set_query(String {}); // 3. Basic URL parse input, with null, this element's node document's document's character encoding, url as url, and query state as state override. - auto result_url = URL::Parser::basic_parse(input, {}, move(url_copy), URL::Parser::State::Query); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(input, {}, &m_url.value(), URL::Parser::State::Query); } // 6. Update href. @@ -403,13 +390,10 @@ void HTMLHyperlinkElementUtils::set_hash(StringView hash) auto input = hash.substring_view(hash.starts_with('#')); // 2. Set url's fragment to the empty string. - auto url_copy = m_url; // We copy the URL here to follow other browser's behavior of reverting the hash change if the parse failed. - url_copy->set_fragment(String {}); + m_url->set_fragment(String {}); // 3. Basic URL parse input, with url as url and fragment state as state override. - auto result_url = URL::Parser::basic_parse(input, {}, move(url_copy), URL::Parser::State::Fragment); - if (result_url.is_valid()) - m_url = move(result_url); + (void)URL::Parser::basic_parse(input, {}, &m_url.value(), URL::Parser::State::Fragment); } // 6. Update href. diff --git a/Userland/Libraries/LibWeb/HTML/Location.cpp b/Userland/Libraries/LibWeb/HTML/Location.cpp index 0b3f399ef0c..df74d3e5f87 100644 --- a/Userland/Libraries/LibWeb/HTML/Location.cpp +++ b/Userland/Libraries/LibWeb/HTML/Location.cpp @@ -333,7 +333,7 @@ WebIDL::ExceptionOr Location::set_hash(String const& value) copy_url.set_fragment(String {}); // 6. Basic URL parse input, with copyURL as url and fragment state as state override. - copy_url = URL::Parser::basic_parse(input, {}, copy_url, URL::Parser::State::Fragment); + (void)URL::Parser::basic_parse(input, {}, ©_url, URL::Parser::State::Fragment); // 7. If copyURL's fragment is this's url's fragment, then return. if (copy_url.fragment() == this->url().fragment())