AK: Properly implement steps for shortening a URL's path

Instead of implementing this inline, put it into a function. Use this
new function to correctly implement shortening paths for some places
where this logic was previously missing.

Before these changes, the pathname for the included test was incorrectly
being set to '/' because we were not considering the Windows drive letter.
This commit is contained in:
Shannon Booth 2023-09-17 13:15:52 +12:00 committed by Andreas Kling
parent bfdf7779ce
commit 453dd0cf44
Notes: sideshowbarker 2024-07-17 00:25:35 +09:00
4 changed files with 36 additions and 6 deletions

View file

@ -683,6 +683,24 @@ constexpr bool is_double_dot_path_segment(StringView input)
return input == ".."sv || input.equals_ignoring_ascii_case(".%2e"sv) || input.equals_ignoring_ascii_case("%2e."sv) || input.equals_ignoring_ascii_case("%2e%2e"sv);
}
// https://url.spec.whatwg.org/#shorten-a-urls-path
// Drops the final segment of the given URL's path, unless the URL uses the
// "file" scheme and its path consists solely of a normalized Windows drive letter.
void URLParser::shorten_urls_path(URL& url)
{
    // 1. Assert: url does not have an opaque path.
    VERIFY(!url.cannot_be_a_base_url());

    // 2. Let path be url's path.
    auto& segments = url.m_paths;

    // 3. If url's scheme is "file", path's size is 1, and path[0] is a normalized Windows drive letter, then return.
    //    (Expressed here as a flag guarding step 4 rather than as an early return.)
    bool const path_is_lone_drive_letter = url.scheme() == "file"
        && segments.size() == 1
        && is_normalized_windows_drive_letter(segments[0]);

    // 4. Remove path's last item, if any.
    if (!path_is_lone_drive_letter && !segments.is_empty())
        segments.take_last();
}
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
ErrorOr<String> URLParser::percent_encode_after_encoding(StringView input, URL::PercentEncodeSet percent_encode_set, bool space_as_plus)
{
@ -1025,8 +1043,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
url->m_query = {};
// 2. Shorten urls path.
if (url->m_paths.size())
url->m_paths.remove(url->m_paths.size() - 1);
shorten_urls_path(*url);
// 3. Set state to path state and decrease pointer by 1.
state = State::Path;
@ -1337,8 +1354,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
// 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten urls path.
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
if (!starts_with_windows_drive_letter(substring_from_pointer)) {
if (!url->m_paths.is_empty() && !(url->scheme() == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
url->m_paths.remove(url->m_paths.size() - 1);
shorten_urls_path(*url);
}
// 3. Otherwise:
else {
@ -1505,8 +1521,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
// 2. If buffer is a double-dot URL path segment, then:
if (is_double_dot_path_segment(buffer.string_view())) {
// 1. Shorten urls path.
if (!url->m_paths.is_empty())
url->m_paths.remove(url->m_paths.size() - 1);
shorten_urls_path(*url);
// 2. If neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to urls path.
if (code_point != '/' && !(url->is_special() && code_point == '\\'))

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
* Copyright (c) 2023, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -63,6 +64,9 @@ public:
// https://url.spec.whatwg.org/#concept-host-serializer
static ErrorOr<String> serialize_host(URL::Host const&);
// https://url.spec.whatwg.org/#shorten-a-urls-path
static void shorten_urls_path(URL&);
};
#undef ENUMERATE_STATES

View file

@ -68,3 +68,13 @@ port => ''
pathname => '/hello'
search => ''
hash => ''
new URL('//d:/..', 'file:///C:/a/b')
protocol => 'file:'
username => ''
password => ''
host => ''
hostname => ''
port => ''
pathname => '/d:/'
search => ''
hash => ''

View file

@ -27,6 +27,7 @@
{ input: 'unknown://serenityos.org:0' },
{ input: 'http://serenityos.org/cat?dog#meow"woof' },
{ input: '/hello', base: 'file://friends/' },
{ input: '//d:/..', base: 'file:///C:/a/b' },
]) {
printURL(url.input, url.base);
}