ladybird/Userland/Libraries/LibURL/Parser.h
Shannon Booth ff71d8f2c9 LibURL+LibWeb: Pass a mutable reference URL to URL parser
If given, the spec expects the input URL to be manipulated on the fly
as it is being parsed, and may ignore any errors thrown by the URL
parser.

Previously, we were not exactly following the spec's assumption here,
which resulted in us needing to make awkward copies of the URL in these
situations.

For most cases this is not an issue. But it does cause problems for
situations where URL parsing would result in a failure (which is
ignored by the caller), and the URL is _partially_ updated
while parsing.

Such a situation can occur when setting the host of an href alongside a
port number which is not valid. It is expected that this situation will
result in the host being updated - but not the port number.

Adjust the URL parser API so that it mutates the URL given (if any), and
adjust the callers accordingly.

Fixes two tests on https://wpt.live/url/url-setters-a-area.window.html
2024-08-13 14:14:34 +02:00

76 lines
2.3 KiB
C++

/*
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
* Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Optional.h>
#include <AK/StringView.h>
#include <LibTextCodec/Encoder.h>
#include <LibURL/URL.h>
namespace URL {
// X-macro listing every state of the URL parser state machine.
// Define STATE(name) before expanding ENUMERATE_STATES; it is invoked once per
// entry, in the order written here. Parser::State and Parser::state_name() are
// both generated from this list, so the entry order determines the enumerator
// values of Parser::State.
#define ENUMERATE_STATES \
STATE(SchemeStart) \
STATE(Scheme) \
STATE(NoScheme) \
STATE(SpecialRelativeOrAuthority) \
STATE(PathOrAuthority) \
STATE(Relative) \
STATE(RelativeSlash) \
STATE(SpecialAuthoritySlashes) \
STATE(SpecialAuthorityIgnoreSlashes) \
STATE(Authority) \
STATE(Host) \
STATE(Hostname) \
STATE(Port) \
STATE(File) \
STATE(FileSlash) \
STATE(FileHost) \
STATE(PathStart) \
STATE(Path) \
STATE(CannotBeABaseUrlPath) \
STATE(Query) \
STATE(Fragment)
// Static entry points for URL parsing and related URL-spec helpers.
class Parser {
public:
    // One enumerator per ENUMERATE_STATES entry, in list order.
    enum class State {
#define STATE(name) name,
        ENUMERATE_STATES
#undef STATE
    };

    // Maps a State to its enumerator identifier as a C string
    // (e.g. State::SchemeStart -> "SchemeStart").
    // Hits VERIFY_NOT_REACHED() if the value matches no listed state.
    static char const* state_name(State const& state)
    {
        switch (state) {
#define STATE(name)   \
    case State::name: \
        return #name;
            ENUMERATE_STATES
#undef STATE
        }
        VERIFY_NOT_REACHED();
    }

    // https://url.spec.whatwg.org/#concept-basic-url-parser
    // `url` is optional (defaults to nullptr); when one is supplied, the parser
    // mutates that URL while parsing. `base_url`, `state_override` and
    // `encoding` are optional as well and default to empty.
    static URL basic_parse(StringView input, Optional<URL> const& base_url = {}, URL* url = nullptr, Optional<State> state_override = {}, Optional<StringView> encoding = {});

    // https://url.spec.whatwg.org/#string-percent-encode-after-encoding
    // `space_as_plus` defaults to false.
    static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);

    // https://url.spec.whatwg.org/#concept-host-serializer
    static ErrorOr<String> serialize_host(Host const&);

    // https://url.spec.whatwg.org/#shorten-a-urls-path
    // Operates on the given URL in place (takes a mutable reference, returns nothing).
    static void shorten_urls_path(URL&);
};
#undef ENUMERATE_STATES
}