From 0ca5675d59dbcb52cedea56729de26b41074024a Mon Sep 17 00:00:00 2001
From: BenJilks <benjyjilks@gmail.com>
Date: Tue, 6 Aug 2024 11:06:05 +0100
Subject: [PATCH] LibTextCodec: Implement `iso-2022-jp` encoder

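Implements the `iso-2022-jp` encoder, as specified by
https://encoding.spec.whatwg.org/#iso-2022-jp-encoder

Encoder::process() now also takes an `on_error` callback. Encoders
invoke it for code points they cannot encode instead of silently
skipping them, and LibURL uses it to append the "%26%23<code point>%3B"
fallback while percent-encoding after encoding.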
---
 .../LibTextCodec/GenerateEncodingIndexes.cpp  |   1 +
 Tests/LibTextCodec/TestTextEncoders.cpp       |  73 +++++---
 Userland/Libraries/LibTextCodec/Encoder.cpp   | 157 ++++++++++++++++--
 Userland/Libraries/LibTextCodec/Encoder.h     |  28 +++-
 Userland/Libraries/LibURL/Parser.cpp          |  57 ++++---
 5 files changed, 255 insertions(+), 61 deletions(-)
diff --git a/Meta/Lagom/Tools/CodeGenerators/LibTextCodec/GenerateEncodingIndexes.cpp b/Meta/Lagom/Tools/CodeGenerators/LibTextCodec/GenerateEncodingIndexes.cpp
index 53918f50d54..cb8bbfea3ee 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibTextCodec/GenerateEncodingIndexes.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibTextCodec/GenerateEncodingIndexes.cpp
@@ -272,6 +272,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
             { "jis0212"sv, prepare_table(data.get("jis0212"sv)->as_array(), GenerateAccessor::Yes) },
             { "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array(), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
             { "ibm866"sv, prepare_table(data.get("ibm866"sv)->as_array()) },
+            { "iso_2022_jp_katakana"sv, prepare_table(data.get("iso-2022-jp-katakana"sv)->as_array(), GenerateAccessor::Yes) },
             { "iso_8859_2"sv, prepare_table(data.get("iso-8859-2"sv)->as_array()) },
             { "iso_8859_3"sv, prepare_table(data.get("iso-8859-3"sv)->as_array()) },
             { "iso_8859_4"sv, prepare_table(data.get("iso-8859-4"sv)->as_array()) },
diff --git a/Tests/LibTextCodec/TestTextEncoders.cpp b/Tests/LibTextCodec/TestTextEncoders.cpp
index d9898d0931e..8ed0759bb68 100644
--- a/Tests/LibTextCodec/TestTextEncoders.cpp
+++ b/Tests/LibTextCodec/TestTextEncoders.cpp
@@ -14,9 +14,10 @@ TEST_CASE(test_utf8_encode)
     auto test_string = "\U0001F600"sv;
 
     Vector<u8> processed_bytes;
-    MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
-        return processed_bytes.try_append(byte);
-    }));
+    MUST(encoder.process(
+        Utf8View(test_string),
+        [&](u8 byte) { return processed_bytes.try_append(byte); },
+        [&](u32) -> ErrorOr<void> { EXPECT(false); return {}; }));
     EXPECT(processed_bytes.size() == 4);
     EXPECT(processed_bytes[0] == 0xF0);
     EXPECT(processed_bytes[1] == 0x9F);
@@ -33,9 +34,10 @@ TEST_CASE(test_euc_jp_encoder)
     auto test_string = "\U000000A5\U00003088\U000030C4"sv;
 
     Vector<u8> processed_bytes;
-    MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
-        return processed_bytes.try_append(byte);
-    }));
+    MUST(encoder.process(
+        Utf8View(test_string),
+        [&](u8 byte) { return processed_bytes.try_append(byte); },
+        [&](u32) -> ErrorOr<void> { EXPECT(false); return {}; }));
     EXPECT(processed_bytes.size() == 5);
     EXPECT(processed_bytes[0] == 0x5C);
     EXPECT(processed_bytes[1] == 0xA4);
@@ -44,6 +46,36 @@ TEST_CASE(test_euc_jp_encoder)
     EXPECT(processed_bytes[4] == 0xC4);
 }
 
+TEST_CASE(test_iso_2022_jp_encoder)
+{
+    TextCodec::ISO2022JPEncoder encoder;
+    // U+A5 Yen Sign: switches to Roman (0x1B 0x28 0x4A), then encodes as 0x5C.
+    // U+3088 Hiragana Letter Yo: switches to jis0208 (0x1B 0x24 0x42), then encodes as 0x24 0x68.
+    // U+30C4 Katakana Letter Tu: encodes as 0x25 0x44; end of input switches back to ASCII (0x1B 0x28 0x42).
+    auto test_string = "\U000000A5\U00003088\U000030C4"sv;
+
+    Vector<u8> processed_bytes;
+    MUST(encoder.process(
+        Utf8View(test_string),
+        [&](u8 byte) { return processed_bytes.try_append(byte); },
+        [&](u32) -> ErrorOr<void> { EXPECT(false); return {}; }));
+    EXPECT(processed_bytes.size() == 14);
+    EXPECT(processed_bytes[0] == 0x1B);
+    EXPECT(processed_bytes[1] == 0x28);
+    EXPECT(processed_bytes[2] == 0x4A);
+    EXPECT(processed_bytes[3] == 0x5C);
+    EXPECT(processed_bytes[4] == 0x1B);
+    EXPECT(processed_bytes[5] == 0x24);
+    EXPECT(processed_bytes[6] == 0x42);
+    EXPECT(processed_bytes[7] == 0x24);
+    EXPECT(processed_bytes[8] == 0x68);
+    EXPECT(processed_bytes[9] == 0x25);
+    EXPECT(processed_bytes[10] == 0x44);
+    EXPECT(processed_bytes[11] == 0x1B);
+    EXPECT(processed_bytes[12] == 0x28);
+    EXPECT(processed_bytes[13] == 0x42);
+}
+
 TEST_CASE(test_shift_jis_encoder)
 {
     TextCodec::ShiftJISEncoder encoder;
@@ -53,9 +85,10 @@ TEST_CASE(test_shift_jis_encoder)
     auto test_string = "\U000000A5\U00003088\U000030C4"sv;
 
     Vector<u8> processed_bytes;
-    MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
-        return processed_bytes.try_append(byte);
-    }));
+    MUST(encoder.process(
+        Utf8View(test_string),
+        [&](u8 byte) { return processed_bytes.try_append(byte); },
+        [&](u32) -> ErrorOr<void> { EXPECT(false); return {}; }));
     EXPECT(processed_bytes.size() == 5);
     EXPECT(processed_bytes[0] == 0x5C);
     EXPECT(processed_bytes[1] == 0x82);
@@ -72,9 +105,10 @@ TEST_CASE(test_euc_kr_encoder)
     auto test_string = "\U0000B29F\U00007C97"sv;
 
     Vector<u8> processed_bytes;
-    MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
-        return processed_bytes.try_append(byte);
-    }));
+    MUST(encoder.process(
+        Utf8View(test_string),
+        [&](u8 byte) { return processed_bytes.try_append(byte); },
+        [&](u32) -> ErrorOr<void> { EXPECT(false); return {}; }));
     EXPECT(processed_bytes.size() == 4);
     EXPECT(processed_bytes[0] == 0x88);
     EXPECT(processed_bytes[1] == 0x6B);
@@ -90,9 +124,10 @@ TEST_CASE(test_big5_encoder)
     auto test_string = "\U000000A7\U000070D7"sv;
 
     Vector<u8> processed_bytes;
-    MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
-        return processed_bytes.try_append(byte);
-    }));
+    MUST(encoder.process(
+        Utf8View(test_string),
+        [&](u8 byte) { return processed_bytes.try_append(byte); },
+        [&](u32) -> ErrorOr<void> { EXPECT(false); return {}; }));
     EXPECT(processed_bytes.size() == 4);
     EXPECT(processed_bytes[0] == 0xA1);
     EXPECT(processed_bytes[1] == 0xB1);
@@ -108,10 +143,10 @@ TEST_CASE(test_gb18030_encoder)
     auto test_string = "\U000020AC\U0000E4C5"sv;
 
     Vector<u8> processed_bytes;
-    MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
-        return processed_bytes.try_append(byte);
-    }));
-
+    MUST(encoder.process(
+        Utf8View(test_string),
+        [&](u8 byte) { return processed_bytes.try_append(byte); },
+        [&](u32) -> ErrorOr<void> { EXPECT(false); return {}; }));
     EXPECT(processed_bytes.size() == 4);
     EXPECT(processed_bytes[0] == 0xA2);
     EXPECT(processed_bytes[1] == 0xE3);
diff --git a/Userland/Libraries/LibTextCodec/Encoder.cpp b/Userland/Libraries/LibTextCodec/Encoder.cpp
index 3448defb43d..540e3e782bd 100644
--- a/Userland/Libraries/LibTextCodec/Encoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Encoder.cpp
@@ -19,6 +19,7 @@ GB18030Encoder s_gb18030_encoder;
 GB18030Encoder s_gbk_encoder(GB18030Encoder::IsGBK::Yes);
 Big5Encoder s_big5_encoder;
 EUCJPEncoder s_euc_jp_encoder;
+ISO2022JPEncoder s_iso_2022_jp_encoder;
 ShiftJISEncoder s_shift_jis_encoder;
 EUCKREncoder s_euc_kr_encoder;
 }
@@ -31,6 +32,8 @@ Optional<Encoder&> encoder_for_exact_name(StringView encoding)
         return s_big5_encoder;
     if (encoding.equals_ignoring_ascii_case("euc-jp"sv))
         return s_euc_jp_encoder;
+    if (encoding.equals_ignoring_ascii_case("iso-2022-jp"sv))
+        return s_iso_2022_jp_encoder;
     if (encoding.equals_ignoring_ascii_case("shift_jis"sv))
         return s_shift_jis_encoder;
     if (encoding.equals_ignoring_ascii_case("euc-kr"sv))
@@ -50,7 +53,7 @@ Optional<Encoder&> encoder_for(StringView label)
 }
 
 // https://encoding.spec.whatwg.org/#utf-8-encoder
-ErrorOr<void> UTF8Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
+ErrorOr<void> UTF8Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)>)
 {
     ReadonlyBytes bytes { input.bytes(), input.byte_length() };
     for (auto byte : bytes)
@@ -59,7 +62,7 @@ ErrorOr<void> UTF8Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> o
 }
 
 // https://encoding.spec.whatwg.org/#euc-jp-encoder
-ErrorOr<void> EUCJPEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
+ErrorOr<void> EUCJPEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error)
 {
     for (auto item : input) {
         // 1. If code point is end-of-queue, return finished.
@@ -98,7 +101,7 @@ ErrorOr<void> EUCJPEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)>
 
         // 8. If pointer is null, return error with code point.
         if (!pointer.has_value()) {
-            // TODO: Report error.
+            TRY(on_error(item));
             continue;
         }
 
@@ -116,6 +119,136 @@ ErrorOr<void> EUCJPEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)>
     return {};
 }
 
+// https://encoding.spec.whatwg.org/#iso-2022-jp-encoder
+ErrorOr<ISO2022JPEncoder::State> ISO2022JPEncoder::process_item(u32 item, State state, Function<ErrorOr<void>(u8)>& on_byte, Function<ErrorOr<void>(u32)>& on_error)
+{
+    // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code point is U+000E, U+000F, or U+001B, return error with U+FFFD.
+    if (state == State::ASCII || state == State::Roman) {
+        if (item == 0x000E || item == 0x000F || item == 0x001B) {
+            TRY(on_error(0xFFFD));
+            return state;
+        }
+    }
+
+    // 4. If ISO-2022-JP encoder state is ASCII and code point is an ASCII code point, return a byte whose value is code point.
+    if (state == State::ASCII && is_ascii(item)) {
+        TRY(on_byte(static_cast<u8>(item)));
+        return state;
+    }
+
+    // 5. If ISO-2022-JP encoder state is Roman and code point is an ASCII code point, excluding U+005C and U+007E, or is U+00A5 or U+203E, then:
+    if (state == State::Roman && ((is_ascii(item) && item != 0x005C && item != 0x007E) || (item == 0x00A5 || item == 0x203E))) {
+        // 1. If code point is an ASCII code point, return a byte whose value is code point.
+        if (is_ascii(item)) {
+            TRY(on_byte(static_cast<u8>(item)));
+            return state;
+        }
+
+        // 2. If code point is U+00A5, return byte 0x5C.
+        if (item == 0x00A5) {
+            TRY(on_byte(0x5C));
+            return state;
+        }
+
+        // 3. If code point is U+203E, return byte 0x7E.
+        if (item == 0x203E) {
+            TRY(on_byte(0x7E));
+            return state;
+        }
+    }
+
+    // 6. If code point is an ASCII code point, and ISO-2022-JP encoder state is not ASCII, restore code point to ioQueue, set
+    //    ISO-2022-JP encoder state to ASCII, and return three bytes 0x1B 0x28 0x42.
+    if (is_ascii(item) && state != State::ASCII) {
+        TRY(on_byte(0x1B));
+        TRY(on_byte(0x28));
+        TRY(on_byte(0x42));
+        return process_item(item, State::ASCII, on_byte, on_error);
+    }
+
+    // 7. If code point is either U+00A5 or U+203E, and ISO-2022-JP encoder state is not Roman, restore code point to ioQueue,
+    //    set ISO-2022-JP encoder state to Roman, and return three bytes 0x1B 0x28 0x4A.
+    if ((item == 0x00A5 || item == 0x203E) && state != State::Roman) {
+        TRY(on_byte(0x1B));
+        TRY(on_byte(0x28));
+        TRY(on_byte(0x4A));
+        return process_item(item, State::Roman, on_byte, on_error);
+    }
+
+    // 8. If code point is U+2212, set it to U+FF0D.
+    if (item == 0x2212)
+        item = 0xFF0D;
+
+    // 9. If code point is in the range U+FF61 to U+FF9F, inclusive, set it to the index code point for code point − 0xFF61
+    //    in index ISO-2022-JP katakana.
+    if (item >= 0xFF61 && item <= 0xFF9F) {
+        item = *index_iso_2022_jp_katakana_code_point(item - 0xFF61);
+    }
+
+    // 10. Let pointer be the index pointer for code point in index jis0208.
+    auto pointer = code_point_jis0208_index(item);
+
+    // 11. If pointer is null, then:
+    if (!pointer.has_value()) {
+        // 1. If ISO-2022-JP encoder state is jis0208, then restore code point to ioQueue, set ISO-2022-JP encoder state to
+        //    ASCII, and return three bytes 0x1B 0x28 0x42. (0x1B 0x28 0x4A would announce Roman instead.)
+        if (state == State::jis0208) {
+            TRY(on_byte(0x1B));
+            TRY(on_byte(0x28));
+            TRY(on_byte(0x42));
+            return process_item(item, State::ASCII, on_byte, on_error);
+        }
+
+        // 2. Return error with code point.
+        TRY(on_error(item));
+        return state;
+    }
+
+    // 12. If ISO-2022-JP encoder state is not jis0208, restore code point to ioQueue, set ISO-2022-JP encoder state to
+    //     jis0208, and return three bytes 0x1B 0x24 0x42.
+    if (state != State::jis0208) {
+        TRY(on_byte(0x1B));
+        TRY(on_byte(0x24));
+        TRY(on_byte(0x42));
+        return process_item(item, State::jis0208, on_byte, on_error);
+    }
+
+    // 13. Let lead be pointer / 94 + 0x21.
+    auto lead = *pointer / 94 + 0x21;
+
+    // 14. Let trail be pointer % 94 + 0x21.
+    auto trail = *pointer % 94 + 0x21;
+
+    // 15. Return two bytes whose values are lead and trail.
+    TRY(on_byte(static_cast<u8>(lead)));
+    TRY(on_byte(static_cast<u8>(trail)));
+    return state;
+}
+
+// https://encoding.spec.whatwg.org/#iso-2022-jp-encoder
+ErrorOr<void> ISO2022JPEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error)
+{
+    // ISO-2022-JP’s encoder has an associated ISO-2022-JP encoder state which is ASCII, Roman, or jis0208 (initially ASCII).
+    // It is kept in a local here, so every call to process() starts out in the ASCII state.
+    State current_state = State::ASCII;
+
+    for (u32 code_point : input) {
+        current_state = TRY(process_item(code_point, current_state, on_byte, on_error));
+    }
+
+    // 2. If code point is end-of-queue and ISO-2022-JP encoder state is ASCII, return finished.
+    if (current_state == State::ASCII)
+        return {};
+
+    // 1. If code point is end-of-queue and ISO-2022-JP encoder state is not ASCII, set ISO-2022-JP
+    //    encoder state to ASCII, and return three bytes 0x1B 0x28 0x42.
+    //    NOTE: The state is local to this call, so only the escape sequence has to be emitted.
+    TRY(on_byte(0x1B));
+    TRY(on_byte(0x28));
+    TRY(on_byte(0x42));
+    return {};
+}
+
 static Optional<u32> code_point_jis0208_index_skipping_range(u32 code_point, u32 skip_from, u32 skip_to)
 {
     VERIFY(skip_to >= skip_from);
@@ -141,7 +274,7 @@ static Optional<u32> index_shift_jis_pointer(u32 code_point)
 }
 
 // https://encoding.spec.whatwg.org/#shift_jis-encoder
-ErrorOr<void> ShiftJISEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
+ErrorOr<void> ShiftJISEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error)
 {
     for (u32 item : input) {
         // 1. If code point is end-of-queue, return finished.
@@ -179,7 +312,7 @@ ErrorOr<void> ShiftJISEncoder::process(Utf8View input, Function<ErrorOr<void>(u8
 
         // 8. If pointer is null, return error with code point.
         if (!pointer.has_value()) {
-            // TODO: Report error.
+            TRY(on_error(item));
             continue;
         }
 
@@ -208,7 +341,7 @@ ErrorOr<void> ShiftJISEncoder::process(Utf8View input, Function<ErrorOr<void>(u8
 }
 
 // https://encoding.spec.whatwg.org/#euc-kr-encoder
-ErrorOr<void> EUCKREncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
+ErrorOr<void> EUCKREncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error)
 {
     for (u32 item : input) {
         // 1. If code point is end-of-queue, return finished.
@@ -224,7 +357,7 @@ ErrorOr<void> EUCKREncoder::process(Utf8View input, Function<ErrorOr<void>(u8)>
 
         // 4. If pointer is null, return error with code point.
         if (!pointer.has_value()) {
-            // TODO: Report error.
+            TRY(on_error(item));
             continue;
         }
 
@@ -269,7 +402,7 @@ static Optional<u32> index_big5_pointer(u32 code_point)
 }
 
 // https://encoding.spec.whatwg.org/#big5-encoder
-ErrorOr<void> Big5Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
+ErrorOr<void> Big5Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error)
 {
     for (u32 item : input) {
         // 1. If code point is end-of-queue, return finished.
@@ -285,7 +418,7 @@ ErrorOr<void> Big5Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> o
 
         // 4. If pointer is null, return error with code point.
         if (!pointer.has_value()) {
-            // TODO: Report error.
+            TRY(on_error(item));
             continue;
         }
 
@@ -334,7 +467,7 @@ GB18030Encoder::GB18030Encoder(IsGBK is_gbk)
 }
 
 // https://encoding.spec.whatwg.org/#gb18030-encoder
-ErrorOr<void> GB18030Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
+ErrorOr<void> GB18030Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error)
 {
     bool gbk = (m_is_gbk == IsGBK::Yes);
 
@@ -349,7 +482,7 @@ ErrorOr<void> GB18030Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)
 
         // 3. If code point is U+E5E5, return error with code point.
         if (item == 0xE5E5) {
-            // TODO: Report error.
+            TRY(on_error(item));
             continue;
         }
 
@@ -383,7 +516,7 @@ ErrorOr<void> GB18030Encoder::process(Utf8View input, Function<ErrorOr<void>(u8)
 
         // 7. If is GBK is true, return error with code point.
         if (gbk) {
-            // TODO: Report error.
+            TRY(on_error(item));
             continue;
         }
 
diff --git a/Userland/Libraries/LibTextCodec/Encoder.h b/Userland/Libraries/LibTextCodec/Encoder.h
index d21828dfa47..8241fb67153 100644
--- a/Userland/Libraries/LibTextCodec/Encoder.h
+++ b/Userland/Libraries/LibTextCodec/Encoder.h
@@ -13,7 +13,7 @@ namespace TextCodec {
 
 class Encoder {
 public:
-    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) = 0;
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) = 0;
 
 protected:
     virtual ~Encoder() = default;
@@ -21,27 +21,41 @@ protected:
 
 class UTF8Encoder final : public Encoder {
 public:
-    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
 };
 
 class EUCJPEncoder final : public Encoder {
 public:
-    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
+};
+
+class ISO2022JPEncoder final : public Encoder {
+public:
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
+
+private:
+    enum class State {
+        ASCII,   // Initial state; selected by the bytes 0x1B 0x28 0x42.
+        Roman,   // Selected by the bytes 0x1B 0x28 0x4A.
+        jis0208, // Selected by the bytes 0x1B 0x24 0x42.
+    };
+
+    ErrorOr<State> process_item(u32 item, State, Function<ErrorOr<void>(u8)>& on_byte, Function<ErrorOr<void>(u32)>& on_error);
 };
 
 class ShiftJISEncoder final : public Encoder {
 public:
-    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
 };
 
 class EUCKREncoder final : public Encoder {
 public:
-    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
 };
 
 class Big5Encoder final : public Encoder {
 public:
-    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
 };
 
 class GB18030Encoder final : public Encoder {
@@ -53,7 +67,7 @@ public:
 
     GB18030Encoder(IsGBK is_gbk = IsGBK::No);
 
-    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte, Function<ErrorOr<void>(u32)> on_error) override;
 
 private:
     IsGBK m_is_gbk { IsGBK::No };
diff --git a/Userland/Libraries/LibURL/Parser.cpp b/Userland/Libraries/LibURL/Parser.cpp
index f752410f958..af1c710acb1 100644
--- a/Userland/Libraries/LibURL/Parser.cpp
+++ b/Userland/Libraries/LibURL/Parser.cpp
@@ -775,31 +775,42 @@ ErrorOr<String> Parser::percent_encode_after_encoding(TextCodec::Encoder& encode
     // 1. Let encodeOutput be an empty I/O queue.
     StringBuilder output;
 
-    // 3. For each byte of encodeOutput converted to a byte sequence:
-    TRY(encoder.process(Utf8View(input), [&](u8 byte) -> ErrorOr<void> {
-        // 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
-        if (space_as_plus && byte == ' ') {
-            output.append('+');
+    // 2. Set potentialError to the result of running encode or fail with inputQueue, encoder, and encodeOutput.
+    TRY(encoder.process(
+        Utf8View(input),
+
+        // 3. For each byte of encodeOutput converted to a byte sequence:
+        [&](u8 byte) -> ErrorOr<void> {
+            // 1. If spaceAsPlus is true and byte is 0x20 (SP), then append U+002B (+) to output and continue.
+            if (space_as_plus && byte == ' ') {
+                output.append('+');
+                return {};
+            }
+
+            // 2. Let isomorph be a code point whose value is byte’s value.
+            u32 isomorph = byte;
+
+            // 3. Assert: percentEncodeSet includes all non-ASCII code points.
+
+            // 4. If isomorph is not in percentEncodeSet, then append isomorph to output.
+            if (!code_point_is_in_percent_encode_set(isomorph, percent_encode_set)) {
+                output.append_code_point(isomorph);
+            }
+
+            // 5. Otherwise, percent-encode byte and append the result to output.
+            else {
+                output.appendff("%{:02X}", byte);
+            }
+
             return {};
-        }
+        },
 
-        // 2. Let isomorph be a code point whose value is byte’s value.
-        u32 isomorph = byte;
-
-        // 3. Assert: percentEncodeSet includes all non-ASCII code points.
-
-        // 4. If isomorphic is not in percentEncodeSet, then append isomorph to output.
-        if (!code_point_is_in_percent_encode_set(isomorph, percent_encode_set)) {
-            output.append_code_point(isomorph);
-        }
-
-        // 5. Otherwise, percent-encode byte and append the result to output.
-        else {
-            output.appendff("%{:02X}", byte);
-        }
-
-        return {};
-    }));
+        // 4. If potentialError is non-null, then append "%26%23", followed by the shortest sequence of ASCII digits
+        //    representing potentialError in base ten, followed by "%3B", to output.
+        [&](u32 error) -> ErrorOr<void> {
+            output.appendff("%26%23{}%3B", error);
+            return {};
+        }));
 
     // 6. Return output.
     return output.to_string();