AK: Add base64url encoding and decoding methods

This encoding scheme comes from section 5 of RFC 4648, as an
alternative to the standard base64 encode/decode methods.

The only difference is that the last two characters of the alphabet,
'+' and '/', are replaced with '-' and '_', because '+' and '/' are not
safe in URLs or filenames.
This commit is contained in:
Andrew Kaster 2024-03-20 06:21:59 -06:00 committed by Tim Flynn
parent 6e2685f091
commit e9b16970fe
Notes: sideshowbarker 2024-07-17 09:49:48 +09:00
3 changed files with 116 additions and 8 deletions

View file

@ -24,10 +24,9 @@ size_t calculate_base64_encoded_length(ReadonlyBytes input)
return ((4 * input.size() / 3) + 3) & ~3;
}
ErrorOr<ByteBuffer> decode_base64(StringView input)
template<auto alphabet_lookup_table>
ErrorOr<ByteBuffer> decode_base64_impl(StringView input)
{
auto alphabet_lookup_table = base64_lookup_table();
auto get = [&](size_t& offset, bool* is_padding, bool& parsed_something) -> ErrorOr<u8> {
while (offset < input.length() && is_ascii_space(input[offset]))
++offset;
@ -80,7 +79,8 @@ ErrorOr<ByteBuffer> decode_base64(StringView input)
return ByteBuffer::copy(output);
}
ErrorOr<String> encode_base64(ReadonlyBytes input)
template<auto alphabet>
ErrorOr<String> encode_base64_impl(ReadonlyBytes input)
{
StringBuilder output(calculate_base64_encoded_length(input));
@ -106,10 +106,10 @@ ErrorOr<String> encode_base64(ReadonlyBytes input)
const u8 index2 = ((in1 << 2) | (in2 >> 6)) & 0x3f;
const u8 index3 = in2 & 0x3f;
char const out0 = base64_alphabet[index0];
char const out1 = base64_alphabet[index1];
char const out2 = is_16bit ? '=' : base64_alphabet[index2];
char const out3 = is_8bit ? '=' : base64_alphabet[index3];
char const out0 = alphabet[index0];
char const out1 = alphabet[index1];
char const out2 = is_16bit ? '=' : alphabet[index2];
char const out3 = is_8bit ? '=' : alphabet[index3];
TRY(output.try_append(out0));
TRY(output.try_append(out1));
@ -120,4 +120,23 @@ ErrorOr<String> encode_base64(ReadonlyBytes input)
return output.to_string();
}
// Decodes `input` with the lookup table produced by base64_lookup_table().
// Returns the decoded bytes, or whatever error the shared decoder reports.
ErrorOr<ByteBuffer> decode_base64(StringView input)
{
    // Name the compile-time table first, then pick the decoder instantiation for it.
    constexpr auto standard_lookup_table = base64_lookup_table();
    return decode_base64_impl<standard_lookup_table>(input);
}
// Decodes `input` with the lookup table produced by base64url_lookup_table(),
// i.e. the URL- and filename-safe alphabet from RFC 4648, section 5.
// Returns the decoded bytes, or whatever error the shared decoder reports.
ErrorOr<ByteBuffer> decode_base64url(StringView input)
{
    // Name the compile-time table first, then pick the decoder instantiation for it.
    constexpr auto url_safe_lookup_table = base64url_lookup_table();
    return decode_base64_impl<url_safe_lookup_table>(input);
}
// Encodes `input` with the standard base64 alphabet (base64_alphabet).
// Returns the encoded string, or whatever error the shared encoder reports.
ErrorOr<String> encode_base64(ReadonlyBytes input)
{
    // Template arguments of class type are compared by value, so this copy
    // selects the same encoder instantiation as naming base64_alphabet directly.
    constexpr auto standard_alphabet = base64_alphabet;
    return encode_base64_impl<standard_alphabet>(input);
}
// Encodes `input` with the URL- and filename-safe alphabet (base64url_alphabet).
// Returns the encoded string, or whatever error the shared encoder reports.
ErrorOr<String> encode_base64url(ReadonlyBytes input)
{
    // Template arguments of class type are compared by value, so this copy
    // selects the same encoder instantiation as naming base64url_alphabet directly.
    constexpr auto url_safe_alphabet = base64url_alphabet;
    return encode_base64_impl<url_safe_alphabet>(input);
}
}

View file

@ -14,6 +14,7 @@
namespace AK {
// https://datatracker.ietf.org/doc/html/rfc4648#section-4
constexpr Array base64_alphabet = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
@ -25,6 +26,18 @@ constexpr Array base64_alphabet = {
'4', '5', '6', '7', '8', '9', '+', '/'
};
// https://datatracker.ietf.org/doc/html/rfc4648#section-5
// URL- and filename-safe base64 alphabet: entry i is the character emitted for
// the 6-bit value i. The final two entries are '-' and '_', where the standard
// alphabet has '+' and '/'.
constexpr Array base64url_alphabet = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
'w', 'x', 'y', 'z', '0', '1', '2', '3',
'4', '5', '6', '7', '8', '9', '-', '_'
};
consteval auto base64_lookup_table()
{
Array<i16, 256> table;
@ -35,16 +48,30 @@ consteval auto base64_lookup_table()
return table;
}
// Builds, at compile time, the reverse mapping for base64url_alphabet:
// a 256-entry table indexed by character value that holds the character's
// position in the alphabet, or -1 for characters outside the alphabet.
consteval auto base64url_lookup_table()
{
    Array<i16, 256> decode_map;
    // Start with every character marked as "not part of the alphabet".
    decode_map.fill(-1);

    size_t position = 0;
    while (position < base64url_alphabet.size()) {
        auto const symbol = base64url_alphabet[position];
        decode_map[symbol] = static_cast<i16>(position);
        ++position;
    }
    return decode_map;
}
// Length helpers for base64 input and output.
[[nodiscard]] size_t calculate_base64_decoded_length(StringView);
[[nodiscard]] size_t calculate_base64_encoded_length(ReadonlyBytes);
// Decoders: yield the decoded bytes or an error. decode_base64 uses the table
// from base64_lookup_table(), decode_base64url the one from base64url_lookup_table().
[[nodiscard]] ErrorOr<ByteBuffer> decode_base64(StringView);
[[nodiscard]] ErrorOr<ByteBuffer> decode_base64url(StringView);
// Encoders: yield the encoded string or an error. encode_base64 uses
// base64_alphabet, encode_base64url uses base64url_alphabet.
[[nodiscard]] ErrorOr<String> encode_base64(ReadonlyBytes);
[[nodiscard]] ErrorOr<String> encode_base64url(ReadonlyBytes);
}
// When USING_AK_GLOBALLY is set, make the base64 encode/decode functions
// (standard and URL-safe variants) available without the AK:: prefix.
#if USING_AK_GLOBALLY
using AK::decode_base64;
using AK::decode_base64url;
using AK::encode_base64;
using AK::encode_base64url;
#endif

View file

@ -27,6 +27,8 @@ TEST_CASE(test_decode)
decode_equal("Zm9vYmFy"sv, "foobar"sv);
decode_equal("Z m\r9\n v\v Ym\tFy"sv, "foobar"sv);
EXPECT_EQ(decode_base64(" ZD Qg\r\nPS An Zm91cic\r\n 7"sv).value(), decode_base64("ZDQgPSAnZm91cic7"sv).value());
decode_equal("aGVsbG8/d29ybGQ="sv, "hello?world"sv);
}
TEST_CASE(test_decode_invalid)
@ -35,6 +37,23 @@ TEST_CASE(test_decode_invalid)
EXPECT(decode_base64(("asdf\x80qwe"sv)).is_error());
EXPECT(decode_base64(("asdf:qwe"sv)).is_error());
EXPECT(decode_base64(("asdf=qwe"sv)).is_error());
EXPECT(decode_base64("aGVsbG8_d29ybGQ="sv).is_error());
EXPECT(decode_base64url("aGVsbG8/d29ybGQ="sv).is_error());
}
// Checks that input consisting solely of '=' padding (one to four characters)
// is reported as an error by both the standard and the URL-safe decoder.
TEST_CASE(test_decode_only_padding)
{
// Standard alphabet: padding-only input is not allowed.
EXPECT(decode_base64("="sv).is_error());
EXPECT(decode_base64("=="sv).is_error());
EXPECT(decode_base64("==="sv).is_error());
EXPECT(decode_base64("===="sv).is_error());
// URL-safe alphabet: same expectation.
EXPECT(decode_base64url("="sv).is_error());
EXPECT(decode_base64url("=="sv).is_error());
EXPECT(decode_base64url("==="sv).is_error());
EXPECT(decode_base64url("===="sv).is_error());
}
TEST_CASE(test_encode)
@ -53,3 +72,46 @@ TEST_CASE(test_encode)
encode_equal("fooba"sv, "Zm9vYmE="sv);
encode_equal("foobar"sv, "Zm9vYmFy"sv);
}
// Exercises decode_base64url on the RFC 4648 "foobar" vectors, on input with
// interleaved whitespace, on a longer text, and on inputs that contain each of
// the two characters specific to the URL-safe alphabet ('_' and '-').
TEST_CASE(test_urldecode)
{
    // Decodes `input`, compares it against `expected`, and checks that the
    // computed decoded length is large enough to hold the result.
    auto decode_equal = [&](StringView input, StringView expected) {
        auto decoded = TRY_OR_FAIL(decode_base64url(input));
        EXPECT(ByteString::copy(decoded) == expected);
        EXPECT(expected.length() <= calculate_base64_decoded_length(input.bytes()));
    };

    decode_equal(""sv, ""sv);
    decode_equal("Zg=="sv, "f"sv);
    decode_equal("Zm8="sv, "fo"sv);
    decode_equal("Zm9v"sv, "foo"sv);
    decode_equal("Zm9vYg=="sv, "foob"sv);
    decode_equal("Zm9vYmE="sv, "fooba"sv);
    decode_equal("Zm9vYmFy"sv, "foobar"sv);
    // Whitespace between characters is skipped by the decoder.
    decode_equal("Z m\r9\n v\v Ym\tFy"sv, "foobar"sv);
    decode_equal("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEu"sv, "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."sv);
    // '_' is alphabet index 63: the low six bits of '?' (0x3f).
    decode_equal("aGVsbG8_d29ybGQ="sv, "hello?world"sv);
    // '-' is alphabet index 62: the low six bits of '>' (0x3e).
    decode_equal("aGVsbG8-d29ybGQ="sv, "hello>world"sv);
}
// Exercises encode_base64url on the RFC 4648 "foobar" vectors, on a longer
// text, and on inputs whose encoding contains each of the two characters
// specific to the URL-safe alphabet ('_' and '-').
TEST_CASE(test_urlencode)
{
    // Encodes `input`, compares it against `expected`, and checks that the
    // computed encoded length matches the expected output exactly.
    auto encode_equal = [&](StringView input, StringView expected) {
        auto encoded = MUST(encode_base64url(input.bytes()));
        EXPECT(encoded == expected);
        EXPECT_EQ(expected.length(), calculate_base64_encoded_length(input.bytes()));
    };

    encode_equal(""sv, ""sv);
    encode_equal("f"sv, "Zg=="sv);
    encode_equal("fo"sv, "Zm8="sv);
    encode_equal("foo"sv, "Zm9v"sv);
    encode_equal("foob"sv, "Zm9vYg=="sv);
    encode_equal("fooba"sv, "Zm9vYmE="sv);
    encode_equal("foobar"sv, "Zm9vYmFy"sv);
    encode_equal("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."sv, "TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEu"sv);
    // '?' (0x3f) ends a 3-byte group, so its low six bits select index 63: '_'.
    encode_equal("hello?world"sv, "aGVsbG8_d29ybGQ="sv);
    // '>' (0x3e) ends a 3-byte group, so its low six bits select index 62: '-'.
    encode_equal("hello>world"sv, "aGVsbG8-d29ybGQ="sv);
    encode_equal("hello!!world"sv, "aGVsbG8hIXdvcmxk"sv);
}