diff --git a/Tests/LibWeb/Layout/expected/multi-code-point-graphemes.txt b/Tests/LibWeb/Layout/expected/multi-code-point-graphemes.txt new file mode 100644 index 00000000000..095b06b0e73 --- /dev/null +++ b/Tests/LibWeb/Layout/expected/multi-code-point-graphemes.txt @@ -0,0 +1,34 @@ +Viewport <#document> at (0,0) content-size 800x600 children: not-inline + BlockContainer at (0,0) content-size 800x600 [BFC] children: not-inline + BlockContainer at (8,16) content-size 784x83 children: not-inline + BlockContainer

at (8,16) content-size 784x17 children: inline + frag 0 from TextNode start: 0, length: 11, rect: [8,16 20.3125x17] baseline: 13.296875 + "🧑‍🚒" + TextNode <#text> + BlockContainer <(anonymous)> at (8,49) content-size 784x0 children: inline + TextNode <#text> + BlockContainer

at (8,49) content-size 784x17 children: inline + frag 0 from TextNode start: 0, length: 13, rect: [8,49 20.3125x17] baseline: 13.296875 + "🏴‍☠️" + TextNode <#text> + BlockContainer <(anonymous)> at (8,82) content-size 784x0 children: inline + TextNode <#text> + BlockContainer

at (8,82) content-size 784x17 children: inline + frag 0 from TextNode start: 0, length: 25, rect: [8,82 20.3125x17] baseline: 13.296875 + "🧑‍🧑‍🧒‍🧒" + TextNode <#text> + BlockContainer <(anonymous)> at (8,115) content-size 784x0 children: inline + TextNode <#text> + +ViewportPaintable (Viewport<#document>) [0,0 800x600] + PaintableWithLines (BlockContainer) [0,0 800x600] + PaintableWithLines (BlockContainer) [8,16 784x83] overflow: [8,16 784x99] + PaintableWithLines (BlockContainer

) [8,16 784x17] + TextPaintable (TextNode<#text>) + PaintableWithLines (BlockContainer(anonymous)) [8,49 784x0] + PaintableWithLines (BlockContainer

) [8,49 784x17] + TextPaintable (TextNode<#text>) + PaintableWithLines (BlockContainer(anonymous)) [8,82 784x0] + PaintableWithLines (BlockContainer

) [8,82 784x17] + TextPaintable (TextNode<#text>) + PaintableWithLines (BlockContainer(anonymous)) [8,115 784x0] diff --git a/Tests/LibWeb/Layout/input/multi-code-point-graphemes.html b/Tests/LibWeb/Layout/input/multi-code-point-graphemes.html new file mode 100644 index 00000000000..52edb1961ee --- /dev/null +++ b/Tests/LibWeb/Layout/input/multi-code-point-graphemes.html @@ -0,0 +1,3 @@ +

🧑‍🚒

+

🏴‍☠️

+

🧑‍🧑‍🧒‍🧒

diff --git a/Tests/LibWeb/Screenshot/images/text-direction-ref.png b/Tests/LibWeb/Screenshot/images/text-direction-ref.png index a6cb860b88a..6c2db188a7b 100644 Binary files a/Tests/LibWeb/Screenshot/images/text-direction-ref.png and b/Tests/LibWeb/Screenshot/images/text-direction-ref.png differ diff --git a/Userland/Libraries/LibWeb/Layout/TextNode.cpp b/Userland/Libraries/LibWeb/Layout/TextNode.cpp index 39ba6c9a434..d0998f2e74e 100644 --- a/Userland/Libraries/LibWeb/Layout/TextNode.cpp +++ b/Userland/Libraries/LibWeb/Layout/TextNode.cpp @@ -391,13 +391,14 @@ void TextNode::compute_text_for_rendering() m_text_for_rendering = MUST(builder.to_string()); } -TextNode::ChunkIterator::ChunkIterator(StringView text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const& font_cascade_list) +TextNode::ChunkIterator::ChunkIterator(String const& text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const& font_cascade_list) : m_wrap_lines(wrap_lines) , m_respect_linebreaks(respect_linebreaks) , m_utf8_view(text) - , m_iterator(m_utf8_view.begin()) , m_font_cascade_list(font_cascade_list) + , m_segmenter(Unicode::Segmenter::create(Unicode::SegmenterGranularity::Grapheme)) { + m_segmenter->set_segmented_text(text); } static Gfx::GlyphRun::TextType text_type_for_code_point(u32 code_point) @@ -462,75 +463,85 @@ Optional TextNode::ChunkIterator::peek(size_t count) Optional TextNode::ChunkIterator::next_without_peek() { - if (m_iterator == m_utf8_view.end()) + if (m_current_index >= m_utf8_view.byte_length()) return {}; - auto start_of_chunk = m_iterator; + auto current_code_point = [this]() { + return *m_utf8_view.iterator_at_byte_offset_without_validation(m_current_index); + }; + auto next_grapheme_boundary = [this]() { + return m_segmenter->next_boundary(m_current_index).value_or(m_utf8_view.byte_length()); + }; - Gfx::Font const& font = m_font_cascade_list.font_for_code_point(*m_iterator); - auto text_type = text_type_for_code_point(*m_iterator); - while (m_iterator != m_utf8_view.end()) { - if (&font != &m_font_cascade_list.font_for_code_point(*m_iterator)) { - if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value()) + auto code_point = current_code_point(); + auto start_of_chunk = m_current_index; + + Gfx::Font const& font = m_font_cascade_list.font_for_code_point(code_point); + auto text_type = text_type_for_code_point(code_point); + + while (m_current_index < m_utf8_view.byte_length()) { + code_point = current_code_point(); + + if (&font != &m_font_cascade_list.font_for_code_point(code_point)) { + if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value()) return result.release_value(); } - if (m_respect_linebreaks && *m_iterator == '\n') { + if (m_respect_linebreaks && code_point == '\n') { // Newline encountered, and we're supposed to preserve them. // If we have accumulated some code points in the current chunk, commit them now and continue with the newline next time. - if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value()) + if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value()) return result.release_value(); // Otherwise, commit the newline! - ++m_iterator; - auto result = try_commit_chunk(start_of_chunk, m_iterator, true, font, text_type); + m_current_index = next_grapheme_boundary(); + auto result = try_commit_chunk(start_of_chunk, m_current_index, true, font, text_type); VERIFY(result.has_value()); return result.release_value(); } if (m_wrap_lines) { - if (text_type != text_type_for_code_point(*m_iterator)) { - if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value()) + if (text_type != text_type_for_code_point(code_point)) { + if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value()) { return result.release_value(); + } } - if (is_ascii_space(*m_iterator)) { + if (is_ascii_space(code_point)) { // Whitespace encountered, and we're allowed to break on whitespace. // If we have accumulated some code points in the current chunk, commit them now and continue with the whitespace next time. - if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value()) + if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value()) { return result.release_value(); + } // Otherwise, commit the whitespace! - ++m_iterator; - if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value()) + m_current_index = next_grapheme_boundary(); + if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value()) return result.release_value(); continue; } } - ++m_iterator; + m_current_index = next_grapheme_boundary(); } - if (start_of_chunk != m_utf8_view.end()) { + if (start_of_chunk != m_utf8_view.byte_length()) { // Try to output whatever's left at the end of the text node. - if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.end(), false, font, text_type); result.has_value()) + if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.byte_length(), false, font, text_type); result.has_value()) return result.release_value(); } return {}; } -Optional TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, Gfx::Font const& font, Gfx::GlyphRun::TextType text_type) const +Optional TextNode::ChunkIterator::try_commit_chunk(size_t start, size_t end, bool has_breaking_newline, Gfx::Font const& font, Gfx::GlyphRun::TextType text_type) const { - auto byte_offset = m_utf8_view.byte_offset_of(start); - auto byte_length = m_utf8_view.byte_offset_of(end) - byte_offset; - - if (byte_length > 0) { - auto chunk_view = m_utf8_view.substring_view(byte_offset, byte_length); + if (auto byte_length = end - start; byte_length > 0) { + auto chunk_view = m_utf8_view.substring_view(start, byte_length); return Chunk { .view = chunk_view, .font = font, - .start = byte_offset, + .start = start, .length = byte_length, .has_breaking_newline = has_breaking_newline, .is_all_whitespace = is_all_whitespace(chunk_view.as_string()), diff --git a/Userland/Libraries/LibWeb/Layout/TextNode.h b/Userland/Libraries/LibWeb/Layout/TextNode.h index c6f3d691cfb..40a9cd62bfa 100644 --- a/Userland/Libraries/LibWeb/Layout/TextNode.h +++ b/Userland/Libraries/LibWeb/Layout/TextNode.h @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include @@ -38,20 +39,23 @@ public: class ChunkIterator { public: - ChunkIterator(StringView text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const&); + ChunkIterator(String const& text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const&); + Optional next(); Optional peek(size_t); private: Optional next_without_peek(); - Optional try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, Gfx::Font const&, Gfx::GlyphRun::TextType) const; + Optional try_commit_chunk(size_t start, size_t end, bool has_breaking_newline, Gfx::Font const&, Gfx::GlyphRun::TextType) const; bool const m_wrap_lines; bool const m_respect_linebreaks; Utf8View m_utf8_view; - Utf8View::Iterator m_iterator; Gfx::FontCascadeList const& m_font_cascade_list; + NonnullOwnPtr m_segmenter; + size_t m_current_index { 0 }; + Vector m_peek_queue; };