diff --git a/Tests/LibWeb/Layout/expected/multi-code-point-graphemes.txt b/Tests/LibWeb/Layout/expected/multi-code-point-graphemes.txt
new file mode 100644
index 00000000000..095b06b0e73
--- /dev/null
+++ b/Tests/LibWeb/Layout/expected/multi-code-point-graphemes.txt
@@ -0,0 +1,34 @@
+Viewport <#document> at (0,0) content-size 800x600 children: not-inline
+ BlockContainer <html> at (0,0) content-size 800x600 [BFC] children: not-inline
+ BlockContainer <body> at (8,16) content-size 784x83 children: not-inline
+ BlockContainer <p> at (8,16) content-size 784x17 children: inline
+ frag 0 from TextNode start: 0, length: 11, rect: [8,16 20.3125x17] baseline: 13.296875
+ "🧑🚒"
+ TextNode <#text>
+ BlockContainer <(anonymous)> at (8,49) content-size 784x0 children: inline
+ TextNode <#text>
+ BlockContainer <p> at (8,49) content-size 784x17 children: inline
+ frag 0 from TextNode start: 0, length: 13, rect: [8,49 20.3125x17] baseline: 13.296875
+ "🏴☠️"
+ TextNode <#text>
+ BlockContainer <(anonymous)> at (8,82) content-size 784x0 children: inline
+ TextNode <#text>
+ BlockContainer <p> at (8,82) content-size 784x17 children: inline
+ frag 0 from TextNode start: 0, length: 25, rect: [8,82 20.3125x17] baseline: 13.296875
+ "🧑🧑🧒🧒"
+ TextNode <#text>
+ BlockContainer <(anonymous)> at (8,115) content-size 784x0 children: inline
+ TextNode <#text>
+
+ViewportPaintable (Viewport<#document>) [0,0 800x600]
+ PaintableWithLines (BlockContainer<HTML>) [0,0 800x600]
+ PaintableWithLines (BlockContainer<BODY>) [8,16 784x83] overflow: [8,16 784x99]
+ PaintableWithLines (BlockContainer<P>) [8,16 784x17]
+ TextPaintable (TextNode<#text>)
+ PaintableWithLines (BlockContainer(anonymous)) [8,49 784x0]
+ PaintableWithLines (BlockContainer<P>) [8,49 784x17]
+ TextPaintable (TextNode<#text>)
+ PaintableWithLines (BlockContainer(anonymous)) [8,82 784x0]
+ PaintableWithLines (BlockContainer<P>) [8,82 784x17]
+ TextPaintable (TextNode<#text>)
+ PaintableWithLines (BlockContainer(anonymous)) [8,115 784x0]
diff --git a/Tests/LibWeb/Layout/input/multi-code-point-graphemes.html b/Tests/LibWeb/Layout/input/multi-code-point-graphemes.html
new file mode 100644
index 00000000000..52edb1961ee
--- /dev/null
+++ b/Tests/LibWeb/Layout/input/multi-code-point-graphemes.html
@@ -0,0 +1,3 @@
+<p>🧑‍🚒</p>
+<p>🏴‍☠️</p>
+<p>🧑‍🧑‍🧒‍🧒</p>
diff --git a/Tests/LibWeb/Screenshot/images/text-direction-ref.png b/Tests/LibWeb/Screenshot/images/text-direction-ref.png
index a6cb860b88a..6c2db188a7b 100644
Binary files a/Tests/LibWeb/Screenshot/images/text-direction-ref.png and b/Tests/LibWeb/Screenshot/images/text-direction-ref.png differ
diff --git a/Userland/Libraries/LibWeb/Layout/TextNode.cpp b/Userland/Libraries/LibWeb/Layout/TextNode.cpp
index 39ba6c9a434..d0998f2e74e 100644
--- a/Userland/Libraries/LibWeb/Layout/TextNode.cpp
+++ b/Userland/Libraries/LibWeb/Layout/TextNode.cpp
@@ -391,13 +391,14 @@ void TextNode::compute_text_for_rendering()
m_text_for_rendering = MUST(builder.to_string());
}
-TextNode::ChunkIterator::ChunkIterator(StringView text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const& font_cascade_list)
+TextNode::ChunkIterator::ChunkIterator(String const& text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const& font_cascade_list)
: m_wrap_lines(wrap_lines)
, m_respect_linebreaks(respect_linebreaks)
, m_utf8_view(text)
- , m_iterator(m_utf8_view.begin())
, m_font_cascade_list(font_cascade_list)
+ , m_segmenter(Unicode::Segmenter::create(Unicode::SegmenterGranularity::Grapheme))
{
+ m_segmenter->set_segmented_text(text);
}
static Gfx::GlyphRun::TextType text_type_for_code_point(u32 code_point)
@@ -462,75 +463,85 @@ Optional<TextNode::Chunk> TextNode::ChunkIterator::peek(size_t count)
Optional<TextNode::Chunk> TextNode::ChunkIterator::next_without_peek()
{
- if (m_iterator == m_utf8_view.end())
+ if (m_current_index >= m_utf8_view.byte_length())
return {};
- auto start_of_chunk = m_iterator;
+ auto current_code_point = [this]() {
+ return *m_utf8_view.iterator_at_byte_offset_without_validation(m_current_index);
+ };
+ auto next_grapheme_boundary = [this]() {
+ return m_segmenter->next_boundary(m_current_index).value_or(m_utf8_view.byte_length());
+ };
- Gfx::Font const& font = m_font_cascade_list.font_for_code_point(*m_iterator);
- auto text_type = text_type_for_code_point(*m_iterator);
- while (m_iterator != m_utf8_view.end()) {
- if (&font != &m_font_cascade_list.font_for_code_point(*m_iterator)) {
- if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value())
+ auto code_point = current_code_point();
+ auto start_of_chunk = m_current_index;
+
+ Gfx::Font const& font = m_font_cascade_list.font_for_code_point(code_point);
+ auto text_type = text_type_for_code_point(code_point);
+
+ while (m_current_index < m_utf8_view.byte_length()) {
+ code_point = current_code_point();
+
+ if (&font != &m_font_cascade_list.font_for_code_point(code_point)) {
+ if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value())
return result.release_value();
}
- if (m_respect_linebreaks && *m_iterator == '\n') {
+ if (m_respect_linebreaks && code_point == '\n') {
// Newline encountered, and we're supposed to preserve them.
// If we have accumulated some code points in the current chunk, commit them now and continue with the newline next time.
- if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value())
+ if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value())
return result.release_value();
// Otherwise, commit the newline!
- ++m_iterator;
- auto result = try_commit_chunk(start_of_chunk, m_iterator, true, font, text_type);
+ m_current_index = next_grapheme_boundary();
+ auto result = try_commit_chunk(start_of_chunk, m_current_index, true, font, text_type);
VERIFY(result.has_value());
return result.release_value();
}
if (m_wrap_lines) {
- if (text_type != text_type_for_code_point(*m_iterator)) {
- if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value())
+ if (text_type != text_type_for_code_point(code_point)) {
+ if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value()) {
return result.release_value();
+ }
}
- if (is_ascii_space(*m_iterator)) {
+ if (is_ascii_space(code_point)) {
// Whitespace encountered, and we're allowed to break on whitespace.
// If we have accumulated some code points in the current chunk, commit them now and continue with the whitespace next time.
- if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value())
+ if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value()) {
return result.release_value();
+ }
// Otherwise, commit the whitespace!
- ++m_iterator;
- if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false, font, text_type); result.has_value())
+ m_current_index = next_grapheme_boundary();
+ if (auto result = try_commit_chunk(start_of_chunk, m_current_index, false, font, text_type); result.has_value())
return result.release_value();
continue;
}
}
- ++m_iterator;
+ m_current_index = next_grapheme_boundary();
}
- if (start_of_chunk != m_utf8_view.end()) {
+ if (start_of_chunk != m_utf8_view.byte_length()) {
// Try to output whatever's left at the end of the text node.
- if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.end(), false, font, text_type); result.has_value())
+ if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.byte_length(), false, font, text_type); result.has_value())
return result.release_value();
}
return {};
}
-Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, Gfx::Font const& font, Gfx::GlyphRun::TextType text_type) const
+Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(size_t start, size_t end, bool has_breaking_newline, Gfx::Font const& font, Gfx::GlyphRun::TextType text_type) const
{
- auto byte_offset = m_utf8_view.byte_offset_of(start);
- auto byte_length = m_utf8_view.byte_offset_of(end) - byte_offset;
-
- if (byte_length > 0) {
- auto chunk_view = m_utf8_view.substring_view(byte_offset, byte_length);
+ if (auto byte_length = end - start; byte_length > 0) {
+ auto chunk_view = m_utf8_view.substring_view(start, byte_length);
return Chunk {
.view = chunk_view,
.font = font,
- .start = byte_offset,
+ .start = start,
.length = byte_length,
.has_breaking_newline = has_breaking_newline,
.is_all_whitespace = is_all_whitespace(chunk_view.as_string()),
diff --git a/Userland/Libraries/LibWeb/Layout/TextNode.h b/Userland/Libraries/LibWeb/Layout/TextNode.h
index c6f3d691cfb..40a9cd62bfa 100644
--- a/Userland/Libraries/LibWeb/Layout/TextNode.h
+++ b/Userland/Libraries/LibWeb/Layout/TextNode.h
@@ -7,6 +7,7 @@
#pragma once
#include
+#include
#include
#include
@@ -38,20 +39,23 @@ public:
class ChunkIterator {
public:
- ChunkIterator(StringView text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const&);
+ ChunkIterator(String const& text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const&);
+
Optional<Chunk> next();
Optional<Chunk> peek(size_t);
private:
Optional<Chunk> next_without_peek();
- Optional<Chunk> try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, Gfx::Font const&, Gfx::GlyphRun::TextType) const;
+ Optional<Chunk> try_commit_chunk(size_t start, size_t end, bool has_breaking_newline, Gfx::Font const&, Gfx::GlyphRun::TextType) const;
bool const m_wrap_lines;
bool const m_respect_linebreaks;
Utf8View m_utf8_view;
- Utf8View::Iterator m_iterator;
Gfx::FontCascadeList const& m_font_cascade_list;
+ NonnullOwnPtr<Unicode::Segmenter> m_segmenter;
+ size_t m_current_index { 0 };
+
Vector<Chunk> m_peek_queue;
};