LibWeb: Move initial creation of Unicode segmenters to the Document

The expensive part of creating a segmenter is doing the locale and UCD
data lookups at creation time. Instead of doing this once per text node,
cache the segmenters on the document, and clone them as needed (cloning
is much, much cheaper).

On a profile loading Ladybird's GitHub repo, the hot methods changed as
follows:

    ChunkIterator ctor: 6.08% -> 0.21%
    Segmenter factory:  5.86% ->    0%
    Segmenter clone:    N/A   -> 0.09%
This commit is contained in:
Timothy Flynn 2024-09-22 10:03:23 -04:00 committed by Andreas Kling
parent 42a1a0bd73
commit 5d71758742
Notes: github-actions[bot] 2024-09-22 16:43:21 +00:00
8 changed files with 52 additions and 18 deletions

View file

@ -157,20 +157,20 @@ WebIDL::ExceptionOr<void> CharacterData::delete_data(size_t offset, size_t count
return replace_data(offset, count, String {});
}
Unicode::Segmenter& CharacterData::grapheme_segmenter()
Unicode::Segmenter& CharacterData::grapheme_segmenter() const
{
if (!m_grapheme_segmenter) {
m_grapheme_segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Grapheme);
m_grapheme_segmenter = document().grapheme_segmenter().clone();
m_grapheme_segmenter->set_segmented_text(m_data);
}
return *m_grapheme_segmenter;
}
Unicode::Segmenter& CharacterData::word_segmenter()
Unicode::Segmenter& CharacterData::word_segmenter() const
{
if (!m_word_segmenter) {
m_word_segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
m_word_segmenter = document().word_segmenter().clone();
m_word_segmenter->set_segmented_text(m_data);
}

View file

@ -40,8 +40,8 @@ public:
WebIDL::ExceptionOr<void> delete_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units);
WebIDL::ExceptionOr<void> replace_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units, String const&);
Unicode::Segmenter& grapheme_segmenter();
Unicode::Segmenter& word_segmenter();
Unicode::Segmenter& grapheme_segmenter() const;
Unicode::Segmenter& word_segmenter() const;
protected:
CharacterData(Document&, NodeType, String const&);
@ -51,8 +51,8 @@ protected:
private:
String m_data;
OwnPtr<Unicode::Segmenter> m_grapheme_segmenter;
OwnPtr<Unicode::Segmenter> m_word_segmenter;
mutable OwnPtr<Unicode::Segmenter> m_grapheme_segmenter;
mutable OwnPtr<Unicode::Segmenter> m_word_segmenter;
};
}

View file

@ -18,6 +18,7 @@
#include <LibJS/Runtime/Array.h>
#include <LibJS/Runtime/FunctionObject.h>
#include <LibJS/Runtime/NativeFunction.h>
#include <LibUnicode/Segmenter.h>
#include <LibWeb/Animations/Animation.h>
#include <LibWeb/Animations/AnimationPlaybackEvent.h>
#include <LibWeb/Animations/AnimationTimeline.h>
@ -5606,4 +5607,18 @@ RefPtr<Painting::DisplayList> Document::record_display_list(PaintConfig config)
return display_list;
}
// Returns the document's grapheme-granularity segmenter, creating it on first use.
// The created segmenter is stored in m_grapheme_segmenter and the same instance is
// returned on every later call.
Unicode::Segmenter& Document::grapheme_segmenter() const
{
    // Already created by an earlier call: hand back the stored instance.
    if (m_grapheme_segmenter)
        return *m_grapheme_segmenter;

    m_grapheme_segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Grapheme);
    return *m_grapheme_segmenter;
}
// Returns the document's word-granularity segmenter, creating it on first use.
// The created segmenter is stored in m_word_segmenter and the same instance is
// returned on every later call.
Unicode::Segmenter& Document::word_segmenter() const
{
    // Already created by an earlier call: hand back the stored instance.
    if (m_word_segmenter)
        return *m_word_segmenter;

    m_word_segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
    return *m_word_segmenter;
}
}

View file

@ -19,6 +19,7 @@
#include <LibJS/Console.h>
#include <LibJS/Forward.h>
#include <LibURL/URL.h>
#include <LibUnicode/Forward.h>
#include <LibWeb/CSS/CSSStyleSheet.h>
#include <LibWeb/CSS/StyleSheetList.h>
#include <LibWeb/Cookie/Cookie.h>
@ -720,6 +721,9 @@ public:
void invalidate_display_list();
Unicode::Segmenter& grapheme_segmenter() const;
Unicode::Segmenter& word_segmenter() const;
protected:
virtual void initialize(JS::Realm&) override;
virtual void visit_edges(Cell::Visitor&) override;
@ -998,6 +1002,9 @@ private:
Optional<PaintConfig> m_cached_display_list_paint_config;
RefPtr<Painting::DisplayList> m_cached_display_list;
mutable OwnPtr<Unicode::Segmenter> m_grapheme_segmenter;
mutable OwnPtr<Unicode::Segmenter> m_word_segmenter;
};
template<>

View file

@ -357,7 +357,7 @@ void InlineLevelIterator::enter_text_node(Layout::TextNode const& text_node)
.do_respect_linebreaks = do_respect_linebreaks,
.is_first_chunk = true,
.is_last_chunk = false,
.chunk_iterator = TextNode::ChunkIterator { text_node.text_for_rendering(), do_wrap_lines, do_respect_linebreaks, text_node.computed_values().font_list() },
.chunk_iterator = TextNode::ChunkIterator { text_node, do_wrap_lines, do_respect_linebreaks },
};
}

View file

@ -391,14 +391,23 @@ void TextNode::compute_text_for_rendering()
m_text_for_rendering = MUST(builder.to_string());
}
TextNode::ChunkIterator::ChunkIterator(String const& text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const& font_cascade_list)
// Returns this text node's grapheme segmenter, building it on first use by cloning the
// document's grapheme segmenter and pointing the clone at text_for_rendering().
// NOTE(review): the clone is handed the text only once, when it is first created here.
// Confirm that the cached segmenter is dropped or re-targeted wherever the rendered text
// is invalidated or recomputed, otherwise it would keep segmenting the old text.
Unicode::Segmenter& TextNode::grapheme_segmenter() const
{
    // Already built by an earlier call: reuse it as-is.
    if (m_grapheme_segmenter)
        return *m_grapheme_segmenter;

    m_grapheme_segmenter = document().grapheme_segmenter().clone();
    m_grapheme_segmenter->set_segmented_text(text_for_rendering());
    return *m_grapheme_segmenter;
}
TextNode::ChunkIterator::ChunkIterator(TextNode const& text_node, bool wrap_lines, bool respect_linebreaks)
: m_wrap_lines(wrap_lines)
, m_respect_linebreaks(respect_linebreaks)
, m_utf8_view(text)
, m_font_cascade_list(font_cascade_list)
, m_segmenter(Unicode::Segmenter::create(Unicode::SegmenterGranularity::Grapheme))
, m_utf8_view(text_node.text_for_rendering())
, m_font_cascade_list(text_node.computed_values().font_list())
, m_grapheme_segmenter(text_node.grapheme_segmenter())
{
m_segmenter->set_segmented_text(text);
}
static Gfx::GlyphRun::TextType text_type_for_code_point(u32 code_point)
@ -470,7 +479,7 @@ Optional<TextNode::Chunk> TextNode::ChunkIterator::next_without_peek()
return *m_utf8_view.iterator_at_byte_offset_without_validation(m_current_index);
};
auto next_grapheme_boundary = [this]() {
return m_segmenter->next_boundary(m_current_index).value_or(m_utf8_view.byte_length());
return m_grapheme_segmenter.next_boundary(m_current_index).value_or(m_utf8_view.byte_length());
};
auto code_point = current_code_point();

View file

@ -39,7 +39,7 @@ public:
class ChunkIterator {
public:
ChunkIterator(String const& text, bool wrap_lines, bool respect_linebreaks, Gfx::FontCascadeList const&);
ChunkIterator(TextNode const&, bool wrap_lines, bool respect_linebreaks);
Optional<Chunk> next();
Optional<Chunk> peek(size_t);
@ -53,7 +53,7 @@ public:
Utf8View m_utf8_view;
Gfx::FontCascadeList const& m_font_cascade_list;
NonnullOwnPtr<Unicode::Segmenter> m_segmenter;
Unicode::Segmenter& m_grapheme_segmenter;
size_t m_current_index { 0 };
Vector<Chunk> m_peek_queue;
@ -62,12 +62,15 @@ public:
void invalidate_text_for_rendering();
void compute_text_for_rendering();
Unicode::Segmenter& grapheme_segmenter() const;
virtual JS::GCPtr<Painting::Paintable> create_paintable() const override;
private:
virtual bool is_text_node() const final { return true; }
Optional<String> m_text_for_rendering;
mutable OwnPtr<Unicode::Segmenter> m_grapheme_segmenter;
};
template<>

View file

@ -1225,7 +1225,7 @@ void EventHandler::update_selection_range_for_input_or_textarea()
Unicode::Segmenter& EventHandler::word_segmenter()
{
if (!m_word_segmenter)
m_word_segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
m_word_segmenter = m_navigable->active_document()->word_segmenter().clone();
return *m_word_segmenter;
}