diff --git a/AK/GenericLexer.cpp b/AK/GenericLexer.cpp
index 69039d81c8a..09c912cb31f 100644
--- a/AK/GenericLexer.cpp
+++ b/AK/GenericLexer.cpp
@@ -176,6 +176,42 @@ ErrorOr GenericLexer::consume_decimal_integer()
     }
 }
 
+// Returns the line/column position of `index` within the input, and moves the position cache to
+// `index` so that a later query only has to scan the text between the two indices.
+LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
+{
+    auto& [cached_index, cached_line, cached_column] = m_cached_position;
+
+    if (cached_index <= index) {
+        // Moving forwards: account for every character between the cached index and the target.
+        for (size_t i = cached_index; i < index; ++i) {
+            if (m_input[i] == '\n')
+                ++cached_line, cached_column = 0;
+            else
+                ++cached_column;
+        }
+    } else {
+        // Moving backwards: every newline we step back over takes us one line up.
+        auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n');
+        cached_line -= lines_backtracked;
+        if (lines_backtracked == 0) {
+            // Still on the same line, so the column shrinks by the distance travelled.
+            cached_column -= cached_index - index;
+        } else {
+            // We landed on an earlier line. Its first character (column 0, matching the forward
+            // scan above) is the one right after the preceding newline, or the very start of the
+            // input if there is no preceding newline.
+            // FIXME: On the first line this ignores the column of the start position that was
+            //        passed to the constructor.
+            auto previous_newline = m_input.substring_view(0, index).find_last('\n');
+            auto current_line_start = previous_newline.has_value() ? previous_newline.value() + 1 : 0;
+            cached_column = index - current_line_start;
+        }
+    }
+    cached_index = index;
+    return m_cached_position;
+}
+
 template ErrorOr GenericLexer::consume_decimal_integer();
 template ErrorOr GenericLexer::consume_decimal_integer();
 template ErrorOr GenericLexer::consume_decimal_integer();
diff --git a/AK/GenericLexer.h b/AK/GenericLexer.h
index 2fc66ead9f8..89e6368e63a 100644
--- a/AK/GenericLexer.h
+++ b/AK/GenericLexer.h
@@ -234,6 +234,42 @@ private:
 #endif
 };
 
+// A GenericLexer that can translate indices into the input into line/column positions. The most
+// recently computed position is cached, so a query close to the previous one is cheap.
+class LineTrackingLexer : public GenericLexer {
+public:
+    using GenericLexer::GenericLexer;
+
+    struct Position {
+        size_t offset { 0 };
+        size_t line { 0 };
+        size_t column { 0 };
+    };
+
+    // Starts line/column counting at `start_position` instead of 0:0. The cached offset still
+    // starts at 0, i.e. the beginning of `input`.
+    LineTrackingLexer(StringView input, Position start_position)
+        : GenericLexer(input)
+        , m_cached_position {
+            .line = start_position.line,
+            .column = start_position.column,
+        }
+    {
+    }
+
+    // Snapshot and restore the position cache.
+    Position cached_position() const { return m_cached_position; }
+    void restore_cached_offset(Position cached_position) { m_cached_position = cached_position; }
+    // Position of the given index in the input; updates the cache as a side effect.
+    Position position_for(size_t) const;
+    // Position of the lexer's current index (m_index).
+    Position current_position() const { return position_for(m_index); }
+
+protected:
+    // mutable so that the const query functions above can move the cache.
+    mutable Position m_cached_position;
+};
+
 constexpr auto is_any_of(StringView values)
 {
     return [values](auto c) { return values.contains(c); };
@@ -254,4 +290,5 @@ using AK::GenericLexer;
 using AK::is_any_of;
 using AK::is_path_separator;
 using AK::is_quote;
+using AK::LineTrackingLexer;
 #endif
diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
index e1e1e860665..e855075440b 100644
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
@@ -14,7 +14,7 @@ namespace JSSpecCompiler {
 
 namespace {
 
-Optional consume_number(XML::LineTrackingLexer& lexer, Location& location)
+Optional consume_number(LineTrackingLexer& lexer, Location& location)
 {
     u64 start = lexer.tell();
 
@@ -73,14 +73,14 @@ void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, St
         { "+"sv, TokenType::Plus },
     };
 
-    XML::LineTrackingLexer lexer(view, node->offset);
+    LineTrackingLexer lexer(view, node->offset);
 
     while (!lexer.is_eof()) {
         lexer.ignore_while(is_ascii_space);
 
         // FIXME: This is incorrect since we count text offset after XML reference resolution. To do
         //        this properly, we need support from XML::Parser.
- Location token_location = ctx.location_from_xml_offset(lexer.offset_for(lexer.tell())); + Location token_location = ctx.location_from_xml_offset(lexer.position_for(lexer.tell())); if (auto result = consume_number(lexer, token_location); result.has_value()) { tokens.append(result.release_value()); diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp index 8070ed6c83b..c56ec18a363 100644 --- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp @@ -50,12 +50,12 @@ Location SpecificationParsingContext::file_scope() const return { .filename = m_translation_unit->filename() }; } -Location SpecificationParsingContext::location_from_xml_offset(XML::Offset offset) const +Location SpecificationParsingContext::location_from_xml_offset(LineTrackingLexer::Position position) const { return { .filename = m_translation_unit->filename(), - .line = offset.line, - .column = offset.column, + .line = position.line, + .column = position.column, .logical_location = m_current_logical_scope, }; } diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.h b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.h index 6431e394265..dcb2f2e2654 100644 --- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.h +++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.h @@ -37,7 +37,7 @@ public: int step_list_nesting_level() const; Location file_scope() const; - Location location_from_xml_offset(XML::Offset offset) const; + Location location_from_xml_offset(LineTrackingLexer::Position position) const; private: TranslationUnitRef m_translation_unit; diff --git a/Userland/Libraries/LibXML/DOM/Node.h b/Userland/Libraries/LibXML/DOM/Node.h index f223500878f..644d2ee6ec3 100644 --- a/Userland/Libraries/LibXML/DOM/Node.h +++ 
b/Userland/Libraries/LibXML/DOM/Node.h @@ -7,6 +7,7 @@ #pragma once #include +#include #include #include #include @@ -19,12 +20,6 @@ struct Attribute { ByteString value; }; -struct Offset { - size_t offset { 0 }; - size_t line { 0 }; - size_t column { 0 }; -}; - struct Node { struct Text { StringBuilder builder; @@ -40,7 +35,7 @@ struct Node { bool operator==(Node const&) const; - Offset offset; + LineTrackingLexer::Position offset; Variant content; Node* parent { nullptr }; diff --git a/Userland/Libraries/LibXML/Parser/Parser.cpp b/Userland/Libraries/LibXML/Parser/Parser.cpp index cc8af65a5a7..d3d6debbb6a 100644 --- a/Userland/Libraries/LibXML/Parser/Parser.cpp +++ b/Userland/Libraries/LibXML/Parser/Parser.cpp @@ -66,31 +66,6 @@ consteval static auto set_to_search() namespace XML { -Offset LineTrackingLexer::offset_for(size_t index) const -{ - auto& [cached_index, cached_line, cached_column] = m_cached_offset; - - if (cached_index <= index) { - for (size_t i = cached_index; i < index; ++i) { - if (m_input[i] == '\n') - ++cached_line, cached_column = 0; - else - ++cached_column; - } - } else { - auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n'); - cached_line -= lines_backtracked; - if (lines_backtracked == 0) { - cached_column -= cached_index - index; - } else { - auto current_line_start = m_input.substring_view(0, index).find_last('\n').value_or(0); - cached_column = index - current_line_start; - } - } - cached_index = index; - return m_cached_offset; -} - size_t Parser::s_debug_indent_level { 0 }; void Parser::append_node(NonnullOwnPtr node) @@ -105,7 +80,7 @@ void Parser::append_node(NonnullOwnPtr node) } } -void Parser::append_text(StringView text, Offset offset) +void Parser::append_text(StringView text, LineTrackingLexer::Position position) { if (m_listener) { m_listener->text(text); @@ -115,7 +90,7 @@ void Parser::append_text(StringView text, Offset offset) if (!m_entered_node) { Node::Text node; 
node.builder.append(text); - m_root_node = make(offset, move(node)); + m_root_node = make(position, move(node)); return; } @@ -130,7 +105,7 @@ void Parser::append_text(StringView text, Offset offset) } Node::Text text_node; text_node.builder.append(text); - node.children.append(make(offset, move(text_node), m_entered_node)); + node.children.append(make(position, move(text_node), m_entered_node)); }, [&](auto&) { // Can't enter a text or comment node. @@ -138,7 +113,7 @@ void Parser::append_text(StringView text, Offset offset) }); } -void Parser::append_comment(StringView text, Offset offset) +void Parser::append_comment(StringView text, LineTrackingLexer::Position position) { if (m_listener) { m_listener->comment(text); @@ -152,7 +127,7 @@ void Parser::append_comment(StringView text, Offset offset) m_entered_node->content.visit( [&](Node::Element& node) { - node.children.append(make(offset, Node::Comment { text }, m_entered_node)); + node.children.append(make(position, Node::Comment { text }, m_entered_node)); }, [&](auto&) { // Can't enter a text or comment node. @@ -507,7 +482,7 @@ ErrorOr Parser::parse_comment() TRY(expect("-->"sv)); if (m_options.preserve_comments) - append_comment(text, m_lexer.offset_for(comment_start)); + append_comment(text, m_lexer.position_for(comment_start)); rollback.disarm(); return {}; @@ -699,7 +674,7 @@ ErrorOr, ParseError> Parser::parse_empty_element_tag() TRY(expect("/>"sv)); rollback.disarm(); - return make(m_lexer.offset_for(tag_start), Node::Element { move(name), move(attributes), {} }); + return make(m_lexer.position_for(tag_start), Node::Element { move(name), move(attributes), {} }); } // 3.1.41. 
Attribute, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Attribute @@ -851,7 +826,7 @@ ErrorOr, ParseError> Parser::parse_start_tag() TRY(expect(">"sv)); rollback.disarm(); - return make(m_lexer.offset_for(tag_start), Node::Element { move(name), move(attributes), {} }); + return make(m_lexer.position_for(tag_start), Node::Element { move(name), move(attributes), {} }); } // 3.1.42 ETag, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ETag @@ -881,7 +856,7 @@ ErrorOr Parser::parse_content() // content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* auto content_start = m_lexer.tell(); if (auto result = parse_char_data(); !result.is_error()) - append_text(result.release_value(), m_lexer.offset_for(content_start)); + append_text(result.release_value(), m_lexer.position_for(content_start)); while (true) { auto node_start = m_lexer.tell(); @@ -890,7 +865,7 @@ ErrorOr Parser::parse_content() goto try_char_data; if (auto result = parse_reference(); !result.is_error()) { auto reference = result.release_value(); - auto reference_offset = m_lexer.offset_for(node_start); + auto reference_offset = m_lexer.position_for(node_start); if (auto char_reference = reference.get_pointer()) append_text(*char_reference, reference_offset); else @@ -899,7 +874,7 @@ ErrorOr Parser::parse_content() } if (auto result = parse_cdata_section(); !result.is_error()) { if (m_options.preserve_cdata) - append_text(result.release_value(), m_lexer.offset_for(node_start)); + append_text(result.release_value(), m_lexer.position_for(node_start)); goto try_char_data; } if (auto result = parse_processing_instruction(); !result.is_error()) @@ -911,7 +886,7 @@ ErrorOr Parser::parse_content() try_char_data:; if (auto result = parse_char_data(); !result.is_error()) - append_text(result.release_value(), m_lexer.offset_for(node_start)); + append_text(result.release_value(), m_lexer.position_for(node_start)); } rollback.disarm(); diff --git a/Userland/Libraries/LibXML/Parser/Parser.h 
b/Userland/Libraries/LibXML/Parser/Parser.h index 3529d08ede2..4b0588c1d59 100644 --- a/Userland/Libraries/LibXML/Parser/Parser.h +++ b/Userland/Libraries/LibXML/Parser/Parser.h @@ -39,29 +39,6 @@ struct Listener { virtual void error(ParseError const&) { } }; -// FIXME: This is also used in JSSpecCompiler, so should probably live in AK or even merged with -// AK::GenericLexer. -class LineTrackingLexer : public GenericLexer { -public: - using GenericLexer::GenericLexer; - - LineTrackingLexer(StringView input, XML::Offset start_offset) - : GenericLexer(input) - , m_cached_offset { - .line = start_offset.line, - .column = start_offset.column, - } - { - } - - Offset cached_offset() const { return m_cached_offset; } - void restore_cached_offset(Offset cached_offset) { m_cached_offset = cached_offset; } - Offset offset_for(size_t) const; - -protected: - mutable Offset m_cached_offset; -}; - class Parser { public: struct Options { @@ -96,8 +73,8 @@ private: ErrorOr parse_internal(); void append_node(NonnullOwnPtr); - void append_text(StringView, Offset); - void append_comment(StringView, Offset); + void append_text(StringView, LineTrackingLexer::Position); + void append_comment(StringView, LineTrackingLexer::Position); void enter_node(Node&); void leave_node(); @@ -170,9 +147,9 @@ private: [[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current()) { return ArmedScopeGuard { - [this, position = m_lexer.tell(), cached_offset = m_lexer.cached_offset(), location] { + [this, position = m_lexer.tell(), cached_position = m_lexer.cached_position(), location] { m_lexer.retreat(m_lexer.tell() - position); - m_lexer.restore_cached_offset(cached_offset); + m_lexer.restore_cached_offset(cached_position); (void)location; dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All)); }