markdown-check: Allow generating a manpage link graph

Using the cross-page links, we can generate a directed graph showing the
topology of which pages refer to other pages. This is not just for fun:
the links show how often a page is linked (links are deliberately not
deduplicated), which pairs of pages only have links in one
direction (where a link in the other direction may be useful), which
groups of closely-interlinked pages exist, and which pages have few or
no links to other pages.

The EXTRA_MARKDOWN_CHECK_ARGS environment variable read by the
check-markdown script can be used to inject the -g flag for generating
the graph on all manpages.
This commit is contained in:
kleines Filmröllchen 2023-07-02 13:20:23 +02:00 committed by Linus Groh
parent f53aa959df
commit d7a2b5e65b
Notes: sideshowbarker 2024-07-17 05:19:06 +09:00
4 changed files with 96 additions and 3 deletions

View file

@ -400,6 +400,7 @@ if (BUILD_LAGOM)
JS
Line
Locale
Manual
Markdown
PDF
Regex
@ -548,7 +549,7 @@ if (BUILD_LAGOM)
target_link_libraries(lzcat LibCompress LibCore LibMain)
add_executable(markdown-check ../../Userland/Utilities/markdown-check.cpp)
target_link_libraries(markdown-check LibFileSystem LibMarkdown LibMain)
target_link_libraries(markdown-check LibFileSystem LibMarkdown LibMain LibManual)
if (NOT EMSCRIPTEN)
add_executable(ntpquery ../../Userland/Utilities/ntpquery.cpp)

View file

@ -23,4 +23,5 @@ if [ -z "$SERENITY_SOURCE_DIR" ] ; then
export SERENITY_SOURCE_DIR
fi
find AK Base Documentation Kernel Meta Ports Tests Userland -path Tests/LibWeb/WPT/wpt -prune -o -type f -name '*.md' -print0 | xargs -0 "${MARKDOWN_CHECK_BINARY}" README.md CONTRIBUTING.md
# shellcheck disable=SC2086 # Word splitting is intentional here
find AK Base Documentation Kernel Meta Ports Tests Userland -path Tests/LibWeb/WPT/wpt -prune -o -type f -name '*.md' -print0 | xargs -0 "${MARKDOWN_CHECK_BINARY}" -b "${SERENITY_SOURCE_DIR}/Base" $EXTRA_MARKDOWN_CHECK_ARGS README.md CONTRIBUTING.md

View file

@ -113,7 +113,7 @@ target_link_libraries(lspci PRIVATE LibPCIDB)
target_link_libraries(lsusb PRIVATE LibUSBDB)
target_link_libraries(lzcat PRIVATE LibCompress)
target_link_libraries(man PRIVATE LibMarkdown LibManual)
target_link_libraries(markdown-check PRIVATE LibFileSystem LibMarkdown)
target_link_libraries(markdown-check PRIVATE LibFileSystem LibMarkdown LibManual)
target_link_libraries(matroska PRIVATE LibVideo)
target_link_libraries(md PRIVATE LibMarkdown)
target_link_libraries(mktemp PRIVATE LibFileSystem)

View file

@ -22,6 +22,9 @@
#include <LibCore/File.h>
#include <LibFileSystem/FileSystem.h>
#include <LibMain/Main.h>
#include <LibManual/PageNode.h>
#include <LibManual/Path.h>
#include <LibManual/SectionNode.h>
#include <LibMarkdown/Document.h>
#include <LibMarkdown/Visitor.h>
#include <stdlib.h>
@ -239,11 +242,55 @@ RecursionDecision MarkdownLinkage::visit(Markdown::Text::LinkNode const& link_no
return RecursionDecision::Recurse;
}
// Renders the given page-to-page links as the text of a Graphviz digraph named "manpage_links".
//
// Every key of `page_links` becomes one node, labelled "name(section)". Every element of a key's
// value vector becomes one edge from the key to that element, so a target that is listed several
// times yields several edges. Targets that are not themselves keys of `page_links` are skipped.
//
// Returns the complete graph text, or the first error produced while querying a page's path or
// name or while building the string.
static ErrorOr<String> generate_link_graph(HashMap<NonnullRefPtr<Manual::PageNode const>, Vector<NonnullRefPtr<Manual::PageNode const>>> const& page_links)
{
    auto const header = "digraph manpage_links {\n"sv;
    StringBuilder builder;
    TRY(builder.try_append(header));

    // Not displayed to the user: these are only the node IDs used to wire up edges;
    // the visible text is the `label` attribute emitted below.
    HashMap<NonnullRefPtr<Manual::PageNode const>, String> page_identifiers;

    for (auto const& page : page_links.keys()) {
        auto path = TRY(page->path());
        StringBuilder identifier_builder;
        // Only allow alphanumerics, replace everything else with underscores.
        for (auto const& character : path.code_points()) {
            if (AK::is_ascii_alphanumeric(character))
                TRY(identifier_builder.try_append_code_point(character));
            else
                TRY(identifier_builder.try_append('_'));
        }
        auto const identifier = TRY(identifier_builder.to_string());
        TRY(builder.try_appendff("{} [label=\"{}({})\"];\n", identifier, TRY(page->name()), page->section_number()));
        TRY(page_identifiers.try_set(page, identifier));
    }

    for (auto const& from_page_list : page_links) {
        auto const& from_page = from_page_list.key;
        // The source identifier is the same for every edge of this page, so look it up once.
        // It is only unwrapped when an edge is actually emitted, exactly as before.
        auto const from_page_identifier = page_identifiers.get(from_page);
        for (auto const& to_page : from_page_list.value) {
            auto const to_page_identifier = page_identifiers.get(to_page);
            // Target page doesn't actually exist; it's probably an ignored page.
            if (!to_page_identifier.has_value())
                continue;

            // Reuse the lookup result from above instead of hashing `to_page` a second time.
            TRY(builder.try_appendff("{} -> {};\n", from_page_identifier.value(), to_page_identifier.value()));
        }
    }

    TRY(builder.try_append("}\n"sv));

    return builder.to_string();
}
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
Core::ArgsParser args_parser;
Vector<StringView> file_paths;
bool output_link_graph { false };
StringView base_path = "/"sv;
args_parser.add_positional_argument(file_paths, "Path to markdown files to read and parse", "paths", Core::ArgsParser::Required::Yes);
args_parser.add_option(base_path, "System base path (default: \"/\")", "base", 'b', "path");
args_parser.add_option(output_link_graph, "Output a page link graph into \"manpage-links.gv\". The recommended tool to process this graph is `fdp`.", "link-graph", 'g');
args_parser.parse(arguments);
outln("Reading and parsing Markdown files ...");
@ -331,6 +378,50 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
}
}
// Optional link-graph output (enabled by -g / --link-graph): resolve the parsed files and their
// link targets to manual pages, build the page -> linked-pages map, and write the resulting
// Graphviz text to "manpage-links.gv".
if (output_link_graph) {
// First, collect all pages, and collect links between pages in a second step after all pages must have been collected.
// `pages` is keyed by the file path as it appears in `files` / in a link's `file_path`.
HashMap<String, NonnullRefPtr<Manual::PageNode const>> pages;
for (auto const& file : files) {
// Manual queries are made with a path relative to the base path, prefixed with "/".
auto base_relative_path = TRY(String::formatted("/{}", LexicalPath::relative_path(file.key, base_path)));
auto page = Manual::Node::try_create_from_query({ base_relative_path });
if (page.is_error()) {
dbgln("Not including {} in the link graph since it's not a man page.", file.key);
// Note: this also skips registering the link targets of this file below.
continue;
}
TRY(pages.try_set(file.key, page.value()));
// Also register every link target that resolves to a manual page, keyed by the link's file path.
for (auto const& link : file.value.file_links()) {
// NOTE(review): this declaration shadows the outer `base_relative_path` of the linking file.
auto base_relative_path = TRY(String::formatted("/{}", LexicalPath::relative_path(link.file_path, base_path)));
auto maybe_target_page = Manual::Node::try_create_from_query({ base_relative_path });
if (maybe_target_page.is_error()) {
dbgln("Not including {} in the link graph since it's not a man page.", link.file_path);
continue;
}
TRY(pages.try_set(TRY(String::from_deprecated_string(link.file_path)), maybe_target_page.value()));
}
}
// Second pass: for every file that resolved to a page, list the pages its links point to.
HashMap<NonnullRefPtr<Manual::PageNode const>, Vector<NonnullRefPtr<Manual::PageNode const>>> page_links;
for (auto const& file : files) {
auto page = pages.get(file.key);
// Files that were not recognized as manual pages in the first pass get no node.
if (!page.has_value())
continue;
// One entry is appended per link, so a page linked several times appears several times.
Vector<NonnullRefPtr<Manual::PageNode const>> linked_pages;
for (auto const& link : file.value.file_links()) {
auto linked_page = pages.get(TRY(String::from_deprecated_string(link.file_path)));
// Links whose target was not registered as a page in the first pass are dropped.
if (!linked_page.has_value())
continue;
TRY(linked_pages.try_append(*linked_page.value()));
}
TRY(page_links.try_set(*page.value(), move(linked_pages)));
}
// Serialize the graph and write it to a file given by relative path, opened for writing with truncation.
auto const graph_text = TRY(generate_link_graph(page_links));
auto const graph_file = TRY(Core::File::open("manpage-links.gv"sv, Core::File::OpenMode::Write | Core::File::OpenMode::Truncate));
TRY(graph_file->write_until_depleted(graph_text.bytes()));
}
if (any_problems) {
outln("Done. Some errors were encountered, please check above log.");
return 1;