diff --git a/Makefile b/Makefile index c071338435..8322e0f3cf 100644 --- a/Makefile +++ b/Makefile @@ -127,13 +127,6 @@ endif src/nvim/testdir/%.vim: phony_force +$(SINGLE_MAKE) -C src/nvim/testdir NVIM_PRG=$(NVIM_PRG) SCRIPTS= $(MAKEOVERRIDES) $(patsubst src/nvim/testdir/%.vim,%,$@) -build/runtime/doc/tags helptags: | nvim - +$(BUILD_TOOL) -C build runtime/doc/tags - -# Builds help HTML _and_ checks for invalid help tags. -helphtml: | nvim build/runtime/doc/tags - +$(BUILD_TOOL) -C build doc_html - functionaltest functionaltest-lua unittest benchmark: | nvim $(BUILD_TOOL) -C build $@ diff --git a/runtime/doc/autocmd.txt b/runtime/doc/autocmd.txt index 7a2c540ea2..e55534b163 100644 --- a/runtime/doc/autocmd.txt +++ b/runtime/doc/autocmd.txt @@ -57,7 +57,7 @@ The special pattern or defines a buffer-local autocommand. See |autocmd-buflocal|. Note: The ":autocmd" command can only be followed by another command when the -'|' appears where the pattern is expected. This works: > +"|" appears where the pattern is expected. This works: > :augroup mine | au! BufRead | augroup END But this sees "augroup" as part of the defined command: > :augroup mine | au! BufRead * | augroup END diff --git a/scripts/gen_help_html.lua b/scripts/gen_help_html.lua new file mode 100644 index 0000000000..d79ece53f3 --- /dev/null +++ b/scripts/gen_help_html.lua @@ -0,0 +1,830 @@ +-- Converts Vim :help files to HTML. Validates |tag| links and document syntax (parser errors). +-- +-- USAGE (GENERATE HTML): +-- 1. Run `make helptags` first; this script depends on vim.fn.taglist(). +-- 2. nvim -V1 -es --clean +"lua require('scripts.gen_help_html').gen('./build/runtime/doc/', 'target/dir/')" +-- - Read the docstring at gen(). +-- 3. cd target/dir/ && jekyll serve --host 0.0.0.0 +-- 4. Visit http://localhost:4000/…/help.txt.html +-- +-- USAGE (VALIDATE): +-- 1. nvim -V1 -es +"lua require('scripts.gen_help_html').validate()" +-- - validate() is 10x faster than gen(), so it is used in CI. 
+-- +-- SELF-TEST MODE: +-- 1. nvim -V1 -es +"lua require('scripts.gen_help_html')._test()" +-- +-- NOTES: +-- * gen() and validate() are the primary entrypoints. validate() only exists because gen() is too +-- slow (~1 min) to run in per-commit CI. +-- * visit_node() is the core function used by gen() to traverse the document tree and produce HTML. +-- * visit_validate() is the core function used by validate(). +-- * Files in `new_layout` will be generated with a "flow" layout instead of preformatted/fixed-width layout. +-- +-- parser bugs: +-- * Should NOT be code_block: +-- tab:xy The 'x' is always used, then 'y' as many times as will +-- fit. Thus "tab:>-" displays: +-- > +-- >- +-- >-- +-- etc. +-- +-- tab:xyz The 'z' is always used, then 'x' is prepended, and +-- then 'y' is used as many times as will fit. Thus +-- "tab:<->" displays: +-- > +-- <> +-- <-> +-- <--> +-- etc. +-- * Should NOT be a "headline". Perhaps a "table" (or just "line"). +-- expr5 and expr6 *expr5* *expr6* +-- --------------- +-- expr6 + expr6 Number addition, |List| or |Blob| concatenation *expr-+* +-- expr6 - expr6 Number subtraction *expr--* +-- expr6 . expr6 String concatenation *expr-.* +-- expr6 .. expr6 String concatenation *expr-..* + +local tagmap = nil +local helpfiles = nil +local invalid_tags = {} + +local commit = '?' +local api = vim.api +local M = {} + +-- These files are generated with "flow" layout (non fixed-width, wrapped text paragraphs). +-- All other files are "legacy" files which require fixed-width layout. +local new_layout = { + ['api.txt'] = true, + ['channel.txt'] = true, + ['develop.txt'] = true, + ['nvim.txt'] = true, + ['pi_health.txt'] = true, + ['provider.txt'] = true, + ['ui.txt'] = true, +} + +-- TODO: treesitter gets stuck on these files... 
+-- Help files that are skipped entirely (see the TODO above).
+local exclude = {
+  ['filetype.txt'] = true,
+  ['usr_24.txt'] = true,
+}
+
+-- Writes `text` to the file `fname`, replacing any existing content.
+-- Raises an error that names `fname` if the file cannot be opened for writing.
+local function tofile(fname, text)
+  local f = io.open(fname, 'w')
+  if not f then
+    -- Report the filename: `f` is nil on this path, so formatting it is useless.
+    error(('failed to write: %s'):format(fname))
+  end
+  f:write(text)
+  f:close()
+end
+
+-- Escapes "&", "<" and ">" in `s` for embedding in HTML.
+--
+-- NOTE(review): the body of this function was truncated in this copy of the patch (the HTML
+-- inside its string literals was stripped). It is restored here from the remaining fragments;
+-- confirm the exact literals against the upstream script.
+local function html_esc(s)
+  if s:find('<a class="parse%-error"') then
+    -- HACK: don't escape HTML that we generated (for a parsing error).
+    return s
+  end
+  -- "&" must be replaced first so that the entities produced below are not escaped again.
+  -- Parentheses drop gsub's match count.
+  return (s:gsub('&', '&amp;'):gsub('<', '&lt;'):gsub('>', '&gt;'))
+end
+
+-- Percent-encodes `s` for use in a URL, using Vim's substitute().
+local function url_encode(s)
+  -- Credit: tpope / vim-unimpaired
+  -- NOTE: these chars intentionally *not* escaped: ' ( )
+  return vim.fn.substitute(vim.fn.iconv(s, 'latin1', 'utf-8'),
+    [=[[^A-Za-z0-9()'_.~-]]=],
+    [=[\="%".printf("%02X",char2nr(submatch(0)))]=],
+    'g')
+end
+
+-- Removes the ">" and "<" chars that delineate a codeblock in Vim :help files.
+local function trim_gt_lt(s)
+  -- Parentheses drop gsub's match count so callers receive only the string.
+  return (s:gsub('^%s*>%s*\n', ''):gsub('\n<', ''))
+end
+
+-- Replaces every tab in `s` with 8 spaces.
+local function expandtabs(s)
+  return (s:gsub('\t', (' '):rep(8)))
+end
+
+-- Uppercases the first character of each space/tab-separated word in `s`.
+-- Each word is appended after a single space, so the result begins with a space.
+local function to_titlecase(s)
+  local text = ''
+  for w in vim.gsplit(s, '[ \t]+') do
+    text = ('%s %s%s'):format(text, vim.fn.toupper(w:sub(1, 1)), w:sub(2))
+  end
+  return text
+end
+
+-- Builds an anchor name from heading `text`: lowercased, whitespace runs replaced by "-".
+-- Returns 'unknown' when `text` is nil.
+local function to_heading_tag(text)
+  -- Prepend "_" to avoid conflicts with actual :help tags.
+  return text and string.format('_%s', vim.fn.tolower((text:gsub('%s+', '-')))) or 'unknown'
+end
+
+-- Gets the basename of path `f` with ".txt" removed.
+local function basename_noext(f)
+  -- Parentheses keep gsub's match count from being passed on to basename().
+  return vim.fs.basename((f:gsub('%.txt', '')))
+end
+
+-- Returns true if `s` is empty or contains only whitespace.
+local function is_blank(s)
+  return not not s:find('^%s*$')
+end
+
+-- Strips leading and trailing whitespace from `s`.
+local function trim(s)
+  return vim.trim(s)
+end
+
+-- Removes a leading list bullet ("-", "*" or "•" followed by one whitespace char) from `s`.
+local function trim_bullet(s)
+  return (s:gsub('^%s*[-*•]%s', ''))
+end
+
+-- Returns a truthy value (the match position) if `s` starts with a list bullet, else nil.
+local function startswith_bullet(s)
+  return s:find('^%s*[-*•]%s')
+end
+
+-- Checks if a given line is a "noise" line that doesn't look good in HTML form.
+-- Returns a truthy value (the match position) for noise lines, else nil.
+local function is_noise(line)
+  return (
+    line:find('Type .*gO.* to see the table of contents')
+    -- Title line of traditional :help pages.
+    -- Example: "NVIM REFERENCE MANUAL by ..."
+    or line:find('^%s*N?VIM REFERENCE MANUAL')
+    -- First line of traditional :help pages.
+    -- Example: "*api.txt* Nvim"
+    or line:find('%s*%*?[a-zA-Z]+%.txt%*?%s+N?[vV]im%s*$')
+    -- modeline
+    -- Example: "vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:"
+    or line:find('^%s*vi[m]%:.*ft=help')
+    or line:find('^%s*vi[m]%:.*filetype=help')
+  )
+end
+
+-- Creates a github issue URL at vigoux/tree-sitter-vimdoc with prefilled content.
+--
+-- @param fname Source :help file (only its basename is used)
+-- @param to_fname Generated .html file (only its basename is used)
+-- @param sample_text Text placed (URL-encoded) in the issue body as context
+local function get_bug_url_vimdoc(fname, to_fname, sample_text)
+  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
+  local bug_url = ('https://github.com/vigoux/tree-sitter-vimdoc/issues/new?labels=bug&title=parse+error%3A+'
+    ..vim.fs.basename(fname)
+    ..'+&body=Found+%60tree-sitter-vimdoc%60+parse+error+at%3A+'
+    ..this_url
+    ..'%0D%0DContext%3A%0D%0D%60%60%60%0D'
+    ..url_encode(sample_text)
+    ..'%0D%60%60%60')
+  return bug_url
+end
+
+-- Creates a github issue URL at neovim/neovim with prefilled content.
+--
+-- @param fname Source :help file (only its basename is used)
+-- @param to_fname Generated .html file (only its basename is used)
+-- @param sample_text Text placed (URL-encoded) in the issue body as context
+-- @param token_name Optional name of the unhandled token; omitted from the body when nil
+local function get_bug_url_nvim(fname, to_fname, sample_text, token_name)
+  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
+  local bug_url = ('https://github.com/neovim/neovim/issues/new?labels=bug&title=user+docs+HTML%3A+'
+    ..vim.fs.basename(fname)
+    ..'+&body=%60gen_help_html.lua%60+problem+at%3A+'
+    ..this_url
+    ..'%0D'
+    ..(token_name and '+unhandled+token%3A+%60'..token_name..'%60' or '')
+    ..'%0DContext%3A%0D%0D%60%60%60%0D'
+    ..url_encode(sample_text)
+    ..'%0D%60%60%60')
+  return bug_url
+end
+
+-- Gets a "foo.html" name from a "foo.txt" helpfile name.
+-- Returns nil when `f` is nil.
+local function get_helppage(f)
+  if not f then
+    return nil
+  end
+  -- Special case: help.txt is the "main landing page" of :help files, not index.txt.
+  if f == 'index.txt' then
+    return 'vimindex.html'
+  elseif f == 'help.txt' then
+    return 'index.html'
+  end
+
+  -- Parentheses drop gsub's match count so exactly one value is returned.
+  return (f:gsub('%.txt$', '.html'))
+end
+
+-- Counts leading spaces (tab=8) to decide the indent size of multiline text.
+--
+-- Blank lines (empty or whitespace-only) are ignored.
+local function get_indent(s)
+  local min_indent = nil
+  for line in vim.gsplit(s, '\n') do
+    if line and not is_blank(line) then
+      -- Width of the leading whitespace once tabs are expanded.
+      local ws = expandtabs(line:match('^%s+') or '')
+      if not min_indent or #ws < min_indent then
+        min_indent = #ws
+      end
+    end
+  end
+  -- No non-blank line at all: report an indent of 0.
+  return min_indent or 0
+end
+
+-- Removes the common indent level, after expanding tabs to 8 spaces.
+local function trim_indent(s)
+  local indent_size = get_indent(s)
+  local lines = {}
+  for line in vim.gsplit(s, '\n') do
+    -- Collect the pieces and join once instead of re-formatting the whole string per line.
+    lines[#lines + 1] = expandtabs(line):sub(indent_size + 1)
+  end
+  return table.concat(lines, '\n')
+end
+
+-- Gets raw buffer text in the node's range (+/- an offset), as a newline-delimited string.
+--
+-- @param node Node whose start/end rows delimit the text
+-- @param bufnr Buffer to read from
+-- @param offset Number of extra lines to include before and after the node
+local function getbuflinestr(node, bufnr, offset)
+  local line1, _, line2, _ = node:range()
+  -- Clamp the start: getbufline() returns an empty list when its start line is below 1, which
+  -- would discard the whole sample for nodes within `offset` lines of the top of the buffer.
+  line1 = math.max(0, line1 - offset)
+  line2 = line2 + offset
+  local lines = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)
+  return table.concat(lines, '\n')
+end
+
+-- Gets the whitespace just before `node` from the raw buffer text.
+-- Needed for preformatted `old` lines.
+local function getws(node, bufnr)
+  local line1, c1, line2, _ = node:range()
+  -- Only the first line of the node's range is inspected.
+  local raw = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)[1]
+  local text_before = raw:sub(1, c1)
+  return text_before:match('%s+$') or ''
+end
+
+-- Gets the tag name referenced (or defined) by `node`, and the HTML page of the help file
+-- that `tagmap` associates with that tag.
+--
+-- @param node Node for an option, a |link| or a *tag*
+-- @param bufnr Buffer containing the node
+-- @param link If truthy, strip the surrounding "|" chars; otherwise strip the surrounding "*"
+--
+-- @returns helppage, tag  `helppage` is nil when the tag is not present in `tagmap`.
+local function get_tagname(node, bufnr, link)
+  local node_name = (node.named and node:named()) and node:type() or nil
+  local node_text = vim.treesitter.get_node_text(node, bufnr)
+  local tag
+  if node_name == 'option' then
+    tag = node_text
+  elseif link then
+    tag = node_text:gsub('^|', ''):gsub('|$', '')
+  else
+    tag = node_text:gsub('^%*', ''):gsub('%*$', '')
+  end
+  -- Unknown tags have no entry in tagmap; callers rely on getting the tag name back so they can
+  -- record it as invalid, so never hand nil to basename().
+  local tagfile = tagmap[tag]
+  local helpfile = tagfile and vim.fs.basename(tagfile) or nil     -- "api.txt"
+  local helppage = get_helppage(helpfile)                         -- "api.html"
+  return helppage, tag
+end
+
+-- Traverses the tree at `root` and checks that |tag| links point to valid helptags.
+local function visit_validate(root, level, lang_tree, opt, stats) + level = level or 0 + local node_name = (root.named and root:named()) and root:type() or nil + local toplevel = level < 1 + + if root:child_count() > 0 then + for node, _ in root:iter_children() do + if node:named() then + visit_validate(node, level + 1, lang_tree, opt, stats) + end + end + end + + if node_name == 'ERROR' then + -- Store the raw text to give context to the bug report. + local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]' + table.insert(stats.parse_errors, sample_text) + elseif node_name == 'hotlink' or node_name == 'option' then + local _, tagname = get_tagname(root, opt.buf, true) + if not root:has_error() and not tagmap[tagname] then + invalid_tags[tagname] = vim.fs.basename(opt.fname) + end + end +end + +-- Generates HTML from node `root` recursively. +local function visit_node(root, level, lang_tree, headings, opt, stats) + level = level or 0 + + local node_name = (root.named and root:named()) and root:type() or nil + -- Previous sibling kind (string). + local prev = root:prev_sibling() and (root:prev_sibling().named and root:prev_sibling():named()) and root:prev_sibling():type() or nil + -- Next sibling kind (string). + local next_ = root:next_sibling() and (root:next_sibling().named and root:next_sibling():named()) and root:next_sibling():type() or nil + -- Parent kind (string). + local parent = root:parent() and root:parent():type() or nil + local text = '' + local toplevel = level < 1 + local function node_text() + return vim.treesitter.get_node_text(root, opt.buf) + end + + if root:child_count() == 0 then + text = node_text() + else + -- Process children and join them with whitespace. 
+ for node, _ in root:iter_children() do + if node:named() then + local r = visit_node(node, level + 1, lang_tree, headings, opt, stats) + local ws = r == '' and '' or ((opt.old and (node:type() == 'word' or not node:named())) and getws(node, opt.buf) or ' ') + text = string.format('%s%s%s', text, ws, r) + end + end + end + local trimmed = trim(text) + + if node_name == 'help_file' then -- root node + return text + elseif node_name == 'word' or node_name == 'uppercase_name' then + if parent == 'headline' then + -- Start a new heading item, or update the current one. + local n = (prev == nil or #headings == 0) and #headings + 1 or #headings + headings[n] = string.format('%s%s', headings[n] and headings[n]..' ' or '', text) + end + + return html_esc(text) + elseif node_name == 'headline' then + return ('

%s

\n'):format(to_heading_tag(headings[#headings]), text) + elseif node_name == 'column_heading' or node_name == 'column_name' then + return ('

%s

\n'):format(trimmed) + elseif node_name == 'line' then + -- TODO: remove these "sibling inspection" hacks once the parser provides structured info + -- about paragraphs and listitems: https://github.com/vigoux/tree-sitter-vimdoc/issues/12 + local next_text = root:next_sibling() and vim.treesitter.get_node_text(root:next_sibling(), opt.buf) or '' + local li = startswith_bullet(text) -- Listitem? + local next_li = startswith_bullet(next_text) -- Next is listitem? + -- Close the paragraph/listitem if the next sibling is not a line. + local close = (next_ ~= 'line' or next_li or is_blank(next_text)) and '\n' or '' + + -- HACK: discard common "noise" lines. + if is_noise(text) then + table.insert(stats.noise_lines, getbuflinestr(root, opt.buf, 0)) + return (opt.old or prev ~= 'line') and '' or close + end + + if opt.old then + -- XXX: Treat old docs as preformatted. Until those docs are "fixed" or we get better info + -- from tree-sitter-vimdoc, this avoids broken layout for legacy docs. + return ('
%s
\n'):format(text) + end + + if li then + return string.format('
%s%s', trim_bullet(expandtabs(text)), close) + end + if prev ~= 'line' then -- Start a new paragraph. + return string.format('
%s%s', expandtabs(text), close) + end + + -- Continue in the current paragraph/listitem. + return string.format('%s%s', expandtabs(text), close) + elseif node_name == 'hotlink' or node_name == 'option' then + local helppage, tagname = get_tagname(root, opt.buf, true) + if not root:has_error() and not tagmap[tagname] then + invalid_tags[tagname] = vim.fs.basename(opt.fname) + end + return ('%s'):format(helppage, url_encode(tagname), html_esc(tagname)) + elseif node_name == 'backtick' then + return ('%s'):format(html_esc(text)) + elseif node_name == 'argument' then + return ('{%s}'):format(html_esc(trimmed)) + elseif node_name == 'code_block' then + return ('
\n%s
\n'):format(html_esc(trim_indent(trim_gt_lt(text)))) + elseif node_name == 'tag' then -- anchor + local _, tagname = get_tagname(root, opt.buf, false) + local s = ('%s'):format(url_encode(tagname), trimmed) + if parent == 'headline' and prev ~= 'tag' then + -- Start the container for tags in a heading. + -- This makes "justify-content:space-between" right-align the tags. + --

foo bartag1 tag2

+ return string.format('%s', s) + elseif parent == 'headline' and next_ == nil then + -- End the container for tags in a heading. + return string.format('%s', s) + end + return s + elseif node_name == 'ERROR' then + -- Store the raw text to give context to the bug report. + local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]' + table.insert(stats.parse_errors, sample_text) + if prev == 'ERROR' then + -- Avoid trashing the text with cascading errors. + return trimmed, ('parse-error:"%s"'):format(node_text()) + end + return ('%s'):format( + get_bug_url_vimdoc(opt.fname, opt.to_fname, sample_text), trimmed) + else -- Unknown token. + local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]' + return ('%s'):format( + node_name, get_bug_url_nvim(opt.fname, opt.to_fname, sample_text, node_name), trimmed), ('unknown-token:"%s"'):format(node_name) + end +end + +local function get_helpfiles(include) + local dir = './build/runtime/doc' + local rv = {} + for f, type in vim.fs.dir(dir) do + if (vim.endswith(f, '.txt') + and type == 'file' + and (not include or vim.tbl_contains(include, f)) + and (not exclude[f])) then + local fullpath = vim.fn.fnamemodify(('%s/%s'):format(dir, f), ':p') + table.insert(rv, fullpath) + end + end + return rv +end + +-- Populates the helptags map. +local function get_helptags(help_dir) + local m = {} + -- Load a random help file to convince taglist() to do its job. + vim.cmd(string.format('split %s/api.txt', help_dir)) + vim.cmd('lcd %:p:h') + for _, item in ipairs(vim.fn.taglist('.*')) do + if vim.endswith(item.filename, '.txt') then + m[item.name] = item.filename + end + end + vim.cmd('q!') + return m +end + +-- Opens `fname` in a buffer and gets a treesitter parser for the buffer contents. +-- +-- @returns lang_tree, bufnr +local function parse_buf(fname) + local buf + if type(fname) == 'string' then + vim.cmd('split '..vim.fn.fnameescape(fname)) -- Filename. 
+ buf = api.nvim_get_current_buf() + else + buf = fname + vim.cmd('sbuffer '..tostring(fname)) -- Buffer number. + end + -- vim.treesitter.require_language('help', './build/lib/nvim/parser/help.so') + local lang_tree = vim.treesitter.get_parser(buf, 'help') + return lang_tree, buf +end + +-- Validates one :help file `fname`: +-- - checks that |tag| links point to valid helptags. +-- - recursively counts parse errors ("ERROR" nodes) +-- +-- @returns { invalid_tags: number, parse_errors: number } +local function validate_one(fname) + local stats = { + invalid_tags = {}, + parse_errors = {}, + } + local lang_tree, buf = parse_buf(fname) + for _, tree in ipairs(lang_tree:trees()) do + visit_validate(tree:root(), 0, tree, { buf = buf, fname = fname, }, stats) + end + lang_tree:destroy() + vim.cmd.close() + return { + invalid_tags = invalid_tags, + parse_errors = stats.parse_errors, + } +end + +-- Generates HTML from one :help file `fname` and writes the result to `to_fname`. +-- +-- @param fname Source :help file +-- @param to_fname Destination .html file +-- @param old boolean Preformat paragraphs (for old :help files which are full of arbitrary whitespace) +-- +-- @returns html, stats +local function gen_one(fname, to_fname, old) + local stats = { + noise_lines = {}, + parse_errors = {}, + } + local lang_tree, buf = parse_buf(fname) + local headings = {} -- Headings (for ToC). + local title = to_titlecase(basename_noext(fname)) + + local html = ([[ + + + + + + + + + + + + %s - Neovim docs + + + ]]):format(title) + + local logo_svg = [[ + + Neovim + + + + + + + + + + + + + + + + + + + + + + + + + + ]] + + local main = ([[ +
+ +
+ +
+
+

%s

+

+ + Nvim help pages, updated automatically + from source. + Parsing by tree-sitter-vimdoc. + +

+ ]]):format(logo_svg, title, vim.fs.basename(fname)) + for _, tree in ipairs(lang_tree:trees()) do + main = main .. (visit_node(tree:root(), 0, tree, headings, { buf = buf, old = old, fname = fname, to_fname = to_fname }, stats)) + end + main = main .. '
\n' + + local toc = [[ +
+ + + +
+ ]] + for _, heading in ipairs(headings) do + toc = toc .. ('\n'):format(to_heading_tag(heading), heading) + end + toc = toc .. '
\n' + + local bug_url = get_bug_url_nvim(fname, to_fname, 'TODO', nil) + local bug_link = string.format('(report docs bug...)', bug_url) + + local footer = ([[ +
+
+
+ Generated on %s from {%s} +
+
+ parse_errors: %d %s | noise_lines: %d +
+
+
+ ]]):format( + os.date('%Y-%m-%d %H:%M:%S'), commit, #stats.parse_errors, bug_link, + html_esc(table.concat(stats.noise_lines, '\n')), #stats.noise_lines) + + html = ('%s%s%s
\n%s\n\n'):format( + html, main, toc, footer) + vim.cmd('q!') + lang_tree:destroy() + return html, stats +end + +local function gen_css(fname) + local css = [[ + @media (min-width: 40em) { + .toc { + position: fixed; + left: 67%; + } + } + .toc { + /* max-width: 12rem; */ + } + .toc > div { + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + } + html { + scroll-behavior: auto; + } + h1, h2, h3, h4 { + font-family: sans-serif; + } + .help-body { + padding-bottom: 2em; + } + .help-line { + /* font-family: ui-monospace,SFMono-Regular,SF Mono,Menlo,Consolas,Liberation Mono,monospace; */ + } + .help-item { + display: list-item; + margin-left: 1.5rem; /* padding-left: 1rem; */ + } + .help-para { + padding-top: 10px; + padding-bottom: 10px; + } + .old-help-line { + /* Tabs are used for alignment in old docs, so we must match Vim's 8-char expectation. */ + tab-size: 8; + white-space: pre; + font-size: .875em; + font-family: ui-monospace,SFMono-Regular,SF Mono,Menlo,Consolas,Liberation Mono,monospace; + } + a.help-tag, a.help-tag:focus, a.help-tag:hover { + color: inherit; + text-decoration: none; + } + .help-tag { + color: gray; + } + h1 .help-tag, h2 .help-tag { + font-size: smaller; + } + .help-heading { + overflow: hidden; + white-space: nowrap; + display: flex; + justify-content: space-between; + } + /* The (right-aligned) "tags" part of a section heading. */ + .help-heading-tags { + margin-left: 10px; + } + .parse-error { + background-color: red; + } + .unknown-token { + color: black; + background-color: yellow; + } + pre { + /* Tabs are used in code_blocks only for indentation, not alignment, so we can aggressively shrink them. 
*/ + tab-size: 2; + white-space: pre; + overflow: visible; + /* font-family: ui-monospace,SFMono-Regular,SF Mono,Menlo,Consolas,Liberation Mono,monospace; */ + /* font-size: 14px; */ + /* border: 0px; */ + /* margin: 0px; */ + } + pre:hover, + .help-heading:hover { + overflow: visible; + } + .generator-stats { + color: gray; + font-size: smaller; + } + .golden-grid { + display: grid; + grid-template-columns: 65% auto; + grid-gap: 1em; + } + ]] + tofile(fname, css) +end + +function M._test() + tagmap = get_helptags('./build/runtime/doc') + helpfiles = get_helpfiles() + + local function ok(cond, expected, actual) + assert((not expected and not actual) or (expected and actual), 'if "expected" is given, "actual" is also required') + if expected then + return assert(cond, ('expected %s, got: %s'):format(vim.inspect(expected), vim.inspect(actual))) + else + return assert(cond) + end + end + local function eq(expected, actual) + return ok(expected == actual, expected, actual) + end + + eq(119, #helpfiles) + ok(vim.tbl_count(tagmap) > 3000, '>3000', vim.tbl_count(tagmap)) + ok(vim.endswith(tagmap['vim.diagnostic.set()'], 'diagnostic.txt'), tagmap['vim.diagnostic.set()'], 'diagnostic.txt') + ok(vim.endswith(tagmap['%:s'], 'cmdline.txt'), tagmap['%:s'], 'cmdline.txt') + ok(is_noise([[vim:tw=78:isk=!-~,^*,^\|,^\":ts=8:noet:ft=help:norl:]])) + ok(is_noise([[ VIM REFERENCE MANUAL by Abe Lincoln ]])) + ok(not is_noise([[vim:tw=78]])) + + eq(0, get_indent('a')) + eq(1, get_indent(' a')) + eq(2, get_indent(' a\n b\n c\n')) + eq(5, get_indent(' a\n \n b\n c\n d\n e\n')) + eq('a\n \n b\n c\n d\n e\n', trim_indent(' a\n \n b\n c\n d\n e\n')) + + print('all tests passed') +end + +--- Generates HTML from :help docs located in `help_dir` and writes the result in `to_dir`. 
+---
+--- Example:
+---
+---   gen('./build/runtime/doc', '/path/to/neovim.github.io/_site/doc/', {'api.txt', 'autocmd.txt', 'channel.txt'})
+---
+--- Also writes "help.css" into `to_dir`, and prints one progress line per generated page
+--- followed by a summary (page count, total parse errors, invalid tags).
+---
+--- @param help_dir string  Source directory containing the :help files. Must run `make helptags` first.
+--- @param to_dir string  Target directory where the .html files will be written (created if missing).
+--- @param include table|nil  Process only these filenames. Example: {'api.txt', 'autocmd.txt', 'channel.txt'}
+---
+--- @returns table with fields `helpfiles`, `err_count` and `invalid_tags`
+function M.gen(help_dir, to_dir, include)
+  local function is_directory(d)
+    return vim.fn.isdirectory(d) == 1
+  end
+  vim.validate({
+    help_dir = { help_dir, is_directory, 'valid directory' },
+    to_dir = { to_dir, 's' },
+    include = { include, 't', true },
+  })
+
+  -- Module-level state shared with the node visitors.
+  tagmap = get_helptags(help_dir)
+  helpfiles = get_helpfiles(include)
+
+  print(string.format('output dir: %s', to_dir))
+  vim.fn.mkdir(to_dir, 'p')
+  gen_css(string.format('%s/help.css', to_dir))
+
+  local total_errors = 0
+  for _, src in ipairs(helpfiles) do
+    local name = vim.fs.basename(src)
+    local dest = string.format('%s/%s', to_dir, get_helppage(name))
+    -- Files not listed in `new_layout` are rendered with the preformatted (old) layout.
+    local use_old_layout = not new_layout[name]
+    local html, stats = gen_one(src, dest, use_old_layout)
+    tofile(dest, html)
+    local n_errors = #stats.parse_errors
+    print(string.format('generated (%-4s errors): %-15s => %s', n_errors, name, vim.fs.basename(dest)))
+    total_errors = total_errors + n_errors
+  end
+
+  print(string.format('generated %d html pages', #helpfiles))
+  print(string.format('total errors: %d', total_errors))
+  print(string.format('invalid tags:\n%s', vim.inspect(invalid_tags)))
+
+  return {
+    helpfiles = helpfiles,
+    err_count = total_errors,
+    invalid_tags = invalid_tags,
+  }
+end
+
+-- Validates all :help files found in `help_dir`:
+-- - checks that |tag| links point to valid helptags.
+-- - recursively counts parse errors ("ERROR" nodes)
+--
+-- This is 10x faster than gen(), for use in CI.
+-- +-- @returns results dict +function M.validate(help_dir, include) + vim.validate{ + help_dir={help_dir, function(d) return vim.fn.isdirectory(d) == 1 end, 'valid directory'}, + include={include, 't', true}, + } + local err_count = 0 + tagmap = get_helptags(help_dir) + helpfiles = get_helpfiles(include) + + for _, f in ipairs(helpfiles) do + local helpfile = vim.fs.basename(f) + local rv = validate_one(f) + print(('validated (%-4s errors): %s'):format(#rv.parse_errors, helpfile)) + err_count = err_count + #rv.parse_errors + end + + return { + helpfiles = helpfiles, + err_count = err_count, + invalid_tags = invalid_tags, + } +end + +return M diff --git a/scripts/gen_help_html.py b/scripts/gen_help_html.py deleted file mode 100644 index 0b8e77ac22..0000000000 --- a/scripts/gen_help_html.py +++ /dev/null @@ -1,389 +0,0 @@ -# Converts Vim/Nvim documentation to HTML. -# -# USAGE: -# 1. python3 scripts/gen_help_html.py runtime/doc/ ~/neovim.github.io/t/ -# 3. cd ~/neovim.github.io/ && jekyll serve --host 0.0.0.0 -# 2. Visit http://localhost:4000/t/help.txt.html -# -# Adapted from https://github.com/c4rlo/vimhelp/ -# License: MIT -# -# Copyright (c) 2016 Carlo Teubner -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import os -import re -import urllib.parse -import datetime -import sys -from itertools import chain - -HEAD = """\ - - - - - -Nvim: {filename} -""" - -HEAD_END = '\n\n' - -INTRO = """ -

Nvim help files

-

-Nvim help pages{vers-note}. -Updated automatically -from the Nvim source. -

-""" - -VERSION_NOTE = ", current as of Nvim {version}" - -SITENAVI_LINKS = """ -Quick reference · -User manual · -Reference manual · -""" - -SITENAVI_LINKS_PLAIN = SITENAVI_LINKS.format(helptxt='help.txt.html') -SITENAVI_LINKS_WEB = SITENAVI_LINKS.format(helptxt='/') - -SITENAVI_PLAIN = '

' + SITENAVI_LINKS_PLAIN + '

' -SITENAVI_WEB = '

' + SITENAVI_LINKS_WEB + '

' - -SITENAVI_SEARCH = '
' + SITENAVI_LINKS_WEB + \ - '
' - -TEXTSTART = """ -
-
""" + (" " * 80) + """
-
-
-"""
-
-FOOTER = '
' - -FOOTER2 = """ - -
-
- - -""".format( - generated_date='{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()), - commit='?') - -RE_TAGLINE = re.compile(r'(\S+)\s+(\S+)') - -PAT_WORDCHAR = '[!#-)+-{}~\xC0-\xFF]' - -PAT_HEADER = r'(^.*~$)' -PAT_GRAPHIC = r'(^.* `$)' -PAT_PIPEWORD = r'(?|.)?)' -PAT_SPECIAL = r'(<.+?>|\{.+?}|' \ - r'\[(?:range|line|count|offset|\+?cmd|[-+]?num|\+\+opt|' \ - r'arg|arguments|ident|addr|group)]|' \ - r'(?<=\s)\[[-a-z^A-Z0-9_]{2,}])' -PAT_TITLE = r'(Vim version [0-9.a-z]+|VIM REFERENCE.*)' -PAT_NOTE = r'((? \t]+[a-zA-Z0-9/])' -PAT_WORD = r'((?$') -RE_EG_END = re.compile(r'\S') -RE_SECTION = re.compile(r'[-A-Z .][-A-Z0-9 .()]*(?=\s+\*)') -RE_STARTAG = re.compile(r'\s\*([^ \t|]+)\*(?:\s|$)') -RE_LOCAL_ADD = re.compile(r'LOCAL ADDITIONS:\s+\*local-additions\*$') - - -class Link(object): - __slots__ = 'link_plain_same', 'link_pipe_same', \ - 'link_plain_foreign', 'link_pipe_foreign', \ - 'filename' - - def __init__(self, link_plain_same, link_plain_foreign, - link_pipe_same, link_pipe_foreign, filename): - self.link_plain_same = link_plain_same - self.link_plain_foreign = link_plain_foreign - self.link_pipe_same = link_pipe_same - self.link_pipe_foreign = link_pipe_foreign - self.filename = filename - - -class VimH2H(object): - def __init__(self, tags, version=None, is_web_version=True): - self._urls = {} - self._version = version - self._is_web_version = is_web_version - for line in RE_NEWLINE.split(tags): - m = RE_TAGLINE.match(line) - if m: - tag, filename = m.group(1, 2) - self.do_add_tag(filename, tag) - - def add_tags(self, filename, contents): - for match in RE_STARTAG.finditer(contents): - tag = match.group(1).replace('\\', '\\\\').replace('/', '\\/') - self.do_add_tag(str(filename), tag) - - def do_add_tag(self, filename, tag): - tag_quoted = urllib.parse.quote_plus(tag) - - def mkpart1(doc): - return '' + html_escape[tag] + '' - - def mklinks(cssclass): - return (part1_same + cssclass + part2, - part1_foreign + cssclass + part2) - cssclass_plain = 'd' - 
m = RE_LINKWORD.match(tag) - if m: - opt, ctrl, special = m.groups() - if opt is not None: - cssclass_plain = 'o' - elif ctrl is not None: - cssclass_plain = 'k' - elif special is not None: - cssclass_plain = 's' - links_plain = mklinks(cssclass_plain) - links_pipe = mklinks('l') - self._urls[tag] = Link( - links_plain[0], links_plain[1], - links_pipe[0], links_pipe[1], - filename) - - def maplink(self, tag, curr_filename, css_class=None): - links = self._urls.get(tag) - if links is not None: - if links.filename == curr_filename: - if css_class == 'l': - return links.link_pipe_same - else: - return links.link_plain_same - else: - if css_class == 'l': - return links.link_pipe_foreign - else: - return links.link_plain_foreign - elif css_class is not None: - return '' + html_escape[tag] + \ - '' - else: - return html_escape[tag] - - def to_html(self, filename, contents, encoding): - out = [] - - inexample = 0 - filename = str(filename) - is_help_txt = (filename == 'help.txt') - last = '' - for line in RE_NEWLINE.split(contents): - line = line.rstrip('\r\n') - line_tabs = line - line = line.expandtabs() - if last == 'h1': - out.extend(('')) # XXX - out.extend(('

', line.rstrip(), '

\n')) - out.extend(('
'))
-                last = ''
-                continue
-            if RE_HRULE.match(line):
-                # out.extend(('', line, '\n'))
-                last = 'h1'
-                continue
-            if inexample == 2:
-                if RE_EG_END.match(line):
-                    inexample = 0
-                    if line[0] == '<':
-                        line = line[1:]
-                else:
-                    out.extend(('', html_escape[line],
-                                '\n'))
-                    continue
-            if RE_EG_START.match(line_tabs):
-                inexample = 1
-                line = line[0:-1]
-            if RE_SECTION.match(line_tabs):
-                m = RE_SECTION.match(line)
-                out.extend((r'', m.group(0), r''))
-                line = line[m.end():]
-            lastpos = 0
-            for match in RE_TAGWORD.finditer(line):
-                pos = match.start()
-                if pos > lastpos:
-                    out.append(html_escape[line[lastpos:pos]])
-                lastpos = match.end()
-                header, graphic, pipeword, starword, command, opt, ctrl, \
-                    special, title, note, url, word = match.groups()
-                if pipeword is not None:
-                    out.append(self.maplink(pipeword, filename, 'l'))
-                elif starword is not None:
-                    out.extend(('', html_escape[starword], ''))
-                elif command is not None:
-                    out.extend(('', html_escape[command],
-                                ''))
-                elif opt is not None:
-                    out.append(self.maplink(opt, filename, 'o'))
-                elif ctrl is not None:
-                    out.append(self.maplink(ctrl, filename, 'k'))
-                elif special is not None:
-                    out.append(self.maplink(special, filename, 's'))
-                elif title is not None:
-                    out.extend(('', html_escape[title],
-                                ''))
-                elif note is not None:
-                    out.extend(('', html_escape[note],
-                                ''))
-                elif header is not None:
-                    out.extend(('', html_escape[header[:-1]],
-                                ''))
-                elif graphic is not None:
-                    out.append(html_escape[graphic[:-2]])
-                elif url is not None:
-                    out.extend(('' +
-                                html_escape[url], ''))
-                elif word is not None:
-                    out.append(self.maplink(word, filename))
-            if lastpos < len(line):
-                out.append(html_escape[line[lastpos:]])
-            out.append('\n')
-            if inexample == 1:
-                inexample = 2
-
-        header = []
-        header.append(HEAD.format(encoding=encoding, filename=filename))
-        header.append(HEAD_END)
-        if self._is_web_version and is_help_txt:
-            vers_note = VERSION_NOTE.replace('{version}', self._version) \
-                if self._version else ''
-            header.append(INTRO.replace('{vers-note}', vers_note))
-        if self._is_web_version:
-            header.append(SITENAVI_SEARCH)
-            sitenavi_footer = SITENAVI_WEB
-        else:
-            header.append(SITENAVI_PLAIN)
-            sitenavi_footer = SITENAVI_PLAIN
-        header.append(TEXTSTART)
-        return ''.join(chain(header, out, (FOOTER, sitenavi_footer, FOOTER2)))
-
-
-class HtmlEscCache(dict):
-    def __missing__(self, key):
-        r = key.replace('&', '&') \
-               .replace('<', '<') \
-               .replace('>', '>')
-        self[key] = r
-        return r
-
-
-html_escape = HtmlEscCache()
-
-
-def slurp(filename):
-    try:
-        with open(filename, encoding='UTF-8') as f:
-            return f.read(), 'UTF-8'
-    except UnicodeError:
-        # 'ISO-8859-1' ?
-        with open(filename, encoding='latin-1') as f:
-            return f.read(), 'latin-1'
-
-
-def usage():
-    return "usage: " + sys.argv[0] + " IN_DIR OUT_DIR [BASENAMES...]"
-
-
-def main():
-    if len(sys.argv) < 3:
-        sys.exit(usage())
-
-    in_dir = sys.argv[1]
-    out_dir = sys.argv[2]
-    basenames = sys.argv[3:]
-
-    print("Processing tags...")
-    h2h = VimH2H(slurp(os.path.join(in_dir, 'tags'))[0], is_web_version=False)
-
-    if len(basenames) == 0:
-        basenames = os.listdir(in_dir)
-
-    for basename in basenames:
-        if os.path.splitext(basename)[1] != '.txt' and basename != 'tags':
-            print("Ignoring " + basename)
-            continue
-        print("Processing " + basename + "...")
-        path = os.path.join(in_dir, basename)
-        text, encoding = slurp(path)
-        outpath = os.path.join(out_dir, basename + '.html')
-        of = open(outpath, 'w')
-        of.write(h2h.to_html(basename, text, encoding))
-        of.close()
-
-
-main()
diff --git a/test/functional/lua/help_spec.lua b/test/functional/lua/help_spec.lua
new file mode 100644
index 0000000000..266f261ab9
--- /dev/null
+++ b/test/functional/lua/help_spec.lua
@@ -0,0 +1,54 @@
+-- Tests for gen_help_html.lua. Validates :help tags/links and HTML doc generation.
+--
+-- TODO: extract parts of gen_help_html.lua into Nvim stdlib?
+
+local helpers = require('test.functional.helpers')(after_each)
+local clear = helpers.clear
+local exec_lua = helpers.exec_lua
+local eq = helpers.eq
+local ok = helpers.ok
+
+describe(':help docs', function()
+  before_each(clear)
+
+  it('validate', function()
+    -- Troubleshooting when this test fails (try in this order):
+    -- 1. Fix or clean up the :help docs, Nvim-owned docs in particular.
+    -- 2. Fix the parser: https://github.com/vigoux/tree-sitter-vimdoc
+    -- 3. Report a parser bug and relax the tolerances below in the meantime.
+    local result = exec_lua([[return require('scripts.gen_help_html').validate('./build/runtime/doc')]])
+    -- Loose ceiling: only guards against a wild increase in parse errors.
+    -- TODO: yes, the parser currently reports on the order of 24k errors.
+    --       WIP: https://github.com/vigoux/tree-sitter-vimdoc/pull/16
+    ok(result.err_count < 24000, '<24000', result.err_count)
+    -- TODO: should be eq(0, …)
+    local tag_count = exec_lua('return vim.tbl_count(...)', result.invalid_tags)
+    local tag_dump = exec_lua('return vim.inspect(...)', result.invalid_tags)
+    ok(tag_count < 538, '<538', tag_dump)
+  end)
+
+  it('gen_help_html.lua generates HTML', function()
+    -- What this covers:
+    -- 1. Parse errors did not increase wildly. Only a few :help files are processed here,
+    --    so the tolerances can be tighter than in the full validate() run.
+    -- 2. gen_help_html.lua runs end-to-end and actually produces output.
+    -- 3. Its tree-sitter-vimdoc dependency is working.
+    local out_dir = exec_lua('return vim.fs.dirname(vim.fn.tempname())')
+    -- gen() is slow (1 min), so only a handful of files are generated.
+    local gen_chunk = [[
+      local to_dir = ...
+      return require('scripts.gen_help_html').gen(
+        './build/runtime/doc',
+        to_dir,
+        { 'pi_health.txt', 'help.txt', 'index.txt', 'nvim.txt', }
+      )
+      ]]
+    local result = exec_lua(gen_chunk, out_dir)
+    eq(4, #result.helpfiles)
+    ok(result.err_count < 700, '<700', result.err_count)
+    -- TODO: should be eq(0, …)
+    local tag_count = exec_lua('return vim.tbl_count(...)', result.invalid_tags)
+    local tag_dump = exec_lua('return vim.inspect(...)', result.invalid_tags)
+    ok(tag_count <= 32, '<=32', tag_dump)
+  end)
+end)