-- Helpers for converting markdown text into vimdoc-formatted text.
-- Markdown is parsed with the tree-sitter `markdown` and `markdown_inline`
-- parsers into a plain Lua tree (MDNode), which is then rendered to a string.

local fmt = string.format

--- A parsed markdown node. Child nodes are stored in the array part; children
--- that have a tree-sitter field name are stored under that field name instead.
--- @class nvim.text_utils.MDNode
--- @field [integer] nvim.text_utils.MDNode
--- @field type string
--- @field text? string

-- Extra indentation applied to the content of fenced code blocks.
local INDENTATION = 4

-- Placeholder byte substituted for spaces inside code spans so that wrapping
-- does not split them; converted back to a plain space in M.md_to_vimdoc().
local NBSP = string.char(160)

local M = {}

--- Argument-swapped wrapper around vim.tbl_contains().
--- @param t any value to look for
--- @param xs any[] list to search
--- @return boolean
local function contains(t, xs)
  return vim.tbl_contains(xs, t)
end

--- Extract the part of `txt` between two (row, col) positions.
---
--- Rows and start columns are zero-based; `ecol` is used directly as the
--- (inclusive, one-based) end index for string.sub(). When `erow`/`ecol` are
--- omitted the slice runs to the end of `txt`.
---
--- @param txt string
--- @param srow integer
--- @param scol integer
--- @param erow? integer
--- @param ecol? integer
--- @return string
local function slice_text(txt, srow, scol, erow, ecol)
  local lines = vim.split(txt, '\n')

  -- Fast path: the slice starts and ends on the same line.
  if srow == erow then
    return lines[srow + 1]:sub(scol + 1, ecol)
  end

  if erow then
    -- Trim the end
    for _ = erow + 2, #lines do
      table.remove(lines, #lines)
    end
  end

  -- Trim the start
  for _ = 1, srow do
    table.remove(lines, 1)
  end

  lines[1] = lines[1]:sub(scol + 1)
  lines[#lines] = lines[#lines]:sub(1, ecol)

  return table.concat(lines, '\n')
end

--- Parse `text` with the `markdown_inline` parser and convert the resulting
--- tree into an MDNode tree.
---
--- @param text string
--- @return nvim.text_utils.MDNode
local function parse_md_inline(text)
  local parser = vim.treesitter.languagetree.new(text, 'markdown_inline')
  local root = parser:parse(true)[1]:root()

  --- @param node TSNode
  --- @return nvim.text_utils.MDNode?
  local function extract(node)
    local ntype = node:type()

    -- Skip nodes whose type is a single punctuation character.
    if ntype:match('^%p$') then
      return
    end

    --- @type table<any,any>
    local ret = { type = ntype }
    ret.text = vim.treesitter.get_node_text(node, text)

    -- End position of the previously handled child of an 'inline' node.
    local row, col = 0, 0

    for child, child_field in node:iter_children() do
      local e = extract(child)
      if e and ntype == 'inline' then
        local srow, scol = child:start()
        -- Text lying between the previous child and this one is not covered
        -- by any child node, so insert it as a synthetic 'text' node.
        if (srow == row and scol > col) or srow > row then
          local t = slice_text(ret.text, row, col, srow, scol)
          if t and t ~= '' then
            table.insert(ret, { type = 'text', j = true, text = t })
          end
        end
        row, col = child:end_()
      end

      if child_field then
        ret[child_field] = e
      else
        table.insert(ret, e)
      end
    end

    -- Text following the last child of an 'inline' node.
    if ntype == 'inline' and (row > 0 or col > 0) then
      local t = slice_text(ret.text, row, col)
      if t and t ~= '' then
        table.insert(ret, { type = 'text', text = t })
      end
    end

    return ret
  end

  return extract(root) or {}
end

--- Parse `text` with the `markdown` parser and convert the resulting tree into
--- an MDNode tree. 'inline' nodes are re-parsed with parse_md_inline().
---
--- @param text string
--- @return nvim.text_utils.MDNode
local function parse_md(text)
  local parser = vim.treesitter.languagetree.new(text, 'markdown', {
    injections = { markdown = '' },
  })

  local root = parser:parse(true)[1]:root()

  -- Node types for which the `text` field is not populated.
  local EXCLUDE_TEXT_TYPE = {
    list = true,
    list_item = true,
    section = true,
    document = true,
    fenced_code_block = true,
    fenced_code_block_delimiter = true,
  }

  --- @param node TSNode
  --- @return nvim.text_utils.MDNode?
  local function extract(node)
    local ntype = node:type()

    -- Skip single-punctuation node types and block continuations.
    if ntype:match('^%p$') or contains(ntype, { 'block_continuation' }) then
      return
    end

    --- @type table<any,any>
    local ret = { type = ntype }

    if not EXCLUDE_TEXT_TYPE[ntype] then
      ret.text = vim.treesitter.get_node_text(node, text)
    end

    if ntype == 'inline' then
      ret = parse_md_inline(ret.text)
    end

    for child, child_field in node:iter_children() do
      local e = extract(child)
      if child_field then
        ret[child_field] = e
      else
        table.insert(ret, e)
      end
    end

    return ret
  end

  return extract(root) or {}
end

--- Word-wrap `x`.
---
--- The text is trimmed and split on whitespace. The first output line is
--- prefixed with `start_indent` spaces; every continuation line is prefixed
--- with `indent` spaces. A line break is inserted before a word when adding it
--- would make the running column count exceed `text_width - 1`. Whitespace
--- before a newline and trailing newlines are removed from the result.
---
--- @param x string
--- @param start_indent integer
--- @param indent integer
--- @param text_width integer
--- @return string
function M.wrap(x, start_indent, indent, text_width)
  local words = vim.split(vim.trim(x), '%s+')
  local parts = { string.rep(' ', start_indent) } --- @type string[]
  -- Running column count; it starts from `indent` (not `start_indent`).
  local count = indent

  for i, w in ipairs(words) do
    -- `count > indent` guarantees at least one word per line, so an overlong
    -- word never produces an empty line.
    if count > indent and count + #w > text_width - 1 then
      parts[#parts + 1] = '\n'
      parts[#parts + 1] = string.rep(' ', indent)
      count = indent
    elseif i ~= 1 then
      parts[#parts + 1] = ' '
      count = count + 1
    end
    count = count + #w
    parts[#parts + 1] = w
  end

  -- Outer parentheses drop gsub's second return value (the match count).
  return (table.concat(parts):gsub('%s+\n', '\n'):gsub('\n+$', ''))
end

--- Render an MDNode tree into a list of string fragments which, when
--- concatenated, form the vimdoc output.
---
--- Raises an error for 'html_tag' nodes and for any node type that has no
--- dedicated branch but carries a `text` field.
---
--- @param node nvim.text_utils.MDNode
--- @param start_indent integer indentation of the first line
--- @param indent integer indentation of the following lines
--- @param text_width integer
--- @param level integer nesting depth of `node` (the root is rendered at 0)
--- @param is_list? boolean only forwarded to recursive calls in this function
--- @return string[]
local function render_md(node, start_indent, indent, text_width, level, is_list)
  local parts = {} --- @type string[]

  -- For debugging
  local add_tag = false
  -- local add_tag = true

  local ntype = node.type

  if add_tag then
    parts[#parts + 1] = '<' .. ntype .. '>'
  end

  if ntype == 'text' then
    parts[#parts + 1] = node.text
  elseif ntype == 'html_tag' then
    error('html_tag: ' .. node.text)
  elseif ntype == 'inline_link' then
    vim.list_extend(parts, { '*', node[1].text, '*' })
  elseif ntype == 'shortcut_link' then
    if node[1].text:find('^<.*>$') then
      -- `<...>` text is emitted without any delimiters.
      parts[#parts + 1] = node[1].text
    elseif node[1].text:find('^%d+$') then
      -- Purely numeric text keeps its square brackets.
      vim.list_extend(parts, { '[', node[1].text, ']' })
    else
      vim.list_extend(parts, { '|', node[1].text, '|' })
    end
  elseif ntype == 'backslash_escape' then
    parts[#parts + 1] = node.text
  elseif ntype == 'emphasis' then
    -- Drop the first and last character (the emphasis delimiters).
    parts[#parts + 1] = node.text:sub(2, -2)
  elseif ntype == 'code_span' then
    -- Spaces become NBSP so the span is treated as one word by M.wrap().
    vim.list_extend(parts, { '`', node.text:sub(2, -2):gsub(' ', NBSP), '`' })
  elseif ntype == 'inline' then
    if #node == 0 then
      local text = assert(node.text)
      parts[#parts + 1] = M.wrap(text, start_indent, indent, text_width)
    else
      for _, child in ipairs(node) do
        vim.list_extend(parts, render_md(child, start_indent, indent, text_width, level + 1))
      end
    end
  elseif ntype == 'paragraph' then
    local pparts = {}
    for _, child in ipairs(node) do
      vim.list_extend(pparts, render_md(child, start_indent, indent, text_width, level + 1))
    end
    parts[#parts + 1] = M.wrap(table.concat(pparts), start_indent, indent, text_width)
    parts[#parts + 1] = '\n'
  elseif ntype == 'code_fence_content' then
    local lines = vim.split(node.text:gsub('\n%s*$', ''), '\n')

    local cindent = indent + INDENTATION
    if level > 3 then
      -- The tree-sitter markdown parser doesn't parse the code blocks indents
      -- correctly in lists. Fudge it!
      -- NOTE(review): confirm the width of this padding literal; whitespace
      -- runs may have been collapsed when this file was extracted.
      lines[1] = ' ' .. lines[1] -- ¯\_(ツ)_/¯
      cindent = indent - level
      local _, initial_indent = lines[1]:find('^%s*')
      initial_indent = initial_indent + cindent
      if initial_indent < indent then
        cindent = indent - INDENTATION
      end
    end

    for _, l in ipairs(lines) do
      -- Empty lines are emitted without indentation.
      if #l > 0 then
        parts[#parts + 1] = string.rep(' ', cindent)
        parts[#parts + 1] = l
      end
      parts[#parts + 1] = '\n'
    end
  elseif ntype == 'fenced_code_block' then
    -- Emitted as `>{info_string}` ... `<`.
    parts[#parts + 1] = '>'
    for _, child in ipairs(node) do
      if child.type == 'info_string' then
        parts[#parts + 1] = child.text
        break
      end
    end
    parts[#parts + 1] = '\n'
    for _, child in ipairs(node) do
      if child.type ~= 'info_string' then
        vim.list_extend(parts, render_md(child, start_indent, indent, text_width, level + 1))
      end
    end
    parts[#parts + 1] = '<\n'
  elseif ntype == 'html_block' then
    -- Strip a leading `<pre>help` and a trailing `</pre>`; the remaining text
    -- is emitted verbatim.
    local text = node.text:gsub('^<pre>help', '')
    text = text:gsub('</pre>%s*$', '')
    parts[#parts + 1] = text
  elseif ntype == 'list_marker_dot' then
    parts[#parts + 1] = node.text
  elseif contains(ntype, { 'list_marker_minus', 'list_marker_star' }) then
    parts[#parts + 1] = '• '
  elseif ntype == 'list_item' then
    parts[#parts + 1] = string.rep(' ', indent)
    -- Extra indentation for the item body: 3 columns after a 'list_marker_dot'
    -- marker, 2 otherwise.
    local offset = node[1].type == 'list_marker_dot' and 3 or 2
    for i, child in ipairs(node) do
      -- The first two children are rendered with no start indentation since
      -- the item's indentation was already emitted above.
      local sindent = i <= 2 and 0 or (indent + offset)
      vim.list_extend(
        parts,
        render_md(child, sindent, indent + offset, text_width, level + 1, true)
      )
    end
  else
    -- Only pure container nodes (no `text`) may fall through to here.
    if node.text then
      error(fmt('cannot render:\n%s', vim.inspect(node)))
    end

    for i, child in ipairs(node) do
      local start_indent0 = i == 1 and start_indent or indent
      vim.list_extend(
        parts,
        render_md(child, start_indent0, indent, text_width, level + 1, is_list)
      )
      -- Separate siblings with a newline, except inside a list and except
      -- when the next sibling is a list.
      if ntype ~= 'list' and i ~= #node then
        if (node[i + 1] or {}).type ~= 'list' then
          parts[#parts + 1] = '\n'
        end
      end
    end
  end

  if add_tag then
    parts[#parts + 1] = '</' .. ntype .. '>'
  end

  return parts
end

--- Create a function that moves trailing `*...*` text on a line towards the
--- `text_width` column by padding with spaces.
---
--- @param text_width integer
--- @return fun(line: string): string
local function align_tags(text_width)
  --- @param line string
  --- @return string
  return function(line)
    local tag_pat = '%s*(%*.+%*)%s*$'
    local tags = {}
    for m in line:gmatch(tag_pat) do
      table.insert(tags, m)
    end

    if #tags > 0 then
      line = line:gsub(tag_pat, '')
      local tags_str = ' ' .. table.concat(tags, ' ')
      -- Number of `*` characters in the tag string, minus 2.
      --- @type integer
      local conceal_offset = select(2, tags_str:gsub('%*', '')) - 2
      local pad = string.rep(' ', text_width - #line - #tags_str + conceal_offset)
      return line .. pad .. tags_str
    end

    return line
  end
end

--- Convert markdown `text` to vimdoc-formatted text.
---
--- @param text string
--- @param start_indent integer indentation of the first line
--- @param indent integer indentation of the following lines
--- @param text_width integer
--- @param is_list? boolean
--- @return string
function M.md_to_vimdoc(text, start_indent, indent, text_width, is_list)
  -- Add an extra newline so the parser can properly capture ending ```
  local parsed = parse_md(text .. '\n')
  local ret = render_md(parsed, start_indent, indent, text_width, 0, is_list)

  -- Restore the spaces that were protected inside code spans.
  local lines = vim.split(table.concat(ret):gsub(NBSP, ' '), '\n')

  lines = vim.tbl_map(align_tags(text_width), lines)

  local s = table.concat(lines, '\n')

  -- Reduce whitespace in code-blocks
  s = s:gsub('\n+%s*>([a-z]+)\n', ' >%1\n')
  s = s:gsub('\n+%s*>\n?\n', ' >\n')

  return s
end

return M