feat(treesitter): bundle markdown parser and queries (#22481)

* bundle split Markdown parser from https://github.com/MDeiml/tree-sitter-markdown
* add queries from https://github.com/nvim-treesitter/nvim-treesitter/tree/main
* upstream `#trim!` and `#inject-language!` directives

Co-authored-by: dundargoc <gocdundar@gmail.com>
This commit is contained in:
Christian Clason 2023-07-01 11:08:06 +02:00 committed by GitHub
parent 538b6c3853
commit 11844dde81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 325 additions and 3 deletions

View File

@ -37,3 +37,4 @@ endfunction()
# Parsers that are built by passing only LANG to BuildTSParser.
foreach(lang c lua vim vimdoc query)
BuildTSParser(LANG ${lang})
endforeach()
# Markdown additionally passes CMAKE_FILE to supply its own CMake file
# (presumably because the grammar is split into more than one library --
# confirm against MarkdownParserCMakeLists.txt).
BuildTSParser(LANG markdown CMAKE_FILE MarkdownParserCMakeLists.txt)

View File

@ -0,0 +1,28 @@
# Builds the split Markdown grammar as two loadable modules, `markdown` and
# `markdown_inline`, and installs both into lib/nvim/parser.
cmake_minimum_required(VERSION 3.10)
project(${PARSERLANG} C)
set(CMAKE_C_STANDARD 99)
# First module: sources from the tree-sitter-markdown/ subdirectory.
add_library(markdown MODULE
tree-sitter-markdown/src/parser.c
tree-sitter-markdown/src/scanner.c)
target_include_directories(markdown
PRIVATE
tree-sitter-markdown/src)
# Second module: sources from the tree-sitter-markdown-inline/ subdirectory.
add_library(markdown_inline MODULE
tree-sitter-markdown-inline/src/parser.c
tree-sitter-markdown-inline/src/scanner.c)
target_include_directories(markdown_inline
PRIVATE
tree-sitter-markdown-inline/src)
# An empty PREFIX keeps CMake from prepending a prefix such as "lib" to the
# output file names, so the files are named after the targets.
set_target_properties(
markdown markdown_inline
PROPERTIES
PREFIX ""
)
install(TARGETS markdown markdown_inline LIBRARY DESTINATION lib/nvim/parser)
# vim: set ft=cmake:

View File

@ -54,5 +54,7 @@ TREESITTER_VIMDOC_URL https://github.com/neovim/tree-sitter-vimdoc/archive/v2.0.
TREESITTER_VIMDOC_SHA256 61e165df29778dc0c9277c2a7bc67447cc4e1bed36ca916a2f476dd25ce3260e
TREESITTER_QUERY_URL https://github.com/nvim-treesitter/tree-sitter-query/archive/v0.1.0.tar.gz
TREESITTER_QUERY_SHA256 e2b806f80e8bf1c4f4e5a96248393fe6622fc1fc6189d6896d269658f67f914c
TREESITTER_MARKDOWN_URL https://github.com/MDeiml/tree-sitter-markdown/archive/936cc84289f6de83c263ae8e659fb342867ceb16.tar.gz
TREESITTER_MARKDOWN_SHA256 4f2315930dc2c1bd42971a0b728cf4dafc57830c61f8abe3e2548cf230968713
TREESITTER_URL https://github.com/tree-sitter/tree-sitter/archive/91e4d940169a0c0b024560632ef53c4f119117ca.tar.gz
TREESITTER_SHA256 e15e335d127d38aaa73e727f3169df6015f43de1010d806e69b9e9222ad50fe1

View File

@ -101,6 +101,8 @@ The following new APIs and features were added.
• Implemented LSP inlay hints: |vim.lsp.inlay_hint()|
https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_inlayHint
• Bundled Markdown parser for treesitter highlighting and folding.
==============================================================================
CHANGED FEATURES *news-changed*
@ -120,7 +122,8 @@ The following changes to existing APIs or features add new behavior.
• Automatic linting of treesitter query files (see |ft-query-plugin|).
Can be disabled via: >lua
vim.g.query_lint_on = {}
< • Enabled treesitter highlighting for treesitter query files.
<
• Enabled treesitter highlighting for treesitter query files.
• The `workspace/didChangeWatchedFiles` LSP client capability is now enabled
by default.

View File

@ -292,7 +292,39 @@ The following directives are built in:
Example: >query
((identifier) @constant (#offset! @constant 0 1 0 -1))
<
`gsub!` *treesitter-directive-gsub!*
Transforms the content of the node using a Lua pattern. This will set
a new `metadata[capture_id].text`.
Parameters: ~
{capture_id}
{pattern}
Example: >query
(#gsub! @_node ".*%.(.*)" "%1")
<
`trim!` *treesitter-directive-trim!*
Trim blank lines from the end of the node. This will set a new
`metadata[capture_id].range`.
Parameters: ~
{capture_id}
Example: >query
(#trim! @fold)
<
`inject-language!` *treesitter-directive-inject-language!*
Set the injection language from the node text, interpreted first as a
language name, then (if a parser is not found) a filetype. Custom
aliases can be added via |vim.treesitter.language.register()|. This
will set a new `metadata['injection.language']`.
Parameters: ~
{capture_id}
Example: >query
(#inject-language! @_lang)
<
Further directives can be added via |vim.treesitter.query.add_directive()|.
Use |vim.treesitter.query.list_directives()| to list all available directives.

View File

@ -475,7 +475,6 @@ local directive_handlers = {
metadata[capture_id].range = range
end
end,
-- Transform the content of the node
-- Example: (#gsub! @_node ".*%.(.*)" "%1")
['gsub!'] = function(match, _, bufnr, pred, metadata)
@ -497,6 +496,65 @@ local directive_handlers = {
metadata[id].text = text:gsub(pattern, replacement)
end,
-- Trim blank lines from end of the node
-- The trimmed range is stored as `metadata[capture_id].range`, i.e. on the
-- per-capture metadata table (the same place the `offset!` directive writes
-- to), so that it applies to the captured node and not to the whole pattern.
-- Nothing is stored when the node does not end at column 0 or when trimming
-- would yield an invalid (inverted) range.
-- Example: (#trim! @fold)
-- TODO(clason): generalize to arbitrary whitespace removal
['trim!'] = function(match, _, bufnr, pred, metadata)
  local capture_id = pred[2]
  local node = match[capture_id]
  if not node then
    return
  end

  local start_row, start_col, end_row, end_col = node:range()

  -- Don't trim if region ends in middle of a line
  if end_col ~= 0 then
    return
  end

  -- Stop at row 0 so that we never ask for a negative line index.
  while end_row > 0 do
    -- As we only care when end_col == 0, always inspect one line above end_row.
    local end_line = vim.api.nvim_buf_get_lines(bufnr, end_row - 1, end_row, true)[1]

    if end_line ~= '' then
      break
    end

    end_row = end_row - 1
  end

  -- If this produces an invalid range, we just skip it.
  if start_row < end_row or (start_row == end_row and start_col <= end_col) then
    -- The per-capture table may not exist yet; create it on demand.
    if not metadata[capture_id] then
      metadata[capture_id] = {}
    end
    metadata[capture_id].range = { start_row, start_col, end_row, end_col }
  end
end,
-- Derive the injection language from the text of the captured node.
-- The text is tried as a language name first; if no parser is found for it,
-- it is passed through `vim.treesitter.language.get_lang()` and the result is
-- tried instead. On success the name is stored in
-- `metadata['injection.language']`; otherwise metadata is left untouched.
-- Example: (#inject-language! @_lang)
['inject-language!'] = function(match, _, bufnr, pred, metadata)
  local capture_id = pred[2]
  local lang_node = match[capture_id]
  if not lang_node then
    return
  end

  -- TODO(clason): replace by refactored `ts.has_parser` API
  local function parser_available(name)
    if vim._ts_has_language(name) then
      return true
    end
    -- Otherwise look for a parser file on the runtime path.
    local found = vim.api.nvim_get_runtime_file('parser/' .. name .. '.*', false)
    return #found > 0
  end

  local text = vim.treesitter.get_node_text(lang_node, bufnr, { metadata = metadata[capture_id] })
  if not text then
    return
  end

  local resolved
  if parser_available(text) then
    resolved = text
  else
    local mapped = vim.treesitter.language.get_lang(text)
    if mapped and parser_available(mapped) then
      resolved = mapped
    end
  end

  if resolved then
    metadata['injection.language'] = resolved
  end
end,
}
--- Adds a new predicate to be used in queries

View File

@ -0,0 +1,9 @@
; Fold code blocks, lists and sections.
; `#trim!` is applied to the captured node so that trailing blank lines are
; excluded from the fold.
(
[
(fenced_code_block)
(indented_code_block)
(list)
(section)
] @fold
(#trim! @fold)
)

View File

@ -0,0 +1,63 @@
;From MDeiml/tree-sitter-markdown & Helix
; Headings: setext (underlined) and ATX (`#`-prefixed) forms; the heading text
; and its marker get separate captures per level.
(setext_heading (paragraph) @text.title.1 (setext_h1_underline) @text.title.1.marker)
(setext_heading (paragraph) @text.title.2 (setext_h2_underline) @text.title.2.marker)
(atx_heading (atx_h1_marker) @text.title.1.marker (inline) @text.title.1)
(atx_heading (atx_h2_marker) @text.title.2.marker (inline) @text.title.2)
(atx_heading (atx_h3_marker) @text.title.3.marker (inline) @text.title.3)
(atx_heading (atx_h4_marker) @text.title.4.marker (inline) @text.title.4)
(atx_heading (atx_h5_marker) @text.title.5.marker (inline) @text.title.5)
(atx_heading (atx_h6_marker) @text.title.6.marker (inline) @text.title.6)
; Literal text and code blocks
(link_title) @text.literal
(indented_code_block) @text.literal.block
((fenced_code_block) @text.literal.block (#set! "priority" 90))
(info_string) @label
; Pipe tables
(pipe_table_header (pipe_table_cell) @text.title)
(pipe_table_header "|" @punctuation.special)
(pipe_table_row "|" @punctuation.special)
(pipe_table_delimiter_row "|" @punctuation.special)
(pipe_table_delimiter_cell) @punctuation.special
[
(fenced_code_block_delimiter)
] @punctuation.delimiter
(code_fence_content) @none
; Link destinations and labels
[
(link_destination)
] @text.uri
[
(link_label)
] @text.reference
; List markers and thematic breaks
[
(list_marker_plus)
(list_marker_minus)
(list_marker_star)
(list_marker_dot)
(list_marker_parenthesis)
(thematic_break)
] @punctuation.special
; Task list checkboxes
(task_list_marker_unchecked) @text.todo.unchecked
(task_list_marker_checked) @text.todo.checked
; Block quotes and their continuation markers
(block_quote) @text.quote
[
(block_continuation)
(block_quote_marker)
] @punctuation.special
[
(backslash_escape)
] @string.escape
; Inline text is captured as @spell
(inline) @spell

View File

@ -0,0 +1,26 @@
; Fenced code blocks: the language is taken from the info string via the
; `inject-language!` directive.
(fenced_code_block
(info_string
(language) @_lang)
(code_fence_content) @injection.content
(#inject-language! @_lang))
; HTML blocks are injected as "html", with the `injection.combined` and
; `injection.include-children` options set.
((html_block) @injection.content
(#set! injection.language "html")
(#set! injection.combined)
(#set! injection.include-children))
; `minus_metadata` blocks are injected as YAML; `#offset!` shifts the start row
; by +1 and the end row by -1 (presumably to skip the delimiter lines -- confirm).
((minus_metadata) @injection.content
(#set! injection.language "yaml")
(#offset! @injection.content 1 0 -1 0)
(#set! injection.include-children))
; `plus_metadata` blocks are injected as TOML, with the same row offsets.
((plus_metadata) @injection.content
(#set! injection.language "toml")
(#offset! @injection.content 1 0 -1 0)
(#set! injection.include-children))
; Inline content and table cells are handed to the markdown_inline parser.
([
(inline)
(pipe_table_cell)
] @injection.content
(#set! injection.language "markdown_inline"))

View File

@ -0,0 +1,92 @@
;; From MDeiml/tree-sitter-markdown
; Code spans and link titles: literal text, also captured as @nospell
[
(code_span)
(link_title)
] @text.literal @nospell
[
(emphasis_delimiter)
(code_span_delimiter)
] @punctuation.delimiter
; Text styles
(emphasis) @text.emphasis
(strong_emphasis) @text.strong
(strikethrough) @text.strike
; URIs, also captured as @nospell
[
(link_destination)
(uri_autolink)
] @text.uri @nospell
; Link labels, link text and image descriptions
[
(link_label)
(link_text)
(image_description)
] @text.reference
[
(backslash_escape)
(hard_line_break)
] @string.escape
; Punctuation of images and links
(image "!" @punctuation.special)
(image ["[" "]" "(" ")"] @punctuation.bracket)
(inline_link ["[" "]" "(" ")"] @punctuation.bracket)
(shortcut_link ["[" "]"] @punctuation.bracket)
; Conceal codeblock and text style markers
([
(code_span_delimiter)
(emphasis_delimiter)
] @conceal
(#set! conceal ""))
; Conceal inline links
(inline_link
[
"["
"]"
"("
(link_destination)
")"
] @conceal
(#set! conceal ""))
; Conceal image links
(image
[
"!"
"["
"]"
"("
(link_destination)
")"
] @conceal
(#set! conceal ""))
; Conceal full reference links
(full_reference_link
[
"["
"]"
(link_label)
] @conceal
(#set! conceal ""))
; Conceal collapsed reference links
(collapsed_reference_link
[
"["
"]"
] @conceal
(#set! conceal ""))
; Conceal shortcut links
(shortcut_link
[
"["
"]"
] @conceal
(#set! conceal ""))

View File

@ -0,0 +1,8 @@
; Inline HTML tags are injected as "html", with the `injection.combined` and
; `injection.include-children` options set.
((html_tag) @injection.content
(#set! injection.language "html")
(#set! injection.combined)
(#set! injection.include-children))
; LaTeX blocks are injected as "latex".
((latex_block) @injection.content
(#set! injection.language "latex")
(#set! injection.include-children))

View File

@ -783,7 +783,7 @@ int x = INT_MAX;
return list
]]
eq({ 'gsub!', 'offset!', 'set!' }, res_list)
eq({ 'gsub!', 'inject-language!', 'offset!', 'set!', 'trim!' }, res_list)
end)
end)
end)