This commit is contained in:
Jaehwang Jung 2024-09-16 13:12:41 +02:00 committed by GitHub
commit b0fae12c71
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 586 additions and 159 deletions

View File

@ -87,6 +87,9 @@ TREESITTER
capture IDs to a list of nodes that need to be iterated over. For
backwards compatibility, an option `all=false` (only return the last
matching node) is provided that will be removed in a future release.
• |LanguageTree:trees()| no longer guarantees that the returned table is
list-like even after a full parse. Always use |next()| or |pairs()| to
access it.
TUI
@ -186,6 +189,9 @@ TREESITTER
• |LanguageTree:node_for_range()| gets anonymous and named nodes for a range
• |vim.treesitter.get_node()| now takes an option `include_anonymous`, default
false, which allows it to return anonymous nodes as well as named nodes.
• |LanguageTree:parse()| runs injection query only on the provided list of
ranges as long as the language does not have a combined injection,
significantly improving |treesitter-highlight| performance.
TUI

View File

@ -1343,7 +1343,7 @@ LanguageTree:invalidate({reload}) *LanguageTree:invalidate()*
Should only be called when the tracked state of the LanguageTree is not
valid against the parse tree in treesitter. Doesn't clear filesystem
cache. Called often, so needs to be fast.
cache.
Parameters: ~
• {reload} (`boolean?`)
@ -1351,7 +1351,8 @@ LanguageTree:invalidate({reload}) *LanguageTree:invalidate()*
LanguageTree:is_valid({exclude_children}) *LanguageTree:is_valid()*
Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()|
reflects the latest state of the source. If invalid, user should call
|LanguageTree:parse()|.
|LanguageTree:parse()|. `is_valid(false)` can be slow because it runs
injection on the full source.
Parameters: ~
• {exclude_children} (`boolean?`) whether to ignore the validity of
@ -1411,7 +1412,7 @@ LanguageTree:node_for_range({range}, {opts})
Return: ~
(`TSNode?`)
LanguageTree:parse({range}) *LanguageTree:parse()*
LanguageTree:parse({ranges}) *LanguageTree:parse()*
Recursively parse all regions in the language tree using
|treesitter-parsers| for the corresponding languages and run injection
queries on the parsed trees to determine whether child trees should be
@ -1422,11 +1423,11 @@ LanguageTree:parse({range}) *LanguageTree:parse()*
if {range} is `true`).
Parameters: ~
• {range} (`boolean|Range?`) Parse this range in the parser's source.
Set to `true` to run a complete parse of the source (Note:
Can be slow!) Set to `false|nil` to only parse regions with
empty ranges (typically only the root tree without
injections).
• {ranges} (`boolean|Range|(Range)[]?`) Parse this range(s) in the
parser's source. Set to `true` to run a complete parse of
the source (Note: Can be slow!) Set to `false|nil` to only
parse regions with empty ranges (typically only the root
tree without injections).
Return: ~
(`table<integer, TSTree>`)
@ -1478,10 +1479,7 @@ LanguageTree:tree_for_range({range}, {opts})
LanguageTree:trees() *LanguageTree:trees()*
Returns all trees of the regions parsed by this parser. Does not include
child languages. The result is list-like if
• this LanguageTree is the root, in which case the result is empty or a
singleton list; or
• the root LanguageTree is fully parsed.
child languages.
Return: ~
(`table<integer, TSTree>`)

View File

@ -555,7 +555,8 @@ local function update_editor_highlights(query_win, base_win, lang)
-- Remove the '@' from the cursor word
cursor_word = cursor_word:sub(2)
local topline, botline = vim.fn.line('w0', base_win), vim.fn.line('w$', base_win)
for id, node in query:iter_captures(parser:trees()[1]:root(), base_buf, topline - 1, botline) do
local _, tree = next(parser:trees())
for id, node in query:iter_captures(tree:root(), base_buf, topline - 1, botline) do
local capture_name = query.captures[id]
if capture_name == cursor_word then
local lnum, col, end_lnum, end_col = node:range()

View File

@ -391,6 +391,23 @@ function TSHighlighter._on_spell_nav(_, _, buf, srow, _, erow, _)
self._highlight_states = highlight_states
end
--- Decoration-provider start hook.
---
--- Gathers, for every window of tabpage `0` whose buffer has an active
--- highlighter, the `{ first visible line - 1, last visible line }` pair, and
--- then issues a single `parse()` call per buffer with all of that buffer's
--- ranges.
function TSHighlighter._on_start()
  ---@type table<integer, (Range)[]>
  local visible = {}

  local wins = api.nvim_tabpage_list_wins(0)
  for idx = 1, #wins do
    local winid = wins[idx]
    local bufnr = api.nvim_win_get_buf(winid)
    if TSHighlighter.active[bufnr] then
      local per_buf = visible[bufnr]
      if not per_buf then
        per_buf = {}
        visible[bufnr] = per_buf
      end
      -- 'w0' / 'w$' are the first / last lines shown in the window (1-based).
      local first = vim.fn.line('w0', winid) - 1
      local last = vim.fn.line('w$', winid)
      per_buf[#per_buf + 1] = { first, last }
    end
  end

  -- One parse per buffer, even when the buffer is shown in several windows.
  for bufnr, per_buf in pairs(visible) do
    TSHighlighter.active[bufnr].tree:parse(per_buf)
  end
end
---@private
---@param _win integer
---@param buf integer
@ -401,13 +418,13 @@ function TSHighlighter._on_win(_, _win, buf, topline, botline)
if not self then
return false
end
self.tree:parse({ topline, botline + 1 })
self:prepare_highlight_states(topline, botline + 1)
self.redraw_count = self.redraw_count + 1
return true
end
api.nvim_set_decoration_provider(ns, {
on_start = TSHighlighter._on_start,
on_win = TSHighlighter._on_win,
on_line = TSHighlighter._on_line,
_on_spell_nav = TSHighlighter._on_spell_nav,

View File

@ -72,19 +72,35 @@ local TSCallbackNames = {
---@field private _callbacks table<TSCallbackName,function[]> Callback handlers
---@field package _callbacks_rec table<TSCallbackName,function[]> Callback handlers (recursive)
---@field private _children table<string,vim.treesitter.LanguageTree> Injected languages
---@field private _injection_query vim.treesitter.Query Queries defining injected languages
---@field private _injection_query vim.treesitter.Query? Queries defining injected languages
---
---If `is_valid(true) and _injections_processed`, the set of children parsers and their sets of
---regions are complete wrt. the full source, so that it's not necessary to execute injections.
---@field private _injections_processed boolean
---
---@field private _opts table Options
---@field private _parser TSParser Parser for language
---@field private _has_regions boolean
---@field private _regions table<integer, Range6[]>?
---
---List of regions this tree should manage and parse. If nil then regions are
---taken from _trees. This is mostly a short-lived cache for included_regions()
---@field private _regions table<integer, Range6[]>?
---
---Inverse region table, i.e., a (chaining) hash table from regions to their index in `_regions`.
---Used for checking if an added region is already managed by this parser, so that it can reuse
---the existing tree for incremental parsing.
---The hash function is simply `region[1][3]` (the start byte of its first range).
---Each bucket has the shape of { region1, index of region1, region2, index of region2, ... }.
---@field private _regions_inv table<integer, (Range6[]|integer)[]>?
---
---@field private _lang string Language name
---@field private _parent? vim.treesitter.LanguageTree Parent LanguageTree
---@field private _source (integer|string) Buffer or string to parse
---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language).
---
---Reference to parsed tree (one for each language).
---Each key is the index of region, which is synced with _regions and _valid.
---@field private _trees table<integer, TSTree>
---
---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid
---@field private _logger? fun(logtype: string, msg: string)
---@field private _logfile? file*
@ -228,7 +244,7 @@ end
--- Invalidates this parser and its children.
---
--- Should only be called when the tracked state of the LanguageTree is not valid against the parse
--- tree in treesitter. Doesn't clear filesystem cache. Called often, so needs to be fast.
--- tree in treesitter. Doesn't clear filesystem cache.
---@param reload boolean|nil
function LanguageTree:invalidate(reload)
self._valid = false
@ -248,9 +264,6 @@ end
--- Returns all trees of the regions parsed by this parser.
--- Does not include child languages.
--- The result is list-like if
--- * this LanguageTree is the root, in which case the result is empty or a singleton list; or
--- * the root LanguageTree is fully parsed.
---
---@return table<integer, TSTree>
function LanguageTree:trees()
@ -264,6 +277,7 @@ end
--- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
--- state of the source. If invalid, user should call |LanguageTree:parse()|.
--- `is_valid(false)` can be slow because it runs injection on the full source.
---@param exclude_children boolean|nil whether to ignore the validity of children (default `false`)
---@return boolean
function LanguageTree:is_valid(exclude_children)
@ -278,8 +292,11 @@ function LanguageTree:is_valid(exclude_children)
end
if not exclude_children then
-- Run full injection to check if the current set of children and their regions are complete.
-- Note that `set_included_regions` marks new regions invalid.
if not self._injections_processed then
return false
self:_add_injections(true)
self._injections_processed = true
end
for _, child in pairs(self._children) do
@ -308,24 +325,26 @@ function LanguageTree:source()
end
--- @param region Range6[]
--- @param range? boolean|Range
--- @param ranges? boolean|(Range)[]
--- @return boolean
local function intercepts_region(region, range)
local function intercepts_region(region, ranges)
if #region == 0 then
return true
end
if range == nil then
if ranges == nil then
return false
end
if type(range) == 'boolean' then
return range
if type(ranges) == 'boolean' then
return ranges
end
for _, r in ipairs(region) do
if Range.intercepts(r, range) then
return true
for _, r1 in ipairs(region) do
for _, r2 in ipairs(ranges) do
if Range.intercepts(r1, r2) then
return true
end
end
end
@ -333,11 +352,11 @@ local function intercepts_region(region, range)
end
--- @private
--- @param range boolean|Range?
--- @param ranges boolean|(Range)[]?
--- @return Range6[] changes
--- @return integer no_regions_parsed
--- @return number total_parse_time
function LanguageTree:_parse_regions(range)
function LanguageTree:_parse_regions(ranges)
local changes = {}
local no_regions_parsed = 0
local total_parse_time = 0
@ -346,17 +365,17 @@ function LanguageTree:_parse_regions(range)
self._valid = {}
end
-- If there are no ranges, set to an empty list
-- If there is no region, set to an empty list
-- so the included ranges in the parser are cleared.
for i, ranges in pairs(self:included_regions()) do
for i, region in pairs(self:included_regions()) do
if
not self._valid[i]
and (
intercepts_region(ranges, range)
or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), range))
intercepts_region(region, ranges)
or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), ranges))
)
then
self._parser:set_included_ranges(ranges)
self._parser:set_included_ranges(region)
local parse_time, tree, tree_changes =
tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
@ -377,11 +396,12 @@ function LanguageTree:_parse_regions(range)
end
--- @private
--- @param ranges boolean|(Range)[]|nil
--- @return number
function LanguageTree:_add_injections()
function LanguageTree:_add_injections(ranges)
local seen_langs = {} ---@type table<string,boolean>
local query_time, injections_by_lang = tcall(self._get_injections, self)
local query_time, injections_by_lang = tcall(self._get_injections, self, ranges)
for lang, injection_regions in pairs(injections_by_lang) do
local has_lang = pcall(language.add, lang)
@ -409,6 +429,14 @@ function LanguageTree:_add_injections()
return query_time
end
--- Computes the overall span of a region: from the start position of its first
--- range to the end position of its last range.
---@param region (Range)[]
---@return Range4
local function region_range(region)
  local first, last = region[1], region[#region]
  local start_row, start_col = Range.unpack4(first)
  local _, _, end_row, end_col = Range.unpack4(last)
  return { start_row, start_col, end_row, end_col }
end
--- Recursively parse all regions in the language tree using |treesitter-parsers|
--- for the corresponding languages and run injection queries on the parsed trees
--- to determine whether child trees should be created and parsed.
@ -416,16 +444,16 @@ end
--- Any region with empty range (`{}`, typically only the root tree) is always parsed;
--- otherwise (typically injections) only if it intersects {ranges} (or if {ranges} is `true`).
---
--- @param range boolean|Range|nil: Parse this range in the parser's source.
--- @param ranges boolean|Range|(Range)[]|nil: Parse this range(s) in the parser's source.
--- Set to `true` to run a complete parse of the source (Note: Can be slow!)
--- Set to `false|nil` to only parse regions with empty ranges (typically
--- only the root tree without injections).
--- @return table<integer, TSTree>
function LanguageTree:parse(range)
if self:is_valid() then
self:_log('valid')
return self._trees
function LanguageTree:parse(ranges)
if type(ranges) == 'table' and #ranges > 0 and type(ranges[1]) == 'number' then
ranges = { ranges }
end
---@cast ranges boolean|(Range)[]|nil
local changes --- @type Range6[]?
@ -436,16 +464,26 @@ function LanguageTree:parse(range)
-- At least 1 region is invalid
if not self:is_valid(true) then
changes, no_regions_parsed, total_parse_time = self:_parse_regions(range)
changes, no_regions_parsed, total_parse_time = self:_parse_regions(ranges)
-- Need to run injections when we parsed something
if no_regions_parsed > 0 then
self._injections_processed = false
end
end
if not self._injections_processed and range ~= false and range ~= nil then
query_time = self:_add_injections()
self._injections_processed = true
-- NOTE: Trade-off in partial injection query execution
-- * The good: Each `parse()` is faster.
-- * The bad: `is_valid(false)` is more expensive, requiring a full injection query execution. To
-- avoid this cost, each `parse()` always runs partial injection. However, this is not a big
-- problem as partial injection is very cheap even on huge files.
-- * A potential optimization: Track the ranges where the set of injected regions are known to be
-- complete and valid, and run the injection query only on the intersection of requested ranges
-- and the invalid ranges. This would be even more beneficial for combined injection.
if self._injection_query and not self._injections_processed and ranges then
query_time = self:_add_injections(ranges)
if ranges == true or self._injection_query.has_combined_injection then
self._injections_processed = true
end
end
self:_log({
@ -453,11 +491,11 @@ function LanguageTree:parse(range)
regions_parsed = no_regions_parsed,
parse_time = total_parse_time,
query_time = query_time,
range = range,
ranges = ranges,
})
for _, child in pairs(self._children) do
child:parse(range)
child:parse(ranges)
end
return self._trees
@ -585,6 +623,125 @@ function LanguageTree:_iter_regions(fn)
end
end
---Register `region` (stored at index `i`) in the inverse region table.
---The bucket is keyed by the start byte of the region's first range and holds
---alternating `region, index` entries.
---@param regions_inv table<integer, (Range6[]|integer)[]>
---@param i integer
---@param region Range6[]
local function regions_inv_insert(regions_inv, i, region)
  local key = region[1][3]
  local bucket = regions_inv[key]
  if bucket then
    -- Append the pair to the existing chain.
    bucket[#bucket + 1] = region
    bucket[#bucket + 1] = i
    return
  end
  regions_inv[key] = { region, i }
end
---Drop `region` from the inverse region table.
---Raises an error if the bucket for the region's start byte does not exist or
---if no structurally equal region is stored in it.
---@param regions_inv table<integer, (Range6[]|integer)[]>
---@param region Range6[]
local function regions_inv_remove(regions_inv, region)
  local key = region[1][3]
  local bucket = assert(regions_inv[key])
  local slot = 1
  while slot <= #bucket do
    if vim.deep_equal(bucket[slot], region) then
      -- Remove the index entry first so that the region's own position is unaffected.
      table.remove(bucket, slot + 1)
      table.remove(bucket, slot)
      if #bucket == 0 then
        -- Do not keep empty buckets around.
        regions_inv[key] = nil
      end
      return
    end
    slot = slot + 2
  end
  error('region not found')
end
---Cheap approximate equality of two regions; equal regions are always similar.
---See the comment in `set_included_regions` on why similarity is used.
---Two regions count as similar when the end bytes of their first ranges match,
---or when the end bytes of their last ranges match.
---@param region1 Range6[]
---@param region2 Range6[]
---@return boolean
local function region_similar(region1, region2)
  if region1[1][6] == region2[1][6] then
    return true
  end
  local tail1, tail2 = region1[#region1], region2[#region2]
  return tail1[6] == tail2[6]
end
---Look up `region` in the inverse region table.
---An exact (structurally equal) match is returned immediately with `true`.
---Otherwise the index of the last approximately matching region in the bucket
---is returned with `false` (the index is nil when nothing is similar).
---Returns nothing at all when there is no bucket for the region's start byte.
---@param regions_inv table<integer, (Range6[]|integer)[]>
---@param region Range6[]
---@return integer?
---@return boolean? exact
local function regions_inv_lookup(regions_inv, region)
  local bucket = regions_inv[region[1][3]]
  if not bucket then
    return
  end

  local approx ---@type integer?
  local slot, size = 1, #bucket
  while slot <= size do
    local candidate = bucket[slot] --[[@as Range6[] ]]
    -- The cheap similarity test gates the more expensive deep comparison.
    if region_similar(candidate, region) then
      approx = bucket[slot + 1] --[[@as integer]]
      if vim.deep_equal(candidate, region) then
        return approx, true
      end
    end
    slot = slot + 2
  end
  return approx, false
end
---Mark the region at index `i` as invalid.
---A fully valid parser (`_valid == true`) is first expanded into a per-region
---table in which every known region is valid. A fully invalid parser
---(`_valid == false`) is left untouched.
---@param i integer
function LanguageTree:_invalidate_region(i)
  local valid = self._valid
  if valid == true then
    valid = {}
    self._valid = valid
    for j in pairs(self._regions) do
      valid[j] = true
    end
  end
  if type(valid) == 'table' then
    valid[i] = false
  end
end
---Stop managing the region at index `i`.
---
---Does nothing when this parser has no explicit regions (`_has_regions` is
---false). Otherwise it removes the region from `_regions` / `_regions_inv`
---(when that cache is populated), fires the 'changedtree' callback for the
---region's tree and drops the tree, recursively discards the children's regions
---that lie inside the discarded range, and finally clears the per-region
---validity entry.
---@param i integer
function LanguageTree:_discard_region(i)
if not self._has_regions then
return
end
-- `_regions` is only a cache and may be nil; the inverse table is kept in sync with it.
if self._regions then
regions_inv_remove(self._regions_inv, self._regions[i])
self._regions[i] = nil
end
if self._trees[i] then
local region = self._trees[i]:included_ranges(true)
self:_log(function()
return 'discarding region', i, region_tostr(region)
end)
-- Notify listeners while the tree is still available, then drop it.
self:_do_callback('changedtree', region, self._trees[i])
local discarded_range = region_range(region)
self._trees[i] = nil
-- Discard children's regions that are included in the discarded region. This is necessary
-- because changes that only remove trees in this parser keep the children parsers untouched.
for _, child in pairs(self._children) do
for child_i, child_region in pairs(child:included_regions()) do
if Range.contains(discarded_range, region_range(child_region)) then
child:_discard_region(child_i)
end
end
end
end
-- If it's boolean (fully valid/invalid), deleting a region doesn't change its value.
if type(self._valid) == 'table' then
self._valid[i] = nil
end
end
--- Sets the included regions that should be parsed by this |LanguageTree|.
--- A region is a set of nodes and/or ranges that will be parsed in the same context.
---
@ -604,7 +761,23 @@ end
function LanguageTree:set_included_regions(new_regions)
self._has_regions = true
-- Transform the tables from 4 element long to 6 element long (with byte offset)
-- Refresh self._regions and self._regions_inv
self:included_regions()
local touched = {} ---@type table<integer, true>
-- Check if the parser already has each region so that they can be parsed incrementally from an
-- existing tree. We find the existing regions by "similarity" instead of the exact equality,
-- because the values of an existing region and the matching region in `new_regions` may not be
-- equal, in which case the existing tree can't be reused.
--
-- Inequality of matching regions happens because `_edit` does not accurately track changes in the
-- existing regions. One (probably the only?) case is when a multi-range region created from a
-- non-`include-children` injection or a combined injection is edited in a way that adds a range
-- to the region, e.g., when adding a line in markdown fenced code block (with language).
--
-- Matching the regions doesn't need to be precise: the consequence of false match and false
-- non-match is just a minor loss in efficiency due to reparsing a region from scratch.
for _, region in ipairs(new_regions) do
for i, range in ipairs(region) do
if type(range) == 'table' and #range == 4 then
@ -613,26 +786,50 @@ function LanguageTree:set_included_regions(new_regions)
region[i] = { range:range(true) }
end
end
end
---@cast region Range6[]
-- included_regions is not guaranteed to be list-like, but this is still sound, i.e. if
-- new_regions is different from included_regions, then outdated regions in included_regions are
-- invalidated. For example, if included_regions = new_regions ++ hole ++ outdated_regions, then
-- outdated_regions is invalidated by _iter_regions in else branch.
if #self:included_regions() ~= #new_regions then
-- TODO(lewis6991): inefficient; invalidate trees incrementally
for _, t in pairs(self._trees) do
self:_do_callback('changedtree', t:included_ranges(true), t)
local i, exact = regions_inv_lookup(self._regions_inv, region)
if not exact then
if i then
self:_log(function()
return 'invalidating inexactly matched region', i, region_tostr(self._regions[i])
end)
regions_inv_remove(self._regions_inv, self._regions[i])
else
i = #self._regions + 1 -- this always gives an unoccupied index even if there are holes
end
self._regions[i] = region
regions_inv_insert(self._regions_inv, i, region)
self:_invalidate_region(i)
end
self._trees = {}
self:invalidate()
else
self:_iter_regions(function(i, region)
return vim.deep_equal(new_regions[i], region)
end)
---@cast i integer
touched[i] = true
end
self._regions = new_regions
-- Discard stale regions.
for i, _ in pairs(self._regions) do
if not touched[i] then
self:_discard_region(i)
end
end
end
--- Removes, in place, every range whose start byte equals its end byte
--- (i.e. an empty range), keeping the remaining ranges in their original order.
--- @param region Range6[]
local function prune_empty_ranges(region)
  local total = #region
  local kept = 0
  for idx = 1, total do
    local range = region[idx]
    if range[3] ~= range[6] then
      kept = kept + 1
      -- Shift the range down only when something before it was dropped.
      if kept ~= idx then
        region[kept] = range
      end
    end
  end
  -- Clear the now-unused tail, from the back, so the table stays a sequence.
  for idx = total, kept + 1, -1 do
    region[idx] = nil
  end
end
---Gets the set of included regions managed by this LanguageTree. This can be different from the
@ -651,12 +848,27 @@ function LanguageTree:included_regions()
return { {} }
end
local regions = {} ---@type Range6[][]
local regions = {} ---@type table<integer, Range6[]>
local regions_inv = {} ---@type table<integer, (Range6[]|integer)[]>
for i, _ in pairs(self._trees) do
regions[i] = self._trees[i]:included_ranges(true)
local region = self._trees[i]:included_ranges(true)
-- If user deletes a range in a region, `tree:edit()` leaves an empty range instead of deleting
-- it. This could be a bug in treesitter.
prune_empty_ranges(region)
if #region > 0 then
regions[i] = region
regions_inv_insert(regions_inv, i, region)
else
self._trees[i] = nil
-- If it's boolean (fully valid/invalid), deleting a region doesn't change its value.
if type(self._valid) == 'table' then
self._valid[i] = nil
end
end
end
self._regions = regions
self._regions_inv = regions_inv
return regions
end
@ -816,30 +1028,59 @@ end
---
--- This is where most of the injection processing occurs.
---
--- TODO: Allow for an offset predicate to tailor the injection range
--- instead of using the entire nodes range.
--- @param ranges boolean|(Range)[]|nil
--- @private
--- @return table<string, Range6[][]>
function LanguageTree:_get_injections()
if not self._injection_query then
function LanguageTree:_get_injections(ranges)
if not self._injection_query or not ranges then
return {}
end
---@type table<integer,vim.treesitter.languagetree.Injection>
local injections = {}
-- Combined injection must be run on the full source, and currently there is no simple way to
-- selectively match each pattern separately.
if ranges == true or self._injection_query.has_combined_injection then
ranges = { true } ---@diagnostic disable-line: assign-type-mismatch
else
for i, range in ipairs(ranges) do
local sline, _, eline, _ = Range.unpack4(range)
ranges[i] = { sline, eline }
end
end
---@cast ranges (true|Range2)[]
for index, tree in pairs(self._trees) do
local root_node = tree:root()
local start_line, _, end_line, _ = root_node:range()
local start_line, _, end_line, end_col = root_node:range()
if end_col > 0 then
end_line = end_line + 1
end
for pattern, match, metadata in
self._injection_query:iter_matches(root_node, self._source, start_line, end_line + 1)
do
local lang, combined, ranges = self:_get_injection(match, metadata)
if lang then
add_injection(injections, index, pattern, lang, combined, ranges)
else
self:_log('match from injection query failed for pattern', pattern)
for _, range in ipairs(ranges) do
local start_line_in_range, end_line_in_range = start_line, end_line
if range ~= true then
start_line_in_range = math.max(start_line, range[1])
end_line_in_range = math.min(end_line, range[2])
end
-- Duplicates from overlapping ranges are handled by `set_included_ranges`.
if start_line_in_range < end_line_in_range then
for pattern, match, metadata in
self._injection_query:iter_matches(
root_node,
self._source,
start_line_in_range,
end_line_in_range
)
do
local lang, combined, inj_ranges = self:_get_injection(match, metadata)
if lang then
add_injection(injections, index, pattern, lang, combined, inj_ranges)
else
self:_log('match from injection query failed for pattern', pattern)
end
end
end
end
end
@ -859,8 +1100,8 @@ function LanguageTree:_get_injections()
if entry.combined then
table.insert(result[lang], combine_regions(entry.regions))
else
for _, ranges in pairs(entry.regions) do
table.insert(result[lang], ranges)
for _, inj_ranges in pairs(entry.regions) do
table.insert(result[lang], inj_ranges)
end
end
end
@ -908,6 +1149,7 @@ function LanguageTree:_edit(
end
self._regions = nil
self._regions_inv = nil
local changed_range = {
start_row,
@ -1071,14 +1313,7 @@ end
---@param range Range
---@return boolean
local function tree_contains(tree, range)
local tree_ranges = tree:included_ranges(false)
return Range.contains({
tree_ranges[1][1],
tree_ranges[1][2],
tree_ranges[#tree_ranges][3],
tree_ranges[#tree_ranges][4],
}, range)
return Range.contains(region_range(tree:included_ranges(false)), range)
end
--- Determines whether {range} is contained in the |LanguageTree|.

View File

@ -11,6 +11,7 @@ local M = {}
---@field lang string name of the language for this parser
---@field captures string[] list of (unique) capture names defined in query
---@field info vim.treesitter.QueryInfo contains information used in the query (e.g. captures, predicates, directives)
---@field has_combined_injection true? whether this query has a combined injection pattern
---@field query TSQuery userdata query object
local Query = {}
Query.__index = Query
@ -30,6 +31,18 @@ function Query.new(lang, ts_query)
patterns = query_info.patterns,
}
self.captures = self.info.captures
for _, preds in pairs(self.info.patterns) do
if
vim.tbl_contains(preds, function(pred)
return vim.deep_equal(pred, { 'set!', 'injection.combined' })
end, { predicate = true })
then
self.has_combined_injection = true
break
end
end
return self
end

View File

@ -802,7 +802,7 @@ local function validate_one(fname, parser_path)
parse_errors = {},
}
local lang_tree, buf = parse_buf(fname, parser_path)
for _, tree in ipairs(lang_tree:trees()) do
for _, tree in pairs(lang_tree:trees()) do
visit_validate(tree:root(), 0, tree, { buf = buf, fname = fname }, stats)
end
lang_tree:destroy()
@ -909,7 +909,7 @@ local function gen_one(fname, to_fname, old, commit, parser_path)
]]
local main = ''
for _, tree in ipairs(lang_tree:trees()) do
for _, tree in pairs(lang_tree:trees()) do
main = main
.. (
visit_node(

View File

@ -251,20 +251,25 @@ end]]
local root = _G.parser:parse()[1]:root()
_G.parser:set_included_regions({ { root:child(0) } })
_G.parser:invalidate()
return { _G.parser:parse(true)[1]:root():range() }
local _, tree = next(_G.parser:parse(true))
return { tree:root():range() }
end)
eq({ 0, 0, 18, 1 }, res2)
eq({ { { 0, 0, 0, 18, 1, 512 } } }, exec_lua [[ return parser:included_regions() ]])
eq(
{ { { 0, 0, 0, 18, 1, 512 } } },
exec_lua [[return vim.tbl_values(_G.parser:included_regions())]]
)
local range_tbl = exec_lua(function()
_G.parser:set_included_regions { { { 0, 0, 17, 1 } } }
_G.parser:parse()
return _G.parser:included_regions()
end)
eq({ { { 0, 0, 0, 17, 1, 508 } } }, range_tbl)
eq(
{ { { 0, 0, 0, 17, 1, 508 } } },
exec_lua(function()
_G.parser:set_included_regions { { { 0, 0, 17, 1 } } }
_G.parser:parse()
return vim.tbl_values(_G.parser:included_regions())
end)
)
end)
it('allows to set complex ranges', function()
@ -281,7 +286,8 @@ end]]
parser:set_included_regions({ nodes })
local root = parser:parse(true)[1]:root()
local _, tree = next(parser:parse(true))
local root = tree:root()
local res = {}
for i = 0, (root:named_child_count() - 1) do
@ -824,27 +830,140 @@ print()
1,
exec_lua(function()
_G.parser:parse({ 0, 2 })
return #_G.parser:children().lua:trees()
return vim.tbl_count(_G.parser:children().lua:trees())
end)
)
-- Regions outside the given range are discarded.
eq(
1,
exec_lua(function()
_G.parser:parse({ 2, 6 })
return vim.tbl_count(_G.parser:children().lua:trees())
end)
)
eq(
2,
exec_lua(function()
_G.parser:parse({ 2, 6 })
return #_G.parser:children().lua:trees()
end)
exec_lua [[
parser:invalidate()
parser:parse({{0, 2}, {2,6}})
return vim.tbl_count(parser:children().lua:trees())
]]
)
eq(
2,
exec_lua [[
parser:parse({{0, 5}, {2, 6}})
return vim.tbl_count(parser:children().lua:trees())
]]
)
eq(
7,
exec_lua(function()
_G.parser:parse(true)
return #_G.parser:children().lua:trees()
return vim.tbl_count(_G.parser:children().lua:trees())
end)
)
end)
-- Checks that a child region edited in place keeps its index in `trees()`
-- rather than being re-created under a new index.
it('reuses similar existing regions', function()
insert(dedent [[
* line1
line2]])
exec_lua(function()
_G.parser = vim.treesitter.get_parser(0, 'markdown', {
injections = {
markdown = '((inline) @injection.content (#set! injection.language "markdown_inline"))',
},
})
end)
-- Fully parse and return the markdown_inline child's included ranges, keyed by region index.
local function get_regions()
return exec_lua(function()
_G.parser:parse(true)
local result = {}
for i, tree in pairs(_G.parser:children().markdown_inline:trees()) do
result[i] = tree:included_ranges()
end
return result
end)
end
eq({
[1] = { { 0, 2, 1, 0 }, { 1, 2, 1, 7 } },
}, get_regions())
-- Duplicate line 2 (yank it and paste it below).
feed('2ggyyp')
-- region index does not change
eq({
[1] = { { 0, 2, 1, 0 }, { 1, 2, 2, 0 }, { 2, 2, 2, 7 } },
}, get_regions())
-- Delete line 2 again.
feed('2ggdd')
eq({
[1] = { { 0, 2, 1, 0 }, { 1, 2, 1, 7 } },
}, get_regions())
-- Yank the whole buffer and paste it above the first line.
feed('ggyGP')
-- the old region moves while maintaining its index
eq({
[1] = { { 2, 2, 3, 0 }, { 3, 2, 3, 7 } },
[2] = { { 0, 2, 1, 0 }, { 1, 2, 1, 7 } },
}, get_regions())
end)
-- Checks that removing a region from a parent parser also removes the
-- grandchild (lua) regions that were located inside it.
it("recursively discards children's regions contained in a parent's discarded region", function()
insert(dedent [[
`return`
```
line 4
```
line 6 `return`
```]])
exec_lua(function()
_G.parser = vim.treesitter.get_parser(0, 'markdown', {
injections = {
-- inject code span to lua
markdown_inline = '((code_span) @injection.content (#offset! @injection.content 0 1 0 -1) (#set! injection.language "lua"))',
},
})
end)
-- Fully parse and return the included ranges of the lua parser nested under
-- markdown_inline, keyed by region index.
local function get_regions()
return exec_lua(function()
_G.parser:parse(true)
local result = {}
for i, tree in pairs(_G.parser:children().markdown_inline:children().lua:trees()) do
result[i] = tree:included_ranges()
end
return result
end)
end
-- Initially, "line 4" is in the fenced code block, and "line 6 `return`" is a normal paragraph
-- with an inline code span.
eq({
[1] = { { 0, 1, 0, 7 } },
[2] = { { 5, 8, 5, 14 } },
}, get_regions())
-- Extend the code block to "line 6 `return`". Note that the only effect to markdown_inline
-- parser is removing a region, so it does not parse anything in markdown_inline parser.
feed('5ggD')
-- Despite not parsing at the parent (markdown_inline) parser, the regions in children (lua)
-- parser that are included in the parent's removed region should be removed as well.
-- The "`return`" at the first line is just for preventing the lua parser from being removed.
eq({
[1] = { { 0, 1, 0, 7 } },
}, get_regions())
end)
describe('languagetree is_valid()', function()
-- Shared setup for the is_valid() cases: fills the buffer, switches the filetype to help,
-- and creates a vimdoc parser with a custom injection query.
-- NOTE(review): the `@ -...` lines are diff hunk headers. The fixture text and the closing
-- braces of the get_parser() call are elided around them, and both a bare
-- `vim.treesitter.get_parser(...)` line and a `_G.parser = ...` line are present; confirm
-- which one is intended (later cases read the global `parser`).
before_each(function()
insert(dedent [[
@ -855,10 +974,8 @@ print()
]])
feed(':set ft=help<cr>')
exec_lua(function()
vim.treesitter.get_parser(0, 'vimdoc', {
_G.parser = vim.treesitter.get_parser(0, 'vimdoc', {
injections = {
-- the codeblock's (language) node names the injected language, its (code) node is the content
vimdoc = '((codeblock (language) @injection.language (code) @injection.content) (#set! injection.include-children))',
},
@ -866,21 +983,22 @@ print()
end)
end)
-- Right after the shared setup: the check that ignores children passes, while the check
-- that includes children is expected to fail.
it('is valid excluding, invalid including children initially', function()
  local root_only = exec_lua('return vim.treesitter.get_parser():is_valid(true)')
  eq(true, root_only)
  local with_children = exec_lua('return vim.treesitter.get_parser():is_valid()')
  eq(false, with_children)
end)
--- Reports the included ranges of every tree of the injected lua parser without
--- triggering a parse.
--- The remote function returns nil when the global parser has no `lua` child; otherwise a
--- table mapping each index reported by trees() to that tree's included ranges.
local function get_regions()
  return exec_lua(function()
    local lua_ltree = _G.parser:children().lua
    if not lua_ltree then
      return nil
    end
    local ranges_by_index = {}
    -- pairs() on purpose: indices are copied as-is, holes included
    for idx, ts_tree in pairs(lua_ltree:trees()) do
      ranges_by_index[idx] = ts_tree:included_ranges()
    end
    return ranges_by_index
  end)
end
-- A parse(true) is expected to leave both the root-only and the children-inclusive
-- validity checks passing.
it('is fully valid after a full parse', function()
  exec_lua('vim.treesitter.get_parser():parse(true)')
  local validity_checks = {
    'return vim.treesitter.get_parser():is_valid(true)',
    'return vim.treesitter.get_parser():is_valid()',
  }
  for _, check in ipairs(validity_checks) do
    eq(true, exec_lua(check))
  end
end)
-- NOTE(review): two `it(` headers share a single `end)` below. This looks like both
-- revisions of a diff kept side by side (the first uses vim.treesitter.get_parser(), the
-- second the global `parser`); confirm which case is intended.
-- First case: after parse({5, 7}) both validity checks are expected to pass.
it('is fully valid after a parsing a range on parsed tree', function()
exec_lua('vim.treesitter.get_parser():parse({5, 7})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid()'))
-- Second case: no parse call is made; both validity checks are expected to pass.
it('is valid including children since it does not have one', function()
eq(true, exec_lua('return parser:is_valid(true)'))
eq(true, exec_lua('return parser:is_valid()'))
end)
describe('when adding content with injections', function()
@ -895,36 +1013,36 @@ print()
end)
-- No parse is issued in this case: both the root-only check (is_valid(true)) and the
-- children-inclusive check (is_valid()) are expected to fail.
-- NOTE(review): each assertion appears twice, once via vim.treesitter.get_parser() and once
-- via the global `parser`; this looks like two revisions interleaved, confirm which to keep.
it('is fully invalid after changes', function()
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
eq(false, exec_lua('return parser:is_valid(true)'))
eq(false, exec_lua('return parser:is_valid()'))
end)
-- After parse() with no range argument, the root-only check is expected to pass while the
-- children-inclusive check is still expected to fail.
-- NOTE(review): the parse call and both assertions appear twice (vim.treesitter.get_parser()
-- and the global `parser`); looks like two revisions interleaved, confirm which to keep.
it('is valid excluding, invalid including children after a rangeless parse', function()
exec_lua('vim.treesitter.get_parser():parse()')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
exec_lua('parser:parse()')
eq(true, exec_lua('return parser:is_valid(true)'))
eq(false, exec_lua('return parser:is_valid()'))
end)
-- After parse({5, 7}), both the root-only and the children-inclusive checks are expected
-- to pass.
-- NOTE(review): the parse call and both assertions appear twice (vim.treesitter.get_parser()
-- and the global `parser`); looks like two revisions interleaved, confirm which to keep.
it(
'is fully valid after a range parse that leads to parsing not parsed injections',
function()
exec_lua('vim.treesitter.get_parser():parse({5, 7})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid()'))
exec_lua('parser:parse({5, 7})')
eq(true, exec_lua('return parser:is_valid(true)'))
eq(true, exec_lua('return parser:is_valid()'))
end
)
-- After parse({2, 4}), the root-only check is expected to pass while the
-- children-inclusive check is expected to fail.
-- NOTE(review): the parse call and both assertions appear twice (vim.treesitter.get_parser()
-- and the global `parser`); looks like two revisions interleaved, confirm which to keep.
it(
'is valid excluding, invalid including children after a range parse that does not lead to parsing not parsed injections',
function()
exec_lua('vim.treesitter.get_parser():parse({2, 4})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
exec_lua('parser:parse({2, 4})')
eq(true, exec_lua('return parser:is_valid(true)'))
eq(false, exec_lua('return parser:is_valid()'))
end
)
end)
describe('when removing content with injections', function()
describe('when removing an injection region', function()
-- Setup: appends text at the end of the buffer, runs a full parse, checks the lua regions,
-- then deletes lines at the end of the buffer and checks the regions again.
-- NOTE(review): the `@ -...` line is a diff hunk header, so part of the inserted text is
-- elided; the full parse is also issued twice (vim.treesitter.get_parser() and the global
-- `parser`), which looks like two revisions interleaved.
before_each(function()
feed('G')
insert(dedent [[
@ -933,41 +1051,80 @@ print()
<
>lua
local a = {}
local b = {}
<
]])
exec_lua('vim.treesitter.get_parser():parse(true)')
exec_lua('parser:parse(true)')
-- two lua regions are expected after the full parse
eq({ [1] = { { 6, 0, 7, 0 } }, [2] = { { 10, 0, 11, 0 } } }, get_regions())
-- linewise delete from the last line upwards
feed('Gd3k')
-- the empty region is pruned
eq({ [1] = { { 6, 0, 7, 0 } } }, get_regions())
end)
-- No parse is issued here: both validity checks are expected to fail.
it('is fully invalid after changes', function()
  local root_only = exec_lua('return vim.treesitter.get_parser():is_valid(true)')
  eq(false, root_only)
  local with_children = exec_lua('return vim.treesitter.get_parser():is_valid()')
  eq(false, with_children)
end)
-- After parse() with no range argument: the root-only check is expected to pass, the
-- children-inclusive check to fail.
it('is valid excluding, invalid including children after a rangeless parse', function()
  exec_lua('vim.treesitter.get_parser():parse()')
  local root_only = exec_lua('return vim.treesitter.get_parser():is_valid(true)')
  eq(true, root_only)
  local with_children = exec_lua('return vim.treesitter.get_parser():is_valid()')
  eq(false, with_children)
end)
-- NOTE(review): this body asserts `true` via vim.treesitter.get_parser() and then `false`
-- via the global `parser` for the same two checks, with no edit in between. It looks like
-- the body of one revision merged with the body of another case; confirm the intended
-- assertions before relying on this test.
it('is fully valid after a range parse that leads to parsing modified child tree', function()
exec_lua('vim.treesitter.get_parser():parse({5, 7})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid()'))
eq(false, exec_lua('return parser:is_valid(true)'))
eq(false, exec_lua('return parser:is_valid()'))
end)
-- NOTE(review): `it(` receives two description strings and the body holds two parse calls
-- with different expectations (parse({2, 4}) -> true/false, then parse() -> true/true).
-- This looks like two revisions of the case merged together; confirm which description and
-- which half of the body are intended.
it(
'is valid excluding, invalid including children after a range parse that does not lead to parsing modified child tree',
'is fully valid after a rangeless parse, since the only change to the children was removing a region',
function()
exec_lua('vim.treesitter.get_parser():parse({2, 4})')
eq(true, exec_lua('return vim.treesitter.get_parser():is_valid(true)'))
eq(false, exec_lua('return vim.treesitter.get_parser():is_valid()'))
exec_lua('parser:parse()')
eq(true, exec_lua('return parser:is_valid(true)'))
eq(true, exec_lua('return parser:is_valid()'))
end
)
-- After parse({5, 7}) both the root-only and the children-inclusive checks are expected
-- to pass.
it('is fully valid after a range parse that includes injection region', function()
  exec_lua('parser:parse({5, 7})')
  local validity_checks = {
    'return parser:is_valid(true)',
    'return parser:is_valid()',
  }
  for _, check in ipairs(validity_checks) do
    eq(true, exec_lua(check))
  end
end)
-- After parse({2, 4}): get_regions() is expected to yield vim.NIL, the root-only check to
-- pass, and the children-inclusive check to fail.
it(
  'is valid excluding, invalid including children after a range parse that does not include injection region',
  function()
    exec_lua('parser:parse({2, 4})')
    local regions = get_regions()
    eq(vim.NIL, regions)
    local root_only = exec_lua('return parser:is_valid(true)')
    eq(true, root_only)
    local with_children = exec_lua('return parser:is_valid()')
    eq(false, with_children)
  end
)
end)
-- Cases where the text inside an existing lua code block is modified after a full parse.
describe('when editing an injection region', function()
  --- Asserts the result of is_valid(true) and then of is_valid() on the global parser.
  local function expect_validity(excluding_children, including_children)
    eq(excluding_children, exec_lua('return parser:is_valid(true)'))
    eq(including_children, exec_lua('return parser:is_valid()'))
  end

  before_each(function()
    -- append a lua code block at the end of the buffer and parse everything
    feed('G')
    insert(dedent [[
>lua
local a = 1
<
]])
    exec_lua('parser:parse(true)')
    -- then change the literal inside the code block
    feed('G2kA<BS>2<ESC>') -- 1 → 2
  end)

  it('is fully invalid after changes', function()
    expect_validity(false, false)
  end)

  it('is valid excluding, invalid including children after a rangeless parse', function()
    exec_lua('parser:parse()')
    expect_validity(true, false)
  end)

  it('is fully valid after a range parse that includes modified region', function()
    exec_lua('parser:parse({5, 7})')
    expect_validity(true, true)
  end)
end)
end)
end)