From 7d971500847089ec8ade926a7f84d6bb3a51c8b0 Mon Sep 17 00:00:00 2001
From: Lewis Russell
Date: Mon, 25 Mar 2024 22:06:31 +0000
Subject: [PATCH] fix(treesitter): return correct match table in iter_captures()

---
 runtime/doc/treesitter.txt                 |  2 +-
 runtime/lua/vim/func.lua                   |  5 +-
 runtime/lua/vim/func/_memoize.lua          |  8 ++-
 runtime/lua/vim/treesitter/highlighter.lua | 14 ++--
 runtime/lua/vim/treesitter/query.lua       | 46 ++++++-------
 scripts/luacats_parser.lua                 |  4 +-
 test/functional/treesitter/query_spec.lua  | 78 ++++++++++++++++++++++
 7 files changed, 123 insertions(+), 34 deletions(-)

diff --git a/runtime/doc/treesitter.txt b/runtime/doc/treesitter.txt
index a76fa3c123..2dbff332af 100644
--- a/runtime/doc/treesitter.txt
+++ b/runtime/doc/treesitter.txt
@@ -1166,7 +1166,7 @@ Query:iter_captures({node}, {source}, {start}, {stop})
                   Defaults to `node:end_()`.
 
     Return: ~
-        (`fun(end_line: integer?): integer, TSNode, vim.treesitter.query.TSMetadata, table?`)
+        (`fun(end_line: integer?): integer, TSNode, vim.treesitter.query.TSMetadata, TSQueryMatch`)
         capture id, capture node, metadata, match
 
                                                         *Query:iter_matches()*
diff --git a/runtime/lua/vim/func.lua b/runtime/lua/vim/func.lua
index 206d1bae95..f71659ffb4 100644
--- a/runtime/lua/vim/func.lua
+++ b/runtime/lua/vim/func.lua
@@ -32,10 +32,11 @@ local M = {}
 --- first n arguments passed to {fn}.
 ---
 --- @param fn F Function to memoize.
+--- @param strong? boolean Do not use a weak table
 --- @return F # Memoized version of {fn}
 --- @nodoc
-function M._memoize(hash, fn)
-  return require('vim.func._memoize')(hash, fn)
+function M._memoize(hash, fn, strong)
+  return require('vim.func._memoize')(hash, fn, strong)
 end
 
 return M
diff --git a/runtime/lua/vim/func/_memoize.lua b/runtime/lua/vim/func/_memoize.lua
index 835bf64c93..65210351bf 100644
--- a/runtime/lua/vim/func/_memoize.lua
+++ b/runtime/lua/vim/func/_memoize.lua
@@ -36,15 +36,19 @@ end
 --- @generic F: function
 --- @param hash integer|string|fun(...): any
 --- @param fn F
+--- @param strong? boolean
 --- @return F
-return function(hash, fn)
+return function(hash, fn, strong)
   vim.validate({
     hash = { hash, { 'number', 'string', 'function' } },
     fn = { fn, 'function' },
   })
 
   ---@type table>
-  local cache = setmetatable({}, { __mode = 'kv' })
+  local cache = {}
+  if not strong then
+    setmetatable(cache, { __mode = 'kv' })
+  end
 
   hash = resolve_hash(hash)
 
diff --git a/runtime/lua/vim/treesitter/highlighter.lua b/runtime/lua/vim/treesitter/highlighter.lua
index 1e6f128461..3f7e31212c 100644
--- a/runtime/lua/vim/treesitter/highlighter.lua
+++ b/runtime/lua/vim/treesitter/highlighter.lua
@@ -4,7 +4,7 @@ local Range = require('vim.treesitter._range')
 
 local ns = api.nvim_create_namespace('treesitter/highlighter')
 
----@alias vim.treesitter.highlighter.Iter fun(end_line: integer|nil): integer, TSNode, vim.treesitter.query.TSMetadata, table
+---@alias vim.treesitter.highlighter.Iter fun(end_line: integer|nil): integer, TSNode, vim.treesitter.query.TSMetadata, TSQueryMatch
 
 ---@class (private) vim.treesitter.highlighter.Query
 ---@field private _query vim.treesitter.Query?
@@ -243,7 +243,7 @@ function TSHighlighter:get_query(lang)
   return self._queries[lang]
 end
 
---- @param match table
+--- @param match TSQueryMatch
 --- @param bufnr integer
 --- @param capture integer
 --- @param metadata vim.treesitter.query.TSMetadata
@@ -256,13 +256,15 @@ local function get_url(match, bufnr, capture, metadata)
     return url
   end
 
-  if not match or not match[url] then
+  local captures = match:captures()
+
+  if not captures[url] then
     return
   end
 
   -- Assume there is only one matching node. If there is more than one, take the URL
   -- from the first.
-  local other_node = match[url][1]
+  local other_node = captures[url][1]
 
   return vim.treesitter.get_node_text(other_node, bufnr, {
     metadata = metadata[url],
@@ -296,6 +298,10 @@ local function on_line_impl(self, buf, line, is_spell_nav)
     end
 
     if state.iter == nil or state.next_row < line then
+      -- Mainly used to skip over folds
+
+      -- TODO(lewis6991): Creating a new iterator loses the cached predicate results for query
+      -- matches. Move this logic inside iter_captures() so we can maintain the cache.
       state.iter =
         state.highlighter_query:query():iter_captures(root_node, self.bufnr, line, root_end_row + 1)
     end
diff --git a/runtime/lua/vim/treesitter/query.lua b/runtime/lua/vim/treesitter/query.lua
index 075fd0e99b..e68acac929 100644
--- a/runtime/lua/vim/treesitter/query.lua
+++ b/runtime/lua/vim/treesitter/query.lua
@@ -1,5 +1,6 @@
 local api = vim.api
 local language = require('vim.treesitter.language')
+local memoize = vim.func._memoize
 
 local M = {}
 
@@ -212,7 +213,7 @@ end
 ---@param query_name string Name of the query (e.g. "highlights")
 ---
 ---@return vim.treesitter.Query? : Parsed query. `nil` if no query files are found.
-M.get = vim.func._memoize('concat-2', function(lang, query_name)
+M.get = memoize('concat-2', function(lang, query_name)
   if explicit_queries[lang][query_name] then
     return explicit_queries[lang][query_name]
   end
@@ -245,7 +246,7 @@ end)
 ---@return vim.treesitter.Query : Parsed query
 ---
 ---@see [vim.treesitter.query.get()]
-M.parse = vim.func._memoize('concat-2', function(lang, query)
+M.parse = memoize('concat-2', function(lang, query)
   language.add(lang)
 
   local ts_query = vim._ts_parse_query(lang, query)
@@ -812,6 +813,12 @@ local function value_or_node_range(start, stop, node)
   return start, stop
 end
 
+--- @param match TSQueryMatch
+--- @return integer
+local function match_id_hash(_, match)
+  return (match:info())
+end
+
 --- Iterate over all captures from all matches inside {node}
 ---
 --- {source} is needed if the query contains predicates; then the caller
@@ -841,7 +848,7 @@ end
 ---@param start? integer Starting line for the search. Defaults to `node:start()`.
 ---@param stop? integer Stopping line for the search (end-exclusive). Defaults to `node:end_()`.
 ---
----@return (fun(end_line: integer|nil): integer, TSNode, vim.treesitter.query.TSMetadata, table?):
+---@return (fun(end_line: integer|nil): integer, TSNode, vim.treesitter.query.TSMetadata, TSQueryMatch):
 ---        capture id, capture node, metadata, match
 ---
 ---@note Captures are only returned if the query pattern of a specific capture contained predicates.
@@ -854,7 +861,8 @@ function Query:iter_captures(node, source, start, stop)
 
   local cursor = vim._create_ts_querycursor(node, self.query, start, stop, { match_limit = 256 })
 
-  local max_match_id = -1
+  local apply_directives = memoize(match_id_hash, self.apply_directives, true)
+  local match_preds = memoize(match_id_hash, self.match_preds, true)
 
   local function iter(end_line)
     local capture, captured_node, match = cursor:next_capture()
@@ -863,27 +871,18 @@ function Query:iter_captures(node, source, start, stop)
       return
     end
 
-    local captures --- @type table?
-    local match_id, pattern_index = match:info()
-
-    local metadata = {}
-
-    local preds = self.info.patterns[pattern_index] or {}
-
-    if #preds > 0 and match_id > max_match_id then
-      captures = match:captures()
-      max_match_id = match_id
-      if not self:match_preds(match, source) then
-        cursor:remove_match(match_id)
-        if end_line and captured_node:range() > end_line then
-          return nil, captured_node, nil
-        end
-        return iter(end_line) -- tail call: try next match
+    if not match_preds(self, match, source) then
+      local match_id = match:info()
+      cursor:remove_match(match_id)
+      if end_line and captured_node:range() > end_line then
+        return nil, captured_node, nil, nil
       end
-
-      metadata = self:apply_directives(match, source)
+      return iter(end_line) -- tail call: try next match
     end
-    return capture, captured_node, metadata, captures
+
+    local metadata = apply_directives(self, match, source)
+
+    return capture, captured_node, metadata, match
   end
   return iter
 end
@@ -972,6 +971,7 @@ function Query:iter_matches(node, source, start, stop, opts)
       return pattern, old_match, metadata
     end
 
+    -- TODO(lewis6991): create a new function that returns {match, metadata}
     return pattern, captures, metadata
   end
   return iter
diff --git a/scripts/luacats_parser.lua b/scripts/luacats_parser.lua
index cd671fb9dc..cb301b32e4 100644
--- a/scripts/luacats_parser.lua
+++ b/scripts/luacats_parser.lua
@@ -281,8 +281,8 @@ local function filter_decl(line)
   -- M.fun = vim._memoize(function(...)
   --   ->
   -- function M.fun(...)
-  line = line:gsub('^local (.+) = .*_memoize%([^,]+, function%((.*)%)$', 'local function %1(%2)')
-  line = line:gsub('^(.+) = .*_memoize%([^,]+, function%((.*)%)$', 'function %1(%2)')
+  line = line:gsub('^local (.+) = memoize%([^,]+, function%((.*)%)$', 'local function %1(%2)')
+  line = line:gsub('^(.+) = memoize%([^,]+, function%((.*)%)$', 'function %1(%2)')
   return line
 end
 
diff --git a/test/functional/treesitter/query_spec.lua b/test/functional/treesitter/query_spec.lua
index fb3eaa1518..170f448f97 100644
--- a/test/functional/treesitter/query_spec.lua
+++ b/test/functional/treesitter/query_spec.lua
@@ -696,4 +696,82 @@ void ui_refresh(void)
       '((identifier) @id \n(#eq? @id\n@ok.capture\n))'
     )
   end)
+
+  describe('Query:iter_captures', function()
+    it('includes metadata for all captured nodes #23664', function()
+      insert([[
+        const char *sql = "SELECT * FROM Students WHERE name = 'Robert'); DROP TABLE Students;--";
+      ]])
+
+      local query = [[
+        (declaration
+          type: (_)
+          declarator: (init_declarator
+            declarator: (pointer_declarator
+              declarator: (identifier)) @_id
+            value: (string_literal
+              (string_content) @injection.content))
+          (#set! injection.language "sql")
+          (#contains? @_id "sql"))
+      ]]
+
+      local result = exec_lua(
+        [=[
+        local query = vim.treesitter.query.parse("c", ...)
+        local parser = vim.treesitter.get_parser(0, "c")
+        local root = parser:parse()[1]:root()
+        local t = {}
+        for id, node, metadata in query:iter_captures(root, 0) do
+          t[query.captures[id]] = metadata
+        end
+        return t
+      ]=],
+        query
+      )
+
+      eq({
+        ['_id'] = { ['injection.language'] = 'sql' },
+        ['injection.content'] = { ['injection.language'] = 'sql' },
+      }, result)
+    end)
+
+    it('only evaluates predicates once per match', function()
+      insert([[
+        void foo(int x, int y);
+      ]])
+      local query = [[
+        (declaration
+          type: (_)
+          declarator: (function_declarator
+            declarator: (identifier) @function.name
+            parameters: (parameter_list
+              (parameter_declaration
+                type: (_)
+                declarator: (identifier) @argument)))
+          (#eq? @function.name "foo"))
+      ]]
+
+      local result = exec_lua(
+        [[
+        local query = vim.treesitter.query.parse("c", ...)
+        local match_preds = query.match_preds
+        local called = 0
+        function query:match_preds(...)
+          called = called + 1
+          return match_preds(self, ...)
+        end
+        local parser = vim.treesitter.get_parser(0, "c")
+        local root = parser:parse()[1]:root()
+        local captures = {}
+        for id, node in query:iter_captures(root, 0) do
+          captures[#captures + 1] = id
+        end
+        return { called, captures }
+      ]],
+        query
+      )
+
+      eq({ 2, { 1, 1, 2, 2 } }, result)
+    end)
+  end)
 end)