perf(lsp): load buffer contents once when processing semantic token responses (#23484)

perf(lsp): load buffer contents once when processing semantic token responses

Using _get_line_byte_from_position() for each token's boundaries was a
pretty huge bottleneck, since that function would load individual buffer
lines via nvim_buf_get_lines() (plus a lot of extra overhead). So each
token caused two calls to nvim_buf_get_lines() (once for the start
position, and once for the end position).

For semantic tokens, we only attach to buffers that have already been
loaded, so we can safely just get all the lines for the entire buffer at
once, and lift the rest of the _get_line_byte_from_position()
implementation directly while bypassing the part that loads the buffer
line.

While I was looking at get_lines() (used by
_get_line_byte_from_position()), I noticed that we were checking for
non-file URIs before we even looked to see if we already had the buffer
loaded. Moving the buffer-loaded check to be the first thing done in
get_lines() more than halved the average time spent transforming the
token list into highlight ranges, compared to the original ordering,
while that loop was still using _get_line_byte_from_position(). I ended
up improving that loop further by not using get_lines() at all, but
figured the speedup from reordering the checks in get_lines() was worth
leaving in.
This commit is contained in:
jdrouhard 2023-05-05 00:41:36 -05:00 committed by GitHub
parent 9ded4c1275
commit 648f777931
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 11 deletions

View File

@ -99,6 +99,7 @@ local function tokens_to_ranges(data, bufnr, client, request)
local legend = client.server_capabilities.semanticTokensProvider.legend
local token_types = legend.tokenTypes
local token_modifiers = legend.tokenModifiers
local lines = api.nvim_buf_get_lines(bufnr, 0, -1, false)
local ranges = {}
local start = uv.hrtime()
@ -137,11 +138,17 @@ local function tokens_to_ranges(data, bufnr, client, request)
local modifiers = modifiers_from_number(data[i + 4], token_modifiers)
---@private
local function _get_byte_pos(char_pos)
return util._get_line_byte_from_position(bufnr, {
line = line,
character = char_pos,
}, client.offset_encoding)
local function _get_byte_pos(col)
if col > 0 then
local buf_line = lines[line + 1] or ''
local ok, result
ok, result = pcall(util._str_byteindex_enc, buf_line, col, client.offset_encoding)
if ok then
return result
end
return math.min(#buf_line, col)
end
return col
end
local start_col = _get_byte_pos(start_char)

View File

@ -253,12 +253,17 @@ local function get_lines(bufnr, rows)
---@private
local function buf_lines()
local lines = {}
for _, row in pairs(rows) do
for _, row in ipairs(rows) do
lines[row] = (api.nvim_buf_get_lines(bufnr, row, row + 1, false) or { '' })[1]
end
return lines
end
-- use loaded buffers if available
if vim.fn.bufloaded(bufnr) == 1 then
return buf_lines()
end
local uri = vim.uri_from_bufnr(bufnr)
-- load the buffer if this is not a file uri
@ -268,11 +273,6 @@ local function get_lines(bufnr, rows)
return buf_lines()
end
-- use loaded buffers if available
if vim.fn.bufloaded(bufnr) == 1 then
return buf_lines()
end
local filename = api.nvim_buf_get_name(bufnr)
-- get the data from the file