feat(treesitter): add support for wasm parsers

Problem: Installing treesitter parsers is hard (harder than
climbing to heaven).

Solution: Add optional support for wasm parsers with `wasmtime`.

Notes:

* Needs to be enabled by setting `ENABLE_WASMTIME` for tree-sitter and
  Neovim. Build with
  `make CMAKE_EXTRA_FLAGS=-DENABLE_WASMTIME=ON
  DEPS_CMAKE_FLAGS=-DENABLE_WASMTIME=ON`
* Adds optional Rust (obviously) and C11 dependencies.
* Wasmtime comes with a lot of features that can negatively affect
  Neovim performance due to library and symbol table size. Make sure to
  build with minimal features and full LTO.
* To reduce re-compilation times, install `sccache` and build with
  `RUSTC_WRAPPER=<path/to/sccache> make ...`
This commit is contained in:
Lewis Russell 2024-04-19 16:04:57 +01:00 committed by Christian Clason
parent 664de5ea97
commit 688b961d13
17 changed files with 272 additions and 20 deletions

View File

@ -21,6 +21,21 @@ env:
INSTALL_PREFIX: ${{ github.workspace }}/nvim-install
jobs:
# Build-only job: configures and builds the deps project (cmake.deps) and Nvim
# with ENABLE_WASMTIME=ON on every OS in the matrix. No tests are run here.
wasmtime:
strategy:
# Let the remaining OS builds run to completion even if one of them fails.
fail-fast: false
matrix:
# NOTE(review): the matrix key is named `test` but holds runner OS labels.
test: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.test }}
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup
# ENABLE_WASMTIME has to be passed to both configure steps (deps and Nvim).
- run: |
cmake -S cmake.deps --preset ci -D ENABLE_WASMTIME=ON
cmake --build .deps
cmake --preset ci -D ENABLE_WASMTIME=ON
cmake --build build
old-cmake:
name: Test oldest supported cmake
runs-on: ubuntu-latest

View File

@ -130,6 +130,7 @@ else()
option(ENABLE_LTO "enable link time optimization" ON)
endif()
option(ENABLE_LIBINTL "enable libintl" ON)
option(ENABLE_WASMTIME "enable wasmtime" OFF)
message(STATUS "CMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}")

View File

@ -36,6 +36,7 @@ option(USE_BUNDLED_TS "Use the bundled treesitter runtime." ${USE_BUNDLED})
option(USE_BUNDLED_TS_PARSERS "Use the bundled treesitter parsers." ${USE_BUNDLED})
option(USE_BUNDLED_UNIBILIUM "Use the bundled unibilium." ${USE_BUNDLED})
option(USE_BUNDLED_UTF8PROC "Use the bundled utf8proc library." ${USE_BUNDLED})
if(USE_BUNDLED AND MSVC)
option(USE_BUNDLED_GETTEXT "Use the bundled version of gettext." ON)
option(USE_BUNDLED_LIBICONV "Use the bundled version of libiconv." ON)
@ -44,6 +45,19 @@ else()
option(USE_BUNDLED_LIBICONV "Use the bundled version of libiconv." OFF)
endif()
# Optional wasm parser support for treesitter, backed by wasmtime.
option(ENABLE_WASMTIME "Use treesitter with wasmtime support." OFF)

if(ENABLE_WASMTIME)
  # Follow the global bundling default, like the other USE_BUNDLED_* options.
  option(USE_BUNDLED_WASMTIME "Use the bundled wasmtime." ${USE_BUNDLED})
endif()

# USE_BUNDLED_WASMTIME can still be set explicitly (command line or cache)
# while wasmtime support is disabled; reject that inconsistent combination.
if(NOT ENABLE_WASMTIME AND USE_BUNDLED_WASMTIME)
  message(FATAL_ERROR
    "ENABLE_WASMTIME is set to OFF while USE_BUNDLED_WASMTIME is set to ON. "
    "You need to set ENABLE_WASMTIME to ON if you want to use wasmtime.")
endif()
option(USE_EXISTING_SRC_DIR "Skip download of deps sources in case of existing source directory." OFF)
set_default_buildtype(Release)
@ -127,6 +141,10 @@ if(USE_BUNDLED_TS_PARSERS)
include(BuildTreesitterParsers)
endif()
if(USE_BUNDLED_WASMTIME)
include(BuildWasmtime)
endif()
if(USE_BUNDLED_TS)
include(BuildTreesitter)
endif()

View File

@ -17,7 +17,8 @@
"cacheVariables": {
"USE_BUNDLED":"OFF",
"USE_BUNDLED_TS":"ON",
"USE_BUNDLED_UTF8PROC":"ON"
"USE_BUNDLED_UTF8PROC":"ON",
"ENABLE_WASMTIME":"OFF"
},
"inherits": ["base"]
}

View File

@ -1,8 +1,24 @@
if(ENABLE_WASMTIME)
if(USE_BUNDLED_WASMTIME)
set(WASMTIME_CACHE_ARGS "-DCMAKE_C_FLAGS:STRING=-I${DEPS_INSTALL_DIR}/include/wasmtime -I${DEPS_INSTALL_DIR}/include")
else()
find_package(Wasmtime 24.0.0 EXACT REQUIRED)
set(WASMTIME_CACHE_ARGS "-DCMAKE_C_FLAGS:STRING=-I${WASMTIME_INCLUDE_DIR}")
endif()
string(APPEND WASMTIME_CACHE_ARGS " -DTREE_SITTER_FEATURE_WASM")
set(WASMTIME_ARGS -D CMAKE_C_STANDARD=11)
endif()
get_externalproject_options(treesitter ${DEPS_IGNORE_SHA})
ExternalProject_Add(treesitter
DOWNLOAD_DIR ${DEPS_DOWNLOAD_DIR}/treesitter
PATCH_COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_SOURCE_DIR}/cmake/TreesitterCMakeLists.txt
${DEPS_BUILD_DIR}/src/treesitter/CMakeLists.txt
CMAKE_ARGS ${DEPS_CMAKE_ARGS}
CMAKE_ARGS ${DEPS_CMAKE_ARGS} ${WASMTIME_ARGS}
CMAKE_CACHE_ARGS ${WASMTIME_CACHE_ARGS}
${EXTERNALPROJECT_OPTIONS})
if(USE_BUNDLED_WASMTIME)
add_dependencies(treesitter wasmtime)
endif()

View File

@ -0,0 +1,11 @@
# Builds the wasmtime C API (crates/c-api) as an external project.
# wasmtime is huge, so it is optimized as hard as possible and stripped of
# optional features to keep the resulting nvim small.
get_externalproject_options(wasmtime ${DEPS_IGNORE_SHA})

set(wasmtime_cmake_args
  ${DEPS_CMAKE_ARGS}
  # Build with full LTO.
  -D WASMTIME_FASTEST_RUNTIME=ON
  # Turn every optional feature off...
  -D WASMTIME_DISABLE_ALL_FEATURES=ON
  # ...except Cranelift, which compiles wasm to platform code.
  -D WASMTIME_FEATURE_CRANELIFT=ON)

ExternalProject_Add(wasmtime
  DOWNLOAD_DIR ${DEPS_DOWNLOAD_DIR}/wasmtime
  SOURCE_SUBDIR crates/c-api
  CMAKE_ARGS ${wasmtime_cmake_args}
  USES_TERMINAL_BUILD TRUE
  ${EXTERNALPROJECT_OPTIONS})

View File

@ -5,7 +5,7 @@ add_compile_options(-w)
add_library(tree-sitter lib/src/lib.c)
target_include_directories(tree-sitter
PRIVATE lib/src lib/include)
PRIVATE lib/src lib/src/wasm lib/include)
install(FILES
lib/include/tree_sitter/api.h

View File

@ -53,6 +53,9 @@ TREESITTER_MARKDOWN_SHA256 4909d6023643f1afc3ab219585d4035b7403f3a17849782ab803c
TREESITTER_URL https://github.com/tree-sitter/tree-sitter/archive/v0.23.0.tar.gz
TREESITTER_SHA256 6403b361b0014999e96f61b9c84d6950d42f0c7d6e806be79382e0232e48a11b
WASMTIME_URL https://github.com/bytecodealliance/wasmtime/archive/v24.0.0.tar.gz
WASMTIME_SHA256 2ccb49bb3bfa4d86907ad4c80d1147aef6156c7b6e3f7f14ed02a39de9761155
UNCRUSTIFY_URL https://github.com/uncrustify/uncrustify/archive/uncrustify-0.79.0.tar.gz
UNCRUSTIFY_SHA256 e7afaeabf636b7f0ce4e3e9747b95f7bd939613a8db49579755dddf44fedca5f
LUA_DEV_DEPS_URL https://github.com/neovim/deps/raw/5a1f71cceb24990a0b15fd9a472a5f549f019248/opt/lua-dev-deps.tar.gz

22
cmake/FindWasmtime.cmake Normal file
View File

@ -0,0 +1,22 @@
# Find module for a pre-installed wasmtime C API.
#
# Locates wasmtime.h and the wasmtime library, reads the version from the
# `#define WASMTIME_VERSION` line(s) of the header, and defines the INTERFACE
# target `wasmtime` carrying the include directory and link libraries.
find_path2(WASMTIME_INCLUDE_DIR wasmtime.h)
find_library2(WASMTIME_LIBRARY wasmtime)

if(WASMTIME_INCLUDE_DIR AND EXISTS "${WASMTIME_INCLUDE_DIR}/wasmtime.h")
  file(STRINGS "${WASMTIME_INCLUDE_DIR}/wasmtime.h" WASMTIME_VERSION REGEX "#define WASMTIME_VERSION")
  # The backslash must be doubled inside a quoted argument to reach the regex
  # engine as `\.`; every component may have more than one digit. Quoting the
  # input keeps the call valid when no line (or several lines) matched.
  string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" WASMTIME_VERSION "${WASMTIME_VERSION}")
endif()

find_package_handle_standard_args(Wasmtime
  REQUIRED_VARS WASMTIME_INCLUDE_DIR WASMTIME_LIBRARY
  VERSION_VAR WASMTIME_VERSION)

# Guard the target so that running this module twice in one project does not
# fail with "target already exists".
if(NOT TARGET wasmtime)
  add_library(wasmtime INTERFACE)
  target_include_directories(wasmtime SYSTEM BEFORE INTERFACE ${WASMTIME_INCLUDE_DIR})
  target_link_libraries(wasmtime INTERFACE ${WASMTIME_LIBRARY})

  if(MSVC)
    # Define the wasm/wasi API export macros as empty and add the Windows
    # system libraries to the link interface.
    target_compile_options(wasmtime INTERFACE -DWASM_API_EXTERN= -DWASI_API_EXTERN=)
    target_link_libraries(wasmtime INTERFACE ws2_32 advapi32 userenv ntdll shell32 ole32 bcrypt)
  endif()
endif()

mark_as_advanced(WASMTIME_INCLUDE_DIR WASMTIME_LIBRARY)

View File

@ -51,6 +51,13 @@ treesitter parser for buffers with filetype `svg` or `xslt`, use: >lua
vim.treesitter.language.register('xml', { 'svg', 'xslt' })
<
*treesitter-parsers-wasm*
If Nvim is built with `ENABLE_WASMTIME`, then wasm parsers can also be
loaded: >lua
vim.treesitter.language.add('python', { path = "/path/to/python.wasm" })
<
==============================================================================
TREESITTER TREES *treesitter-tree*

View File

@ -72,7 +72,11 @@ vim._ts_get_language_version = function() end
--- @param path string
--- @param lang string
--- @param symbol_name? string
vim._ts_add_language = function(path, lang, symbol_name) end
vim._ts_add_language_from_object = function(path, lang, symbol_name) end
--- @param path string
--- @param lang string
vim._ts_add_language_from_wasm = function(path, lang) end
---@return integer
vim._ts_get_minimum_language_version = function() end

View File

@ -28,6 +28,9 @@ function M.check()
)
end
end
local can_wasm = vim._ts_add_language_from_wasm ~= nil
health.info(string.format('Can load WASM parsers: %s', tostring(can_wasm)))
end
return M

View File

@ -109,7 +109,14 @@ function M.add(lang, opts)
path = paths[1]
end
vim._ts_add_language(path, lang, symbol_name)
if vim.endswith(path, '.wasm') then
if not vim._ts_add_language_from_wasm then
error(string.format("Unable to load wasm parser '%s': not built with ENABLE_WASMTIME ", path))
end
vim._ts_add_language_from_wasm(path, lang)
else
vim._ts_add_language_from_object(path, lang, symbol_name)
end
M.register(lang, filetype)
end

View File

@ -31,7 +31,7 @@ target_link_libraries(main_lib INTERFACE ${LUV_LIBRARY})
find_package(Iconv REQUIRED)
find_package(Libuv 1.28.0 REQUIRED)
find_package(Lpeg REQUIRED)
find_package(Treesitter 0.22.6 REQUIRED)
find_package(Treesitter 0.23.0 REQUIRED)
find_package(Unibilium 2.0 REQUIRED)
find_package(UTF8proc REQUIRED)
@ -48,6 +48,12 @@ if(ENABLE_LIBINTL)
target_link_libraries(main_lib INTERFACE libintl)
endif()
# Optional wasm parser support: require exactly wasmtime 24.0.0, add it to the
# link interface of main_lib, and define HAVE_WASMTIME for nvim_bin only.
if(ENABLE_WASMTIME)
find_package(Wasmtime 24.0.0 EXACT REQUIRED)
target_link_libraries(main_lib INTERFACE wasmtime)
target_compile_definitions(nvim_bin PRIVATE HAVE_WASMTIME)
endif()
target_compile_definitions(main_lib INTERFACE HAVE_UNIBILIUM)
# The unit test lib requires LuaJIT; it will be skipped if LuaJIT is missing.

View File

@ -924,6 +924,7 @@ void nlua_free_all_mem(void)
lua_State *lstate = global_lstate;
nlua_unref_global(lstate, require_ref);
nlua_common_free_all_mem(lstate);
tslua_free();
}
static void nlua_common_free_all_mem(lua_State *lstate)
@ -1902,8 +1903,13 @@ static void nlua_add_treesitter(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
lua_pushcfunction(lstate, tslua_push_querycursor);
lua_setfield(lstate, -2, "_create_ts_querycursor");
lua_pushcfunction(lstate, tslua_add_language);
lua_setfield(lstate, -2, "_ts_add_language");
lua_pushcfunction(lstate, tslua_add_language_from_object);
lua_setfield(lstate, -2, "_ts_add_language_from_object");
#ifdef HAVE_WASMTIME
lua_pushcfunction(lstate, tslua_add_language_from_wasm);
lua_setfield(lstate, -2, "_ts_add_language_from_wasm");
#endif
lua_pushcfunction(lstate, tslua_has_language);
lua_setfield(lstate, -2, "_ts_has_language");

View File

@ -15,6 +15,10 @@
#include <tree_sitter/api.h>
#include <uv.h>
#ifdef HAVE_WASMTIME
# include <wasm.h>
#endif
#include "klib/kvec.h"
#include "nvim/api/private/helpers.h"
#include "nvim/buffer_defs.h"
@ -24,6 +28,7 @@
#include "nvim/map_defs.h"
#include "nvim/memline.h"
#include "nvim/memory.h"
#include "nvim/os/fs.h"
#include "nvim/pos_defs.h"
#include "nvim/strings.h"
#include "nvim/types_defs.h"
@ -53,6 +58,11 @@ typedef struct {
static PMap(cstr_t) langs = MAP_INIT;
#ifdef HAVE_WASMTIME
static wasm_engine_t *wasmengine;
static TSWasmStore *ts_wasmstore;
#endif
// TSLanguage
int tslua_has_language(lua_State *L)
@ -62,8 +72,59 @@ int tslua_has_language(lua_State *L)
return 1;
}
static TSLanguage *load_language(lua_State *L, const char *path, const char *lang_name,
const char *symbol)
#ifdef HAVE_WASMTIME
/// Reads the whole contents of a file into a newly allocated buffer.
///
/// @param[in]  path  Path of the file to read.
/// @param[out] len   Set to the number of bytes read on success; left
///                   untouched on failure.
///
/// @return Buffer allocated with xmalloc() that the caller must xfree(), or
///         NULL if the file cannot be opened, sized or read completely
///         (an empty file is reported as NULL as well).
static char *read_file(const char *path, size_t *len)
  FUNC_ATTR_MALLOC
{
  // Binary mode: the module bytes must be read untranslated on platforms
  // that distinguish text and binary streams.
  FILE *file = os_fopen(path, "rb");
  if (file == NULL) {
    return NULL;
  }

  char *data = NULL;
  long size = -1;

  if (fseek(file, 0L, SEEK_END) == 0) {
    // ftell() returns -1 on failure; never cast that to size_t.
    size = ftell(file);
  }

  if (size > 0 && fseek(file, 0L, SEEK_SET) == 0) {
    data = xmalloc((size_t)size);
    if (fread(data, (size_t)size, 1, file) == 1) {
      *len = (size_t)size;
    } else {
      xfree(data);
      data = NULL;
    }
  }

  fclose(file);
  return data;
}
/// Maps a tree-sitter wasm error kind to a short upper-case label used in
/// error messages.
///
/// @param werr  Error kind taken from a TSWasmError.
///
/// @return Static string; "UNKNOWN" for every kind without its own label.
static const char *wasmerr_to_str(TSWasmErrorKind werr)
{
  if (werr == TSWasmErrorKindParse) {
    return "PARSE";
  }
  if (werr == TSWasmErrorKindCompile) {
    return "COMPILE";
  }
  if (werr == TSWasmErrorKindInstantiate) {
    return "INSTANTIATE";
  }
  if (werr == TSWasmErrorKindAllocate) {
    return "ALLOCATE";
  }
  return "UNKNOWN";
}
#endif
// Lua C function: adds a language from a wasm parser file.
//
// Thin wrapper that forwards to add_language() with is_wasm set to true.
int tslua_add_language_from_wasm(lua_State *L)
{
return add_language(L, true);
}
// Lua C function: creates the language in the internal language map from a
// parser object file.
//
// Thin wrapper that forwards to add_language() with is_wasm set to false.
//
// Returns true if the language is correctly loaded in the language map
int tslua_add_language_from_object(lua_State *L)
{
return add_language(L, false);
}
static const TSLanguage *load_language_from_object(lua_State *L, const char *path,
const char *lang_name, const char *symbol)
{
uv_lib_t lib;
if (uv_dlopen(path, &lib)) {
@ -91,16 +152,59 @@ static TSLanguage *load_language(lua_State *L, const char *path, const char *lan
return lang;
}
// Creates the language into the internal language map.
//
// Returns true if the language is correctly loaded in the language map
int tslua_add_language(lua_State *L)
/// Loads a tree-sitter language from a wasm parser file.
///
/// The shared wasm engine and the tree-sitter wasm store are created lazily
/// on first use. Every failure is raised as a Lua error.
///
/// @param L          Lua state used to raise errors.
/// @param path       Path of the wasm parser file.
/// @param lang_name  Name passed to the wasm store for the language.
///
/// @return The loaded language. When built without HAVE_WASMTIME a Lua error
///         ("Not supported") is raised instead.
static const TSLanguage *load_language_from_wasm(lua_State *L, const char *path,
                                                 const char *lang_name)
{
#ifndef HAVE_WASMTIME
  luaL_error(L, "Not supported");
  return NULL;
#else
  if (wasmengine == NULL) {
    wasmengine = wasm_engine_new();
  }
  assert(wasmengine != NULL);

  // Zero-initialized so that `kind` stays 0 when the store already exists.
  TSWasmError werr = { 0 };
  if (ts_wasmstore == NULL) {
    ts_wasmstore = ts_wasm_store_new(wasmengine, &werr);
  }

  if (werr.kind > 0) {
    luaL_error(L, "Error creating wasm store: (%s) %s", wasmerr_to_str(werr.kind), werr.message);
  }

  size_t file_size = 0;
  char *data = read_file(path, &file_size);
  if (data == NULL) {
    luaL_error(L, "Unable to read file %s", path);
  }

  // The loader takes a 32-bit length; refuse files whose size would be
  // silently truncated by the cast below.
  if ((size_t)(uint32_t)file_size != file_size) {
    xfree(data);
    luaL_error(L, "Failed to load WASM parser %s: file too large", path);
  }

  const TSLanguage *lang = ts_wasm_store_load_language(ts_wasmstore, lang_name, data,
                                                       (uint32_t)file_size, &werr);

  // Free the file buffer before any error is raised so it cannot leak.
  xfree(data);

  if (werr.kind > 0) {
    luaL_error(L, "Failed to load WASM parser %s: (%s) %s", path, wasmerr_to_str(werr.kind),
               werr.message);
  }

  if (lang == NULL) {
    luaL_error(L, "Failed to load parser %s: internal error", path);
  }

  return lang;
#endif
}
static int add_language(lua_State *L, bool is_wasm)
{
const char *path = luaL_checkstring(L, 1);
const char *lang_name = luaL_checkstring(L, 2);
const char *symbol_name = lang_name;
if (lua_gettop(L) >= 3 && !lua_isnil(L, 3)) {
if (!is_wasm && lua_gettop(L) >= 3 && !lua_isnil(L, 3)) {
symbol_name = luaL_checkstring(L, 3);
}
@ -109,7 +213,9 @@ int tslua_add_language(lua_State *L)
return 1;
}
TSLanguage *lang = load_language(L, path, lang_name, symbol_name);
const TSLanguage *lang = is_wasm
? load_language_from_wasm(L, path, lang_name)
: load_language_from_object(L, path, lang_name, symbol_name);
uint32_t lang_version = ts_language_version(lang);
if (lang_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
@ -121,7 +227,7 @@ int tslua_add_language(lua_State *L)
TREE_SITTER_LANGUAGE_VERSION, lang_version);
}
pmap_put(cstr_t)(&langs, xstrdup(lang_name), lang);
pmap_put(cstr_t)(&langs, xstrdup(lang_name), (TSLanguage *)lang);
lua_pushboolean(L, true);
return 1;
@ -186,6 +292,9 @@ int tslua_inspect_lang(lua_State *L)
lua_setfield(L, -2, "fields"); // [retval]
lua_pushboolean(L, ts_language_is_wasm(lang));
lua_setfield(L, -2, "_wasm");
lua_pushinteger(L, ts_language_version(lang)); // [retval, version]
lua_setfield(L, -2, "_abi_version");
@ -215,6 +324,13 @@ int tslua_push_parser(lua_State *L)
TSParser **parser = lua_newuserdata(L, sizeof(TSParser *));
*parser = ts_parser_new();
#ifdef HAVE_WASMTIME
if (ts_language_is_wasm(lang)) {
assert(wasmengine != NULL);
ts_parser_set_wasm_store(*parser, ts_wasmstore);
}
#endif
if (!ts_parser_set_language(*parser, lang)) {
ts_parser_delete(*parser);
const char *lang_name = luaL_checkstring(L, 1);
@ -1561,3 +1677,15 @@ void tslua_init(lua_State *L)
ts_set_allocator(xmalloc, xcalloc, xrealloc, xfree);
}
/// Releases the global wasm store and wasm engine, if they were created.
///
/// Does nothing when built without HAVE_WASMTIME. Safe to call more than
/// once: both globals are reset to NULL after being released.
void tslua_free(void)
{
#ifdef HAVE_WASMTIME
  // Release the store before the engine it was created from.
  // NOTE(review): ts_wasmstore is also handed to parsers through
  // ts_parser_set_wasm_store(); confirm tree-sitter's ownership rules so the
  // store is not deleted twice.
  if (ts_wasmstore != NULL) {
    ts_wasm_store_delete(ts_wasmstore);
    ts_wasmstore = NULL;
  }
  if (wasmengine != NULL) {
    wasm_engine_delete(wasmengine);
    wasmengine = NULL;
  }
#endif
}

View File

@ -57,8 +57,12 @@ describe('treesitter language API', function()
local keys, fields, symbols = unpack(exec_lua(function()
local lang = vim.treesitter.language.inspect('c')
local keys, symbols = {}, {}
for k, _ in pairs(lang) do
keys[k] = true
for k, v in pairs(lang) do
if type(v) == 'boolean' then
keys[k] = v
else
keys[k] = true
end
end
-- symbols array can have "holes" and is thus not a valid msgpack array
@ -69,7 +73,7 @@ describe('treesitter language API', function()
return { keys, lang.fields, symbols }
end))
eq({ fields = true, symbols = true, _abi_version = true }, keys)
eq({ fields = true, symbols = true, _abi_version = true, _wasm = false }, keys)
local fset = {}
for _, f in pairs(fields) do