From 224f303ee54c54d2147f03010385e8cc48e42869 Mon Sep 17 00:00:00 2001 From: Gregory Anders <8965202+gpanders@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:15:32 -0500 Subject: [PATCH] feat(stdlib): add vim.base64 module (#25843) Add base64 encode() and decode() functions to a vim.base64 module. --- runtime/doc/lua.txt | 22 +++ runtime/doc/news.txt | 3 + runtime/lua/vim/_meta/base64.lua | 13 ++ scripts/gen_vimdoc.py | 5 +- src/nvim/base64.c | 209 ++++++++++++++++++++++++++++ src/nvim/base64.h | 10 ++ src/nvim/lua/base64.c | 65 +++++++++ src/nvim/lua/base64.h | 12 ++ src/nvim/lua/stdlib.c | 5 + test/functional/lua/base64_spec.lua | 105 ++++++++++++++ 10 files changed, 448 insertions(+), 1 deletion(-) create mode 100644 runtime/lua/vim/_meta/base64.lua create mode 100644 src/nvim/base64.c create mode 100644 src/nvim/base64.h create mode 100644 src/nvim/lua/base64.c create mode 100644 src/nvim/lua/base64.h create mode 100644 test/functional/lua/base64_spec.lua diff --git a/runtime/doc/lua.txt b/runtime/doc/lua.txt index 1d69b1cc91..aea19d7bf0 100644 --- a/runtime/doc/lua.txt +++ b/runtime/doc/lua.txt @@ -809,6 +809,28 @@ vim.json.encode({obj}) *vim.json.encode()* (string) +============================================================================== +VIM.BASE64 *vim.base64* + +vim.base64.decode({str}) *vim.base64.decode()* + Decode a Base64 encoded string. + + Parameters: ~ + • {str} (string) Base64 encoded string + + Return: ~ + (string) Decoded string + +vim.base64.encode({str}) *vim.base64.encode()* + Encode {str} using Base64. + + Parameters: ~ + • {str} (string) String to encode + + Return: ~ + (string) Encoded string + + ============================================================================== VIM.SPELL *vim.spell* diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index d1191bef9a..b88b7d164f 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -202,6 +202,9 @@ The following new APIs and features were added. 
• 'complete' option supports "f" flag for completing buffer names. +• Added |vim.base64.encode()| and |vim.base64.decode()| for encoding and decoding + strings using Base64 encoding. + ============================================================================== CHANGED FEATURES *news-changed* diff --git a/runtime/lua/vim/_meta/base64.lua b/runtime/lua/vim/_meta/base64.lua new file mode 100644 index 0000000000..f25b4af234 --- /dev/null +++ b/runtime/lua/vim/_meta/base64.lua @@ -0,0 +1,13 @@ +--- @meta + +--- Encode {str} using Base64. +--- +--- @param str string String to encode +--- @return string Encoded string +function vim.base64.encode(str) end + +--- Decode a Base64 encoded string. +--- +--- @param str string Base64 encoded string +--- @return string Decoded string +function vim.base64.decode(str) end diff --git a/scripts/gen_vimdoc.py b/scripts/gen_vimdoc.py index 13f64c44a3..1f10a39e35 100755 --- a/scripts/gen_vimdoc.py +++ b/scripts/gen_vimdoc.py @@ -150,6 +150,7 @@ CONFIG = { 'diff.lua', 'mpack.lua', 'json.lua', + 'base64.lua', 'spell.lua', 'builtin.lua', '_options.lua', @@ -187,6 +188,7 @@ CONFIG = { 'runtime/lua/vim/_meta/diff.lua', 'runtime/lua/vim/_meta/mpack.lua', 'runtime/lua/vim/_meta/json.lua', + 'runtime/lua/vim/_meta/base64.lua', 'runtime/lua/vim/_meta/regex.lua', 'runtime/lua/vim/_meta/spell.lua', ], @@ -206,7 +208,7 @@ CONFIG = { 'section_fmt': lambda name: ( 'Lua module: vim' if name.lower() == '_editor' else 'LUA-VIMSCRIPT BRIDGE' if name.lower() == '_options' else - f'VIM.{name.upper()}' if name.lower() in [ 'highlight', 'mpack', 'json', 'diff', 'spell', 'regex' ] else + f'VIM.{name.upper()}' if name.lower() in [ 'highlight', 'mpack', 'json', 'base64', 'diff', 'spell', 'regex' ] else 'VIM' if name.lower() == 'builtin' else f'Lua module: vim.{name.lower()}'), 'helptag_fmt': lambda name: ( @@ -241,6 +243,7 @@ CONFIG = { 'builtin': 'vim', 'mpack': 'vim.mpack', 'json': 'vim.json', + 'base64': 'vim.base64', 'regex': 'vim.regex', 'spell': 
'vim.spell',
         },
diff --git a/src/nvim/base64.c b/src/nvim/base64.c
new file mode 100644
index 0000000000..95aa491281
--- /dev/null
+++ b/src/nvim/base64.c
@@ -0,0 +1,240 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "nvim/base64.h"
+#include "nvim/memory.h"
+
+#ifdef HAVE_BE64TOH
+# include ENDIAN_INCLUDE_FILE
+#endif
+
+// Base64 alphabet, indexed by 6-bit value
+static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+// Maps a byte to its position in the alphabet.
+// Indices are 1-based because we use 0 to indicate a letter that is not part of the alphabet
+// (this includes the padding character '=', which the decoder checks for separately).
+static const uint8_t char_to_index[256] = {
+  ['A'] = 1, ['B'] = 2, ['C'] = 3, ['D'] = 4, ['E'] = 5, ['F'] = 6, ['G'] = 7, ['H'] = 8,
+  ['I'] = 9, ['J'] = 10, ['K'] = 11, ['L'] = 12, ['M'] = 13, ['N'] = 14, ['O'] = 15, ['P'] = 16,
+  ['Q'] = 17, ['R'] = 18, ['S'] = 19, ['T'] = 20, ['U'] = 21, ['V'] = 22, ['W'] = 23, ['X'] = 24,
+  ['Y'] = 25, ['Z'] = 26, ['a'] = 27, ['b'] = 28, ['c'] = 29, ['d'] = 30, ['e'] = 31, ['f'] = 32,
+  ['g'] = 33, ['h'] = 34, ['i'] = 35, ['j'] = 36, ['k'] = 37, ['l'] = 38, ['m'] = 39, ['n'] = 40,
+  ['o'] = 41, ['p'] = 42, ['q'] = 43, ['r'] = 44, ['s'] = 45, ['t'] = 46, ['u'] = 47, ['v'] = 48,
+  ['w'] = 49, ['x'] = 50, ['y'] = 51, ['z'] = 52, ['0'] = 53, ['1'] = 54, ['2'] = 55, ['3'] = 56,
+  ['4'] = 57, ['5'] = 58, ['6'] = 59, ['7'] = 60, ['8'] = 61, ['9'] = 62, ['+'] = 63, ['/'] = 64,
+};
+
+#ifndef HAVE_BE64TOH
+/// Fallback for when HAVE_BE64TOH is not defined: convert a 64-bit value from host byte order
+/// to big-endian byte order.
+///
+/// @param host_64bits Value in host byte order
+/// @return {host_64bits} unchanged if ORDER_BIG_ENDIAN is defined, else with its bytes reversed
+static inline uint64_t htobe64(uint64_t host_64bits)
+{
+# ifdef ORDER_BIG_ENDIAN
+  return host_64bits;
+# else
+  const uint8_t *buf = (const uint8_t *)&host_64bits;
+  uint64_t ret = 0;
+  // buf[0] ends up in the most significant byte of the result, buf[7] in the least significant
+  for (size_t i = 8; i; i--) {
+    ret |= ((uint64_t)buf[i - 1]) << ((8 - i) * 8);
+  }
+  return ret;
+# endif
+}
+
+/// 32-bit counterpart of the htobe64() fallback.
+///
+/// @param host_32bits Value in host byte order
+/// @return {host_32bits} unchanged if ORDER_BIG_ENDIAN is defined, else with its bytes reversed
+static inline uint32_t htobe32(uint32_t host_32bits)
+{
+# ifdef ORDER_BIG_ENDIAN
+  return host_32bits;
+# else
+  const uint8_t *buf = (const uint8_t *)&host_32bits;
+  uint32_t ret = 0;
+  for (size_t i = 4; i; i--) {
+    ret |= ((uint32_t)buf[i - 1]) << ((4 - i) * 8);
+  }
+  return ret;
+# endif
+}
+#endif
+
+/// Encode a string using Base64.
+///
+/// The output is padded with '=' to a multiple of 4 characters and is NUL-terminated.
+///
+/// @param src String to encode (may contain NUL bytes)
+/// @param src_len Length of the string
+/// @return Base64 encoded string allocated with xmalloc(); the caller must xfree() it
+char *base64_encode(const char *src, size_t src_len)
+{
+  assert(src != NULL);
+
+  const size_t out_len = ((src_len + 2) / 3) * 4;
+  char *dest = xmalloc(out_len + 1);
+
+  size_t src_i = 0;
+  size_t out_i = 0;
+
+  const uint8_t *s = (const uint8_t *)src;
+
+  // Read 8 bytes at a time as much as we can. Only the upper 48 bits (6 input bytes) of each
+  // load are encoded, giving 8 output characters, so the input only advances by 6.
+  for (; src_i + 7 < src_len; src_i += 6) {
+    uint64_t bits_h;
+    memcpy(&bits_h, &s[src_i], sizeof(uint64_t));
+    const uint64_t bits_be = htobe64(bits_h);
+    dest[out_i + 0] = alphabet[(bits_be >> 58) & 0x3F];
+    dest[out_i + 1] = alphabet[(bits_be >> 52) & 0x3F];
+    dest[out_i + 2] = alphabet[(bits_be >> 46) & 0x3F];
+    dest[out_i + 3] = alphabet[(bits_be >> 40) & 0x3F];
+    dest[out_i + 4] = alphabet[(bits_be >> 34) & 0x3F];
+    dest[out_i + 5] = alphabet[(bits_be >> 28) & 0x3F];
+    dest[out_i + 6] = alphabet[(bits_be >> 22) & 0x3F];
+    dest[out_i + 7] = alphabet[(bits_be >> 16) & 0x3F];
+    out_i += 8;
+  }
+
+  // Same with 4-byte loads: the upper 24 bits (3 input bytes) give 4 output characters
+  for (; src_i + 3 < src_len; src_i += 3) {
+    uint32_t bits_h;
+    memcpy(&bits_h, &s[src_i], sizeof(uint32_t));
+    const uint32_t bits_be = htobe32(bits_h);
+    dest[out_i + 0] = alphabet[(bits_be >> 26) & 0x3F];
+    dest[out_i + 1] = alphabet[(bits_be >> 20) & 0x3F];
+    dest[out_i + 2] = alphabet[(bits_be >> 14) & 0x3F];
+    dest[out_i + 3] = alphabet[(bits_be >> 8) & 0x3F];
+    out_i += 4;
+  }
+
+  // At most 3 bytes remain; encode them without reading past the end of the input
+  if (src_i + 2 < src_len) {
+    dest[out_i + 0] = alphabet[s[src_i] >> 2];
+    dest[out_i + 1] = alphabet[((s[src_i] & 0x3) << 4) | (s[src_i + 1] >> 4)];
+    dest[out_i + 2] = alphabet[(s[src_i + 1] & 0xF) << 2 | (s[src_i + 2] >> 6)];
+    dest[out_i + 3] = alphabet[(s[src_i + 2] & 0x3F)];
+    out_i += 4;
+  } else if (src_i + 1 < src_len) {
+    dest[out_i + 0] = alphabet[s[src_i] >> 2];
+    dest[out_i + 1] = alphabet[((s[src_i] & 0x3) << 4) | (s[src_i + 1] >> 4)];
+    dest[out_i + 2] = alphabet[(s[src_i + 1] & 0xF) << 2];
+    out_i += 3;
+  } else if (src_i < src_len) {
+    dest[out_i + 0] = alphabet[s[src_i] >> 2];
+    dest[out_i + 1] = alphabet[(s[src_i] & 0x3) << 4];
+    out_i += 2;
+  }
+
+  // Pad the output to a multiple of 4 characters
+  for (; out_i < out_len; out_i++) {
+    dest[out_i] = '=';
+  }
+
+  dest[out_len] = '\0';
+
+  return dest;
+}
+
+/// Decode a Base64 encoded string.
+///
+/// The input must be padded: its length has to be a multiple of 4 and '=' may only appear as
+/// trailing padding.
+///
+/// The decoded data may contain NUL bytes. The returned buffer is NUL-terminated, but its length
+/// is not returned: for valid input it is (src_len / 4) * 3 minus the number of trailing '='.
+///
+/// @param src Base64 encoded string
+/// @param src_len Length of {src}
+/// @return Decoded string allocated with xmalloc() (the caller must xfree() it), or NULL if
+///         {src} is not valid Base64
+char *base64_decode(const char *src, size_t src_len)
+{
+  assert(src != NULL);
+
+  char *dest = NULL;
+
+  if (src_len % 4 != 0) {
+    goto invalid;
+  }
+
+  size_t out_len = (src_len / 4) * 3;
+  if (src_len >= 1 && src[src_len - 1] == '=') {
+    out_len--;
+  }
+  if (src_len >= 2 && src[src_len - 2] == '=') {
+    out_len--;
+  }
+
+  const uint8_t *s = (const uint8_t *)src;
+
+  dest = xmalloc(out_len + 1);
+
+  int acc = 0;
+  int acc_len = 0;
+  size_t out_i = 0;
+  size_t src_i = 0;
+  bool seen_padding = false;
+
+  for (; src_i < src_len; src_i++) {
+    const uint8_t c = s[src_i];
+    const uint8_t d = char_to_index[c];
+    if (d == 0) {
+      if (c == '=') {
+        // End of data; src_i is left pointing at the first padding character
+        seen_padding = true;
+        break;
+      }
+      goto invalid;
+    }
+
+    // Keep only the low 12 bits: up to 6 pending bits plus the 6 new ones
+    acc = ((acc << 6) & 0xFFF) + (d - 1);
+    acc_len += 6;
+    if (acc_len >= 8) {
+      acc_len -= 8;
+      dest[out_i] = (char)(acc >> acc_len);
+      out_i += 1;
+    }
+  }
+
+  // Leftover bits cannot form a byte: there may be at most 4 of them and they must all be zero
+  if (acc_len > 4 || ((acc & ((1 << acc_len) - 1)) != 0)) {
+    goto invalid;
+  }
+
+  // Only validate padding when a padding character was actually found
+  if (seen_padding) {
+    // 2 leftover bits must be followed by exactly one '=', 4 leftover bits by exactly two
+    const int padding_len = acc_len / 2;
+    int padding_chars = 0;
+    for (; src_i < src_len; src_i++) {
+      if (s[src_i] != '=' || padding_chars == padding_len) {
+        goto invalid;
+      }
+      padding_chars += 1;
+    }
+
+    if (padding_chars != padding_len) {
+      goto invalid;
+    }
+  }
+
+  dest[out_len] = '\0';
+
+  return dest;
+
+invalid:
+  if (dest) {
+    xfree(dest);
+  }
+
+  return NULL;
+}
diff --git a/src/nvim/base64.h b/src/nvim/base64.h
new file mode 100644
index 0000000000..2162fd9ba2
--- /dev/null
+++ b/src/nvim/base64.h
@@ -0,0 +1,10 @@
+#ifndef NVIM_BASE64_H
+#define NVIM_BASE64_H
+
+#include <stddef.h>
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "base64.h.generated.h"
+#endif
+
+#endif  // NVIM_BASE64_H
diff --git a/src/nvim/lua/base64.c b/src/nvim/lua/base64.c
new file mode 100644
index 0000000000..3f246839d5
--- /dev/null
+++ b/src/nvim/lua/base64.c
@@ -0,0 +1,94 @@
+#include <assert.h>
+#include <lauxlib.h>
+#include <lua.h>
+
+#include "nvim/base64.h"
+#include "nvim/lua/base64.h"
+#include "nvim/memory.h"
+
+/// Lua binding for base64_encode(), exposed as the "encode" field of the module table.
+///
+/// Expects a single string argument and pushes its Base64 encoding.
+///
+/// @param L Lua state
+/// @return Number of values returned to Lua (always 1; failures are raised as Lua errors)
+static int nlua_base64_encode(lua_State *L)
+{
+  if (lua_gettop(L) < 1) {
+    return luaL_error(L, "Expected 1 argument");
+  }
+
+  if (lua_type(L, 1) != LUA_TSTRING) {
+    return luaL_argerror(L, 1, "expected string");
+  }
+
+  size_t src_len = 0;
+  const char *src = lua_tolstring(L, 1, &src_len);
+
+  char *ret = base64_encode(src, src_len);
+  assert(ret != NULL);
+  lua_pushstring(L, ret);
+  xfree(ret);
+
+  return 1;
+}
+
+/// Lua binding for base64_decode(), exposed as the "decode" field of the module table.
+///
+/// Expects a single string argument and pushes the decoded data; raises "Invalid input" if the
+/// argument is not valid Base64.
+///
+/// @param L Lua state
+/// @return Number of values returned to Lua (always 1; failures are raised as Lua errors)
+static int nlua_base64_decode(lua_State *L)
+{
+  if (lua_gettop(L) < 1) {
+    return luaL_error(L, "Expected 1 argument");
+  }
+
+  if (lua_type(L, 1) != LUA_TSTRING) {
+    return luaL_argerror(L, 1, "expected string");
+  }
+
+  size_t src_len = 0;
+  const char *src = lua_tolstring(L, 1, &src_len);
+
+  char *ret = base64_decode(src, src_len);
+  if (ret == NULL) {
+    return luaL_error(L, "Invalid input");
+  }
+
+  // The decoded data may contain NUL bytes, so it must be pushed with an explicit length.
+  // base64_decode() accepted the input, so its length is a multiple of 4 and any '=' is trailing
+  // padding: every 4 input characters yield 3 bytes, minus one byte per padding character.
+  size_t ret_len = (src_len / 4) * 3;
+  if (src_len >= 1 && src[src_len - 1] == '=') {
+    ret_len--;
+  }
+  if (src_len >= 2 && src[src_len - 2] == '=') {
+    ret_len--;
+  }
+
+  lua_pushlstring(L, ret, ret_len);
+  xfree(ret);
+
+  return 1;
+}
+
+/// Functions exported in the table built by luaopen_base64()
+static const luaL_Reg base64_functions[] = {
+  { "encode", nlua_base64_encode },
+  { "decode", nlua_base64_decode },
+  { NULL, NULL },
+};
+
+/// Create the base64 module table and leave it on top of the Lua stack.
+///
+/// @param L Lua state
+/// @return Number of values pushed onto the stack (always 1)
+int luaopen_base64(lua_State *L)
+{
+  lua_newtable(L);
+  luaL_register(L, NULL, base64_functions);
+  return 1;
+}
diff --git a/src/nvim/lua/base64.h b/src/nvim/lua/base64.h
new file mode 100644
index 0000000000..570d9eb677
--- /dev/null
+++ b/src/nvim/lua/base64.h
@@ -0,0 +1,12 @@
+#ifndef NVIM_LUA_BASE64_H
+#define NVIM_LUA_BASE64_H
+
+#include <lauxlib.h>
+#include <lua.h>
+#include <lualib.h>
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "lua/base64.h.generated.h"
+#endif
+
+#endif  // NVIM_LUA_BASE64_H
diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
index 14e9902ee2..60be771b6c 100644
---
a/src/nvim/lua/stdlib.c +++ b/src/nvim/lua/stdlib.c @@ -26,6 +26,7 @@ #include "nvim/ex_eval.h" #include "nvim/fold.h" #include "nvim/globals.h" +#include "nvim/lua/base64.h" #include "nvim/lua/converter.h" #include "nvim/lua/spell.h" #include "nvim/lua/stdlib.h" @@ -606,6 +607,10 @@ void nlua_state_add_stdlib(lua_State *const lstate, bool is_thread) lua_pushcfunction(lstate, &nlua_iconv); lua_setfield(lstate, -2, "iconv"); + // vim.base64 + luaopen_base64(lstate); + lua_setfield(lstate, -2, "base64"); + nlua_state_add_internal(lstate); } diff --git a/test/functional/lua/base64_spec.lua b/test/functional/lua/base64_spec.lua new file mode 100644 index 0000000000..f0d112c23e --- /dev/null +++ b/test/functional/lua/base64_spec.lua @@ -0,0 +1,105 @@ +local helpers = require('test.functional.helpers')(after_each) +local clear = helpers.clear +local exec_lua = helpers.exec_lua +local eq = helpers.eq +local pcall_err = helpers.pcall_err +local matches = helpers.matches + +describe('vim.base64', function() + before_each(clear) + + local function encode(s) + return exec_lua([[return vim.base64.encode(...)]], s) + end + + local function decode(s) + return exec_lua([[return vim.base64.decode(...)]], s) + end + + it('works', function() + local values = { + '', + 'Many hands make light work.', + [[ + Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in + my purse, and nothing particular to interest me on shore, I thought I would sail about a + little and see the watery part of the world. + ]], + [[ + It is a truth universally acknowledged, that a single man in possession of a good fortune, + must be in want of a wife. 
+ ]], + 'Happy families are all alike; every unhappy family is unhappy in its own way.', + 'ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя', + 'ÅÍÎÏ˝ÓÔÒÚÆ☃', + '𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆', + '👨‍👩‍👦 👨‍👩‍👧‍👦 👨‍👨‍👦 👩‍👩‍👧 👨‍👦 👨‍👧‍👦 👩‍👦 👩‍👧‍👦', + 'مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ،', + [[ + Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣ + ̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰ + ̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟ + ̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕ + Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮ + ]], + } + + for _, v in ipairs(values) do + eq(v, decode(encode(v))) + end + + -- Explicitly check encoded output + eq('VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZwo=', encode('The quick brown fox jumps over the lazy dog\n')) + + -- Test vectors from rfc4648 + local rfc4648 = { + { '', '' }, + { 'f', 'Zg==', }, + { 'fo', 'Zm8=' }, + { 'foo', 'Zm9v' }, + { 'foob', 'Zm9vYg==' }, + { 'fooba', 'Zm9vYmE=' }, + { 'foobar', 'Zm9vYmFy' }, + } + + for _, v in ipairs(rfc4648) do + local input = v[1] + local output = v[2] + eq(output, encode(input)) + eq(input, decode(output)) + end + end) + + it('detects invalid input', function() + local invalid = { + 'A', + 'AA', + 'AAA', + 'A..A', + 'AA=A', + 'AA/=', + 'A/==', + 'A===', + '====', + 'Zm9vYmFyZm9vYmFyA..A', + 'Zm9vYmFyZm9vYmFyAA=A', 
+ 'Zm9vYmFyZm9vYmFyAA/=', + 'Zm9vYmFyZm9vYmFyA/==', + 'Zm9vYmFyZm9vYmFyA===', + 'A..AZm9vYmFyZm9vYmFy', + 'Zm9vYmFyZm9vAA=A', + 'Zm9vYmFyZm9vAA/=', + 'Zm9vYmFyZm9vA/==', + 'Zm9vYmFyZm9vA===', + } + + for _, v in ipairs(invalid) do + eq('Invalid input', pcall_err(decode, v)) + end + + eq('Expected 1 argument', pcall_err(encode)) + eq('Expected 1 argument', pcall_err(decode)) + matches('expected string', pcall_err(encode, 42)) + matches('expected string', pcall_err(decode, 42)) + end) +end)