feat(stdlib): add vim.base64 module (#25843)

Add base64 encode() and decode() functions to a vim.base64 module.
This commit is contained in:
Gregory Anders 2023-10-31 09:15:32 -05:00 committed by GitHub
parent adbe7f3683
commit 224f303ee5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 448 additions and 1 deletions

View File

@ -809,6 +809,28 @@ vim.json.encode({obj}) *vim.json.encode()*
(string)
==============================================================================
VIM.BASE64 *vim.base64*
vim.base64.decode({str}) *vim.base64.decode()*
Decode a Base64 encoded string.
Parameters: ~
• {str} (string) Base64 encoded string
Return: ~
(string) Decoded string
vim.base64.encode({str}) *vim.base64.encode()*
Encode {str} using Base64.
Parameters: ~
• {str} (string) String to encode
Return: ~
(string) Encoded string
==============================================================================
VIM.SPELL *vim.spell*

View File

@ -202,6 +202,9 @@ The following new APIs and features were added.
• 'complete' option supports "f" flag for completing buffer names.
• Added |vim.base64.encode()| and |vim.base64.decode()| for encoding and decoding
strings using Base64.
==============================================================================
CHANGED FEATURES *news-changed*

View File

@ -0,0 +1,13 @@
--- @meta
--- Encode {str} using Base64.
---
--- The result is padded with `=` so that its length is always a multiple of 4.
--- Raises an error if {str} is not a string.
---
--- @param str string String to encode
--- @return string Encoded string
function vim.base64.encode(str) end
--- Decode a Base64 encoded string.
---
--- Raises an error ("Invalid input") if {str} is not valid, correctly padded
--- Base64, and an error if {str} is not a string.
---
--- @param str string Base64 encoded string
--- @return string Decoded string
function vim.base64.decode(str) end

View File

@ -150,6 +150,7 @@ CONFIG = {
'diff.lua',
'mpack.lua',
'json.lua',
'base64.lua',
'spell.lua',
'builtin.lua',
'_options.lua',
@ -187,6 +188,7 @@ CONFIG = {
'runtime/lua/vim/_meta/diff.lua',
'runtime/lua/vim/_meta/mpack.lua',
'runtime/lua/vim/_meta/json.lua',
'runtime/lua/vim/_meta/base64.lua',
'runtime/lua/vim/_meta/regex.lua',
'runtime/lua/vim/_meta/spell.lua',
],
@ -206,7 +208,7 @@ CONFIG = {
'section_fmt': lambda name: (
'Lua module: vim' if name.lower() == '_editor' else
'LUA-VIMSCRIPT BRIDGE' if name.lower() == '_options' else
f'VIM.{name.upper()}' if name.lower() in [ 'highlight', 'mpack', 'json', 'diff', 'spell', 'regex' ] else
f'VIM.{name.upper()}' if name.lower() in [ 'highlight', 'mpack', 'json', 'base64', 'diff', 'spell', 'regex' ] else
'VIM' if name.lower() == 'builtin' else
f'Lua module: vim.{name.lower()}'),
'helptag_fmt': lambda name: (
@ -241,6 +243,7 @@ CONFIG = {
'builtin': 'vim',
'mpack': 'vim.mpack',
'json': 'vim.json',
'base64': 'vim.base64',
'regex': 'vim.regex',
'spell': 'vim.spell',
},

209
src/nvim/base64.c Normal file
View File

@ -0,0 +1,209 @@
#include <assert.h>
#include <stddef.h>
#include <string.h>
#include "nvim/base64.h"
#include "nvim/memory.h"
#ifdef HAVE_BE64TOH
# include ENDIAN_INCLUDE_FILE
#endif
// Encoding table: maps a 6-bit value (0-63) to its Base64 character.
static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Decoding table: maps a byte value to the position of that character in `alphabet`, plus one.
// Indices are 1-based because we use 0 to indicate a letter that is not part of the alphabet
// (every byte without an initializer below, including the padding character '=', is 0).
static const uint8_t char_to_index[256] = {
['A'] = 1, ['B'] = 2, ['C'] = 3, ['D'] = 4, ['E'] = 5, ['F'] = 6, ['G'] = 7, ['H'] = 8,
['I'] = 9, ['J'] = 10, ['K'] = 11, ['L'] = 12, ['M'] = 13, ['N'] = 14, ['O'] = 15, ['P'] = 16,
['Q'] = 17, ['R'] = 18, ['S'] = 19, ['T'] = 20, ['U'] = 21, ['V'] = 22, ['W'] = 23, ['X'] = 24,
['Y'] = 25, ['Z'] = 26, ['a'] = 27, ['b'] = 28, ['c'] = 29, ['d'] = 30, ['e'] = 31, ['f'] = 32,
['g'] = 33, ['h'] = 34, ['i'] = 35, ['j'] = 36, ['k'] = 37, ['l'] = 38, ['m'] = 39, ['n'] = 40,
['o'] = 41, ['p'] = 42, ['q'] = 43, ['r'] = 44, ['s'] = 45, ['t'] = 46, ['u'] = 47, ['v'] = 48,
['w'] = 49, ['x'] = 50, ['y'] = 51, ['z'] = 52, ['0'] = 53, ['1'] = 54, ['2'] = 55, ['3'] = 56,
['4'] = 57, ['5'] = 58, ['6'] = 59, ['7'] = 60, ['8'] = 61, ['9'] = 62, ['+'] = 63, ['/'] = 64,
};
#ifndef HAVE_BE64TOH
/// Fallback used when the platform does not provide htobe64().
///
/// @param host_64bits Value in host byte order
/// @return The value whose most significant byte is the first byte of {host_64bits} in memory
static inline uint64_t htobe64(uint64_t host_64bits)
{
# ifdef ORDER_BIG_ENDIAN
  return host_64bits;
# else
  const uint8_t *const bytes = (const uint8_t *)&host_64bits;
  uint64_t be = 0;
  // Walk the bytes in memory order; each new byte becomes the least significant one so far.
  for (size_t k = 0; k < sizeof(host_64bits); k++) {
    be = (be << 8) | bytes[k];
  }
  return be;
# endif
}
/// Fallback used when the platform does not provide htobe32().
///
/// @param host_32bits Value in host byte order
/// @return The value whose most significant byte is the first byte of {host_32bits} in memory
static inline uint32_t htobe32(uint32_t host_32bits)
{
# ifdef ORDER_BIG_ENDIAN
  return host_32bits;
# else
  const uint8_t *const bytes = (const uint8_t *)&host_32bits;
  uint32_t be = 0;
  // Walk the bytes in memory order; each new byte becomes the least significant one so far.
  for (size_t k = 0; k < sizeof(host_32bits); k++) {
    be = (be << 8) | bytes[k];
  }
  return be;
# endif
}
#endif
/// Produce the padded Base64 representation of a byte string.
///
/// @param src Bytes to encode (must not be NULL)
/// @param src_len Number of bytes in {src}
/// @return NUL-terminated Base64 string in memory obtained from xmalloc()
char *base64_encode(const char *src, size_t src_len)
{
  assert(src != NULL);

  const uint8_t *const in = (const uint8_t *)src;
  // Every group of up to 3 input bytes becomes exactly 4 output characters
  const size_t enc_len = ((src_len + 2) / 3) * 4;
  char *const out = xmalloc(enc_len + 1);
  char *p = out;
  size_t pos = 0;

  // Fast path: load 8 bytes, emit the 8 sextets held in the top 48 bits, consume 6 bytes
  while (pos + 7 < src_len) {
    uint64_t word;
    memcpy(&word, in + pos, sizeof(word));
    const uint64_t be = htobe64(word);
    for (int shift = 58; shift >= 16; shift -= 6) {
      *p++ = alphabet[(be >> shift) & 0x3F];
    }
    pos += 6;
  }

  // Same idea with 4-byte loads: emit the 4 sextets in the top 24 bits, consume 3 bytes
  while (pos + 3 < src_len) {
    uint32_t word;
    memcpy(&word, in + pos, sizeof(word));
    const uint32_t be = htobe32(word);
    for (int shift = 26; shift >= 8; shift -= 6) {
      *p++ = alphabet[(be >> shift) & 0x3F];
    }
    pos += 3;
  }

  // At most 3 bytes are left; encode them one byte at a time
  const size_t tail = src_len - pos;
  if (tail > 0) {
    const uint8_t b0 = in[pos];
    const uint8_t b1 = tail > 1 ? in[pos + 1] : 0;
    const uint8_t b2 = tail > 2 ? in[pos + 2] : 0;
    *p++ = alphabet[b0 >> 2];
    *p++ = alphabet[((b0 & 0x3) << 4) | (b1 >> 4)];
    if (tail > 1) {
      *p++ = alphabet[((b1 & 0xF) << 2) | (b2 >> 6)];
    }
    if (tail > 2) {
      *p++ = alphabet[b2 & 0x3F];
    }
  }

  // Fill whatever remains of the final 4-character group with padding
  while (p < out + enc_len) {
    *p++ = '=';
  }
  out[enc_len] = '\0';

  return out;
}
/// Decode a Base64 encoded string.
///
/// The input length must be a multiple of 4 and the input must be correctly padded with '='.
/// Only the NUL-terminated buffer is returned, not its length: the decoded data occupies
/// (src_len / 4) * 3 bytes minus one byte per trailing '=' and may itself contain NUL bytes.
///
/// @param src Base64 encoded string (must not be NULL)
/// @param src_len Length of {src}
/// @return Decoded string in memory obtained from xmalloc(), or NULL if {src} is not valid Base64
char *base64_decode(const char *src, size_t src_len)
{
  assert(src != NULL);

  char *dest = NULL;

  if (src_len % 4 != 0) {
    goto invalid;
  }

  // Each trailing '=' (at most two) stands for one byte missing from the last 3-byte group
  size_t out_len = (src_len / 4) * 3;
  if (src_len >= 1 && src[src_len - 1] == '=') {
    out_len--;
  }
  if (src_len >= 2 && src[src_len - 2] == '=') {
    out_len--;
  }

  const uint8_t *s = (const uint8_t *)src;

  dest = xmalloc(out_len + 1);

  int acc = 0;      // Bit accumulator; only the low 12 bits are ever kept
  int acc_len = 0;  // Number of bits in `acc` not yet written to `dest`
  size_t out_i = 0;
  size_t src_i = 0;
  size_t padding_chars = 0;

  for (; src_i < src_len; src_i++) {
    const uint8_t c = s[src_i];
    const uint8_t d = char_to_index[c];
    if (d == 0) {
      if (c == '=') {
        // Start of the padding; it is validated after the loop
        break;
      }
      goto invalid;
    }

    acc = ((acc << 6) & 0xFFF) + (d - 1);
    acc_len += 6;
    if (acc_len >= 8) {
      acc_len -= 8;
      dest[out_i] = (char)(acc >> acc_len);
      out_i += 1;
    }
  }

  // More than 4 leftover bits, or leftover bits that are not all zero, cannot come from an encoder
  if (acc_len > 4 || ((acc & ((1 << acc_len) - 1)) != 0)) {
    goto invalid;
  }

  // src_i is now either src_len (no padding seen) or the index of the first '='. Continuing with
  // the size_t index avoids a -1 sentinel and truncating the position to int on huge inputs.
  // Everything from here to the end of the input must be padding.
  for (; src_i < src_len; src_i++) {
    if (s[src_i] != '=') {
      goto invalid;
    }
    padding_chars += 1;
  }

  // 2 leftover bits require one '=', 4 leftover bits require two, 0 leftover bits require none
  if (padding_chars != (size_t)(acc_len / 2)) {
    goto invalid;
  }

  dest[out_len] = '\0';

  return dest;

invalid:
  if (dest) {
    xfree((void *)dest);
  }

  return NULL;
}

10
src/nvim/base64.h Normal file
View File

@ -0,0 +1,10 @@
#ifndef NVIM_BASE64_H
#define NVIM_BASE64_H
#include <stddef.h>
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "base64.h.generated.h"
#endif
#endif // NVIM_BASE64_H

65
src/nvim/lua/base64.c Normal file
View File

@ -0,0 +1,65 @@
#include <assert.h>
#include <lauxlib.h>
#include <lua.h>
#include "nvim/base64.h"
#include "nvim/lua/base64.h"
#include "nvim/memory.h"
/// Lua binding for vim.base64.encode(): pushes the Base64 encoding of the string argument.
///
/// Raises a Lua error when called without arguments or when the first argument is not a string.
///
/// @return Number of values pushed onto the Lua stack (always 1)
static int nlua_base64_encode(lua_State *L)
{
  const int nargs = lua_gettop(L);
  if (nargs < 1) {
    return luaL_error(L, "Expected 1 argument");
  }

  // Only genuine strings are accepted; numbers are not coerced
  if (lua_type(L, 1) != LUA_TSTRING) {
    return luaL_argerror(L, 1, "expected string");
  }

  size_t input_len = 0;
  const char *const input = lua_tolstring(L, 1, &input_len);

  char *const encoded = base64_encode(input, input_len);
  assert(encoded != NULL);

  // Lua copies the string, so the C buffer can be released right away
  lua_pushstring(L, encoded);
  xfree((void *)encoded);

  return 1;
}
/// Lua binding for vim.base64.decode(): pushes the bytes decoded from the Base64 string argument.
///
/// Raises a Lua error when called without arguments, when the first argument is not a string, or
/// ("Invalid input") when the string is not valid Base64.
///
/// @return Number of values pushed onto the Lua stack (always 1)
static int nlua_base64_decode(lua_State *L)
{
  if (lua_gettop(L) < 1) {
    return luaL_error(L, "Expected 1 argument");
  }

  if (lua_type(L, 1) != LUA_TSTRING) {
    luaL_argerror(L, 1, "expected string");
  }

  size_t src_len = 0;
  const char *src = lua_tolstring(L, 1, &src_len);

  const char *ret = base64_decode(src, src_len);
  if (ret == NULL) {
    return luaL_error(L, "Invalid input");
  }

  // The decoded data may contain NUL bytes, so it must be pushed with an explicit length:
  // lua_pushstring() would stop at the first NUL and silently truncate binary payloads.
  // For input that base64_decode() accepted, the decoded size is 3 bytes per 4 input characters
  // minus one byte per trailing '=' (at most two).
  size_t ret_len = (src_len / 4) * 3;
  if (src_len >= 1 && src[src_len - 1] == '=') {
    ret_len--;
  }
  if (src_len >= 2 && src[src_len - 2] == '=') {
    ret_len--;
  }

  lua_pushlstring(L, ret, ret_len);
  xfree((void *)ret);

  return 1;
}
// Functions placed in the module table: Lua-visible name paired with its C implementation.
// The { NULL, NULL } entry marks the end of the list for luaL_register().
static const luaL_Reg base64_functions[] = {
{ "encode", nlua_base64_encode },
{ "decode", nlua_base64_decode },
{ NULL, NULL },
};
/// Build the base64 module table and leave it on top of the Lua stack.
///
/// @return Number of values pushed onto the Lua stack (always 1: the module table)
int luaopen_base64(lua_State *L)
{
  // Fresh empty table (this is what lua_newtable() expands to)
  lua_createtable(L, 0, 0);
  // With a NULL library name the functions are registered into the table on top of the stack
  luaL_register(L, NULL, base64_functions);
  return 1;
}

12
src/nvim/lua/base64.h Normal file
View File

@ -0,0 +1,12 @@
#ifndef NVIM_LUA_BASE64_H
#define NVIM_LUA_BASE64_H
#include <lauxlib.h>
#include <lua.h>
#include <lualib.h>
#ifdef INCLUDE_GENERATED_DECLARATIONS
# include "lua/base64.h.generated.h"
#endif
#endif // NVIM_LUA_BASE64_H

View File

@ -26,6 +26,7 @@
#include "nvim/ex_eval.h"
#include "nvim/fold.h"
#include "nvim/globals.h"
#include "nvim/lua/base64.h"
#include "nvim/lua/converter.h"
#include "nvim/lua/spell.h"
#include "nvim/lua/stdlib.h"
@ -606,6 +607,10 @@ void nlua_state_add_stdlib(lua_State *const lstate, bool is_thread)
lua_pushcfunction(lstate, &nlua_iconv);
lua_setfield(lstate, -2, "iconv");
// vim.base64
luaopen_base64(lstate);
lua_setfield(lstate, -2, "base64");
nlua_state_add_internal(lstate);
}

View File

@ -0,0 +1,105 @@
local helpers = require('test.functional.helpers')(after_each)
local clear = helpers.clear
local exec_lua = helpers.exec_lua
local eq = helpers.eq
local pcall_err = helpers.pcall_err
local matches = helpers.matches
describe('vim.base64', function()
before_each(clear)
-- Call vim.base64.encode() inside the Nvim instance under test and return its result.
local function encode(input)
  local chunk = 'return vim.base64.encode(...)'
  return exec_lua(chunk, input)
end
-- Call vim.base64.decode() inside the Nvim instance under test and return its result.
local function decode(input)
  local chunk = 'return vim.base64.decode(...)'
  return exec_lua(chunk, input)
end
-- Round-trips a set of ASCII and multi-byte strings through encode/decode, then checks
-- encoder output and decoder input against known Base64 values.
it('works', function()
-- Inputs for the round-trip check, including the empty string and multi-line text
local values = {
'',
'Many hands make light work.',
[[
Call me Ishmael. Some years agonever mind how long preciselyhaving little or no money in
my purse, and nothing particular to interest me on shore, I thought I would sail about a
little and see the watery part of the world.
]],
[[
It is a truth universally acknowledged, that a single man in possession of a good fortune,
must be in want of a wife.
]],
'Happy families are all alike; every unhappy family is unhappy in its own way.',
'ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя',
'ÅÍÎÏ˝ÓÔÒÚÆ☃',
'𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆',
'👨‍👩‍👦 👨‍👩‍👧‍👦 👨‍👨‍👦 👩‍👩‍👧 👨‍👦 👨‍👧‍👦 👩‍👦 👩‍👧‍👦',
'مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ،',
[[
̺̺̕ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣
̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰
̗̺͖̹̯͓̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟
̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕
Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮
]],
}
-- Decoding the encoded form must give back the original string unchanged
for _, v in ipairs(values) do
eq(v, decode(encode(v)))
end
-- Explicitly check encoded output
eq('VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZwo=', encode('The quick brown fox jumps over the lazy dog\n'))
-- Test vectors from rfc4648
-- Each entry is { plain text, expected Base64 }
local rfc4648 = {
{ '', '' },
{ 'f', 'Zg==', },
{ 'fo', 'Zm8=' },
{ 'foo', 'Zm9v' },
{ 'foob', 'Zm9vYg==' },
{ 'fooba', 'Zm9vYmE=' },
{ 'foobar', 'Zm9vYmFy' },
}
-- Check both directions for every vector
for _, v in ipairs(rfc4648) do
local input = v[1]
local output = v[2]
eq(output, encode(input))
eq(input, decode(output))
end
end)
-- Malformed Base64 must be rejected with "Invalid input"; missing or non-string
-- arguments must be rejected by both functions.
it('detects invalid input', function()
  -- Wrong length, characters outside the alphabet, misplaced or excess padding, and
  -- non-zero leftover bits, on their own and combined with otherwise valid data
  local malformed = {
    'A',
    'AA',
    'AAA',
    'A..A',
    'AA=A',
    'AA/=',
    'A/==',
    'A===',
    '====',
    'Zm9vYmFyZm9vYmFyA..A',
    'Zm9vYmFyZm9vYmFyAA=A',
    'Zm9vYmFyZm9vYmFyAA/=',
    'Zm9vYmFyZm9vYmFyA/==',
    'Zm9vYmFyZm9vYmFyA===',
    'A..AZm9vYmFyZm9vYmFy',
    'Zm9vYmFyZm9vAA=A',
    'Zm9vYmFyZm9vAA/=',
    'Zm9vYmFyZm9vA/==',
    'Zm9vYmFyZm9vA===',
  }

  for i = 1, #malformed do
    local message = pcall_err(decode, malformed[i])
    eq('Invalid input', message)
  end

  -- Calling without an argument
  eq('Expected 1 argument', pcall_err(encode))
  eq('Expected 1 argument', pcall_err(decode))

  -- Numbers are not accepted in place of strings
  matches('expected string', pcall_err(encode, 42))
  matches('expected string', pcall_err(decode, 42))
end)
end)