vim-patch:8.2.{1536,1540}: charclass() (#19748)

vim-patch:8.2.1536: cannot get the class of a character; emoji widths are wrong

Problem:    Cannot get the class of a character; emoji widths are wrong in
            some environments.
Solution:   Add charclass(). Update some emoji widths.  Add script to check
            emoji widths.
4e4473c927

Use latest charclass() docs from Vim.
Rewrite DoIt() in emoji_list.vim in Lua.
Omit emoji table updates:
- emoji_width update looks wrong as these added ranges are only double-width when followed by 0xFE0F.
- Other updates are too old.

vim-patch:8.2.1540: the user cannot try out emoji character widths

Problem:    The user cannot try out emoji character widths.
Solution:   Move the emoji script to the runtime/tools directory.
98945560c1
This commit is contained in:
zeertzjq 2022-08-13 11:29:38 +08:00 committed by GitHub
parent 6f14c5d2dd
commit 754892e59d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 60 additions and 6 deletions

View File

@ -77,6 +77,7 @@ changenr() Number current change number
chanclose({id} [, {stream}]) Number Closes a channel or one of its streams chanclose({id} [, {stream}]) Number Closes a channel or one of its streams
chansend({id}, {data}) Number Writes {data} to channel chansend({id}, {data}) Number Writes {data} to channel
char2nr({expr} [, {utf8}]) Number ASCII/UTF-8 value of first char in {expr} char2nr({expr} [, {utf8}]) Number ASCII/UTF-8 value of first char in {expr}
charclass({string}) Number character class of {string}
charcol({expr}) Number column number of cursor or mark charcol({expr}) Number column number of cursor or mark
charidx({string}, {idx} [, {countcc}]) charidx({string}, {idx} [, {countcc}])
Number char index of byte {idx} in {string} Number char index of byte {idx} in {string}
@ -1064,7 +1065,19 @@ char2nr({string} [, {utf8}]) *char2nr()*
Can also be used as a |method|: > Can also be used as a |method|: >
GetChar()->char2nr() GetChar()->char2nr()
<
charclass({string}) *charclass()*
Return the character class of the first character in {string}.
The character class is one of:
0 blank
1 punctuation
2 word character
3 emoji
other specific Unicode class
The class is used in patterns and word motions.
Returns 0 if {string} is not a |String|.
*charcol()* *charcol()*
charcol({expr}) Same as |col()| but returns the character index of the column charcol({expr}) Same as |col()| but returns the character index of the column
position given with {expr} instead of the byte position. position given with {expr} instead of the byte position.

View File

@ -606,6 +606,7 @@ String manipulation: *string-functions*
strtrans() translate a string to make it printable strtrans() translate a string to make it printable
tolower() turn a string to lowercase tolower() turn a string to lowercase
toupper() turn a string to uppercase toupper() turn a string to uppercase
charclass() class of a character
match() position where a pattern matches in a string match() position where a pattern matches in a string
matchend() position where a pattern match ends in a string matchend() position where a pattern match ends in a string
matchfuzzy() fuzzy matches a string in a list of strings matchfuzzy() fuzzy matches a string in a list of strings

View File

@ -0,0 +1,21 @@
" Script to fill the window with emoji characters, one per line.
" Source this script: :source %
" Work in a fresh buffer: split when the current buffer has unsaved changes,
" otherwise replace it in the current window.
if !&modified
  enew
else
  new
endif
lua << EOF
  -- Walk the code points 0x100..0x1ffff and write one line for every
  -- character that charclass() reports as class 3 (emoji), formatted as
  -- "|<char>| <display width>".
  local fn = vim.fn
  local row = 0
  for codepoint = 0x100, 0x1ffff do
    local ch = fn.nr2char(codepoint)
    local is_emoji = fn.charclass(ch) == 3
    if is_emoji then
      row = row + 1
      fn.setline(row, '|' .. ch .. '| ' .. fn.strwidth(ch))
    end
  end
EOF
" The generated listing is scratch output; do not flag the buffer as changed.
set nomodified

View File

@ -72,6 +72,7 @@ return {
chanclose={args={1, 2}}, chanclose={args={1, 2}},
chansend={args=2}, chansend={args=2},
char2nr={args={1, 2}, base=1}, char2nr={args={1, 2}, base=1},
charclass={args=1, base=1},
charcol={args=1, base=1}, charcol={args=1, base=1},
charidx={args={2, 3}, base=1}, charidx={args={2, 3}, base=1},
chdir={args=1, base=1}, chdir={args=1, base=1},

View File

@ -1182,6 +1182,11 @@ int utf_class_tab(const int c, const uint64_t *const chartab)
return 1; // punctuation return 1; // punctuation
} }
// emoji
if (intable(emoji_all, ARRAY_SIZE(emoji_all), c)) {
return 3;
}
// binary search in table // binary search in table
while (top >= bot) { while (top >= bot) {
mid = (bot + top) / 2; mid = (bot + top) / 2;
@ -1194,11 +1199,6 @@ int utf_class_tab(const int c, const uint64_t *const chartab)
} }
} }
// emoji
if (intable(emoji_all, ARRAY_SIZE(emoji_all), c)) {
return 3;
}
// most other characters are "word" characters // most other characters are "word" characters
return 2; return 2;
} }
@ -2858,3 +2858,14 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, FunPtr fptr)
xfree(cw_table_save); xfree(cw_table_save);
redraw_all_later(NOT_VALID); redraw_all_later(NOT_VALID);
} }
/// "charclass(string)" function
///
/// Stores the character class of the first character of the String argument
/// in the return value, as computed by mb_get_class().
///
/// @param[in]   argvars  Function arguments; argvars[0] must be a String.
/// @param[out]  rettv    Receives the class number. It is left untouched when
///                       the argument is not a String or is a NULL string.
/// @param       fptr     Not used by this function.
void f_charclass(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{
  // Only a genuine type mismatch is an error.  An empty string is still a
  // String, so it must not trigger "String required".
  if (argvars[0].v_type != VAR_STRING) {
    emsg(_(e_stringreq));
    return;
  }

  // A NULL pointer is how an empty String may be stored; there is no
  // character to classify, so return quietly without dereferencing it.
  // NOTE(review): this relies on rettv being pre-initialised by the caller
  // (presumably to Number 0, matching the documented "Returns 0") - confirm.
  if (argvars[0].vval.v_string == NULL) {
    return;
  }

  // A non-NULL empty string is passed through so its leading NUL is
  // classified by mb_get_class() like any other character.
  rettv->vval.v_number = mb_get_class((const char_u *)argvars[0].vval.v_string);
}

View File

@ -1769,6 +1769,13 @@ func Test_char2nr()
call assert_equal(12354, char2nr('あ', 1)) call assert_equal(12354, char2nr('あ', 1))
endfunc endfunc
" Check charclass() against one representative character per numbered class.
func Test_charclass()
  " Each entry is [expected class, input string]:
  " 0 blank, 1 punctuation, 2 word character, 3 emoji.
  let cases = [[0, ' '], [1, '.'], [2, 'x'], [3, "\u203c"]]
  for [want, text] in cases
    call assert_equal(want, charclass(text))
  endfor
endfunc
func Test_eventhandler() func Test_eventhandler()
call assert_equal(0, eventhandler()) call assert_equal(0, eventhandler())
endfunc endfunc