Blender V5.0
string_cursor_utf8.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <algorithm>
10#include <cstdio>
11#include <cstdlib>
12
13#include "BLI_string_utf8.h"
14#include "BLI_utildefines.h"
15
16#include "BLI_string_cursor_utf8.h" /* own include */
17
18#include "BLI_strict_flags.h" /* IWYU pragma: keep. Keep last. */
19
42
44{
45 switch (uch) {
46 case ',':
47 case '.':
48 case 0x2026: /* Horizontal ellipsis. */
49 case 0x3002: /* CJK full width full stop. */
50 case 0xFF0C: /* CJK full width comma. */
51 case 0xFF61: /* CJK half width full stop. */
52 return STRCUR_DELIM_PUNCT;
53
54 case '{':
55 case '}':
56 case '[':
57 case ']':
58 case '(':
59 case ')':
60 case 0x3010: /* CJK full width left black lenticular bracket. */
61 case 0x3011: /* CJK full width right black lenticular bracket. */
62 case 0xFF08: /* CJK full width left parenthesis. */
63 case 0xFF09: /* CJK full width right parenthesis. */
64 return STRCUR_DELIM_BRACE;
65
66 case '+':
67 case '-':
68 case '=':
69 case '~':
70 case '%':
71 case '/':
72 case '<':
73 case '>':
74 case '^':
75 case '*':
76 case '&':
77 case '|':
78 case 0x2014: /* Em dash. */
79 case 0x300A: /* CJK full width left double angle bracket. */
80 case 0x300B: /* CJK full width right double angle bracket. */
81 case 0xFF0F: /* CJK full width solidus (forward slash). */
82 case 0xFF5E: /* CJK full width tilde. */
84
85 case '\'':
86 case '\"':
87 case '`':
88 case 0xB4: /* Acute accent. */
89 case 0x2018: /* Left single quotation mark. */
90 case 0x2019: /* Right single quotation mark. */
91 case 0x201C: /* Left double quotation mark. */
92 case 0x201D: /* Right double quotation mark. */
93 return STRCUR_DELIM_QUOTE;
94
95 case ' ':
96 case '\t':
97 case '\n':
99
100 case '\\':
101 case '@':
102 case '#':
103 case '$':
104 case ':':
105 case ';':
106 case '?':
107 case '!':
108 case 0xA3: /* Pound sign. */
109 case 0x80: /* Euro sign. */
110 case 0x3001: /* CJK ideographic comma. */
111 case 0xFF01: /* CJK full width exclamation mark. */
112 case 0xFF64: /* CJK half width ideographic comma. */
113 case 0xFF65: /* Katakana half width middle dot. */
114 case 0xFF1A: /* CJK full width colon. */
115 case 0xFF1B: /* CJK full width semicolon. */
116 case 0xFF1F: /* CJK full width question mark. */
117 /* case '_': */ /* special case, for python */
118 return STRCUR_DELIM_OTHER;
119
120 default:
121 break;
122 }
123 return STRCUR_DELIM_ALPHANUMERIC; /* Not quite true, but ok for now */
124}
125
127 const int ch_utf8_len,
128 const int pos)
129{
130 BLI_assert(ch_utf8_len >= 0);
131 /* for full unicode support we really need to have large lookup tables to figure
132 * out what's what in every possible char set - and python, glib both have these. */
133 size_t index = size_t(pos);
134 uint uch = BLI_str_utf8_as_unicode_step_or_error(ch_utf8, size_t(ch_utf8_len), &index);
135 return cursor_delim_type_unicode(uch);
136}
137
138bool BLI_str_cursor_step_next_utf8(const char *str, const int str_maxlen, int *pos)
139{
140 /* NOTE: Keep in sync with #BLI_str_cursor_step_next_utf32. */
141 BLI_assert(str_maxlen >= 0);
142 BLI_assert(*pos >= 0);
143
144 if (*pos >= str_maxlen) {
145 return false;
146 }
147 const char *str_end = str + (str_maxlen + 1);
148 const char *str_pos = str + *pos;
149 const char *str_next = str_pos;
150 do {
151 str_next = BLI_str_find_next_char_utf8(str_next, str_end);
152 } while ((str_next < str_end) && (str_next[0] != 0) &&
153 (BLI_str_utf8_char_width_or_error(str_next) == 0));
154 *pos += int(str_next - str_pos);
155 *pos = std::min(*pos, str_maxlen);
156
157 return true;
158}
159
160bool BLI_str_cursor_step_prev_utf8(const char *str, const int str_maxlen, int *pos)
161{
162 /* NOTE: Keep in sync with #BLI_str_cursor_step_prev_utf32. */
163 BLI_assert(str_maxlen >= 0);
164 BLI_assert(*pos >= 0);
165
166 if ((*pos > 0) && (*pos <= str_maxlen)) {
167 const char *str_pos = str + *pos;
168 const char *str_prev = str_pos;
169 do {
170 str_prev = BLI_str_find_prev_char_utf8(str_prev, str);
171 } while ((str_prev > str) && (BLI_str_utf8_char_width_or_error(str_prev) == 0));
172 *pos -= int(str_pos - str_prev);
173 return true;
174 }
175
176 return false;
177}
178
180 const int str_maxlen,
181 int *pos,
182 eStrCursorJumpDirection direction,
184 bool use_init_step)
185{
186 BLI_assert(str_maxlen >= 0);
187 const int pos_orig = *pos;
188
189 if (direction == STRCUR_DIR_NEXT) {
190 if (use_init_step) {
192 }
193 else {
195 }
196
197 if (jump != STRCUR_JUMP_NONE) {
198 const eStrCursorDelimType delim_type = (*pos < str_maxlen) ?
199 cursor_delim_type_utf8(str, str_maxlen, *pos) :
201 /* jump between special characters (/,\,_,-, etc.),
202 * look at function cursor_delim_type() for complete
203 * list of special character, ctr -> */
204 while (*pos < str_maxlen) {
205 if (BLI_str_cursor_step_next_utf8(str, str_maxlen, pos)) {
206 if (*pos == str_maxlen) {
207 break;
208 }
209 if ((jump != STRCUR_JUMP_ALL) &&
210 (delim_type != cursor_delim_type_utf8(str, str_maxlen, *pos)))
211 {
212 break;
213 }
214 }
215 else {
216 break; /* unlikely but just in case */
217 }
218 }
219 }
220 }
221 else if (direction == STRCUR_DIR_PREV) {
222 if (use_init_step) {
224 }
225 else {
227 }
228
229 if (jump != STRCUR_JUMP_NONE) {
230 const eStrCursorDelimType delim_type = (*pos > 0) ? cursor_delim_type_utf8(
231 str, str_maxlen, *pos - 1) :
233 /* jump between special characters (/,\,_,-, etc.),
234 * look at function cursor_delim_type() for complete
235 * list of special character, ctr -> */
236 while (*pos > 0) {
237 const int pos_prev = *pos;
238 if (BLI_str_cursor_step_prev_utf8(str, str_maxlen, pos)) {
239 if ((jump != STRCUR_JUMP_ALL) &&
240 (delim_type != cursor_delim_type_utf8(str, str_maxlen, *pos)))
241 {
242 /* left only: compensate for index/change in direction */
243 if ((pos_orig - *pos) >= 1) {
244 *pos = pos_prev;
245 }
246 break;
247 }
248 }
249 else {
250 break;
251 }
252 }
253 }
254 }
255 else {
257 }
258}
259
260bool BLI_str_cursor_step_next_utf32(const char32_t *str, const int str_maxlen, int *pos)
261{
262 /* NOTE: Keep in sync with #BLI_str_cursor_step_next_utf8. */
263 BLI_assert(str_maxlen >= 0);
264 BLI_assert(*pos >= 0);
265
266 if (*pos >= str_maxlen) {
267 return false;
268 }
269 do {
270 (*pos)++;
271 } while ((*pos < str_maxlen) && (str[*pos] != 0) && (BLI_wcwidth_or_error(str[*pos]) == 0));
272
273 return true;
274}
275
276bool BLI_str_cursor_step_prev_utf32(const char32_t *str, const int str_maxlen, int *pos)
277{
278 /* NOTE: Keep in sync with #BLI_str_cursor_step_prev_utf8. */
279 BLI_assert(str_maxlen >= 0);
280 BLI_assert(*pos >= 0);
281 UNUSED_VARS_NDEBUG(str_maxlen);
282
283 if (*pos <= 0) {
284 return false;
285 }
286 do {
287 (*pos)--;
288 } while ((*pos > 0) && (BLI_wcwidth_or_error(str[*pos]) == 0));
289
290 return true;
291}
292
293void BLI_str_cursor_step_utf32(const char32_t *str,
294 const int str_maxlen,
295 int *pos,
296 eStrCursorJumpDirection direction,
298 bool use_init_step)
299{
300 BLI_assert(str_maxlen >= 0);
301 const int pos_orig = *pos;
302
303 if (direction == STRCUR_DIR_NEXT) {
304 if (use_init_step) {
306 }
307 else {
309 }
310
311 if (jump != STRCUR_JUMP_NONE) {
312 const eStrCursorDelimType delim_type = (*pos < str_maxlen) ?
315 /* jump between special characters (/,\,_,-, etc.),
316 * look at function cursor_delim_type_unicode() for complete
317 * list of special character, ctr -> */
318 while (*pos < str_maxlen) {
319 if (BLI_str_cursor_step_next_utf32(str, str_maxlen, pos)) {
320 if ((jump != STRCUR_JUMP_ALL) &&
321 (delim_type != cursor_delim_type_unicode(uint(str[*pos]))))
322 {
323 break;
324 }
325 }
326 else {
327 break; /* unlikely but just in case */
328 }
329 }
330 }
331 }
332 else if (direction == STRCUR_DIR_PREV) {
333 if (use_init_step) {
335 }
336 else {
338 }
339
340 if (jump != STRCUR_JUMP_NONE) {
341 const eStrCursorDelimType delim_type = (*pos > 0) ?
344 /* jump between special characters (/,\,_,-, etc.),
345 * look at function cursor_delim_type() for complete
346 * list of special character, ctr -> */
347 while (*pos > 0) {
348 const int pos_prev = *pos;
349 if (BLI_str_cursor_step_prev_utf32(str, str_maxlen, pos)) {
350 if ((jump != STRCUR_JUMP_ALL) &&
351 (delim_type != cursor_delim_type_unicode(uint(str[*pos]))))
352 {
353 /* left only: compensate for index/change in direction */
354 if ((pos_orig - *pos) >= 1) {
355 *pos = pos_prev;
356 }
357 break;
358 }
359 }
360 else {
361 break;
362 }
363 }
364 }
365 }
366 else {
368 }
369}
370
372 const char *str, const int str_maxlen, const int pos, int *r_start, int *r_end)
373{
374 BLI_assert(str_maxlen >= 0);
375 BLI_assert(pos >= 0 && pos <= str_maxlen);
376 /* Identify the type of characters are on either side of the current cursor position. */
377 const eStrCursorDelimType prev = (pos > 0) ? cursor_delim_type_utf8(str, str_maxlen, pos - 1) :
379 const eStrCursorDelimType next = (pos < str_maxlen) ?
380 cursor_delim_type_utf8(str, str_maxlen, pos) :
382 *r_start = pos;
383 *r_end = pos;
384
385 if (prev != STRCUR_DELIM_NONE) {
386 if ((prev <= next) || (next == STRCUR_DELIM_NONE)) {
387 /* Expand backward if we are between similar content. */
389 str, str_maxlen, r_start, STRCUR_DIR_PREV, STRCUR_JUMP_DELIM, false);
390 }
391 }
392 if (next != STRCUR_DELIM_NONE) {
393 if ((next <= prev) || (prev == STRCUR_DELIM_NONE)) {
394 /* Expand forward if we are between similar content. */
396 }
397 }
398}
399
401 const char32_t *str, const int str_maxlen, const int pos, int *r_start, int *r_end)
402{
403 BLI_assert(str_maxlen >= 0);
404 BLI_assert(pos >= 0 && pos <= str_maxlen);
405 /* Identify the type of characters are on either side of the current cursor position. */
406 const eStrCursorDelimType prev = (pos > 0) ? cursor_delim_type_unicode(str[pos - 1]) :
408 const eStrCursorDelimType next = (pos < str_maxlen) ? cursor_delim_type_unicode(str[pos]) :
410 *r_start = pos;
411 *r_end = pos;
412
413 if (prev != STRCUR_DELIM_NONE) {
414 if ((prev <= next) || (next == STRCUR_DELIM_NONE)) {
415 /* Expand backward if we are between similar content. */
417 str, str_maxlen, r_start, STRCUR_DIR_PREV, STRCUR_JUMP_DELIM, false);
418 }
419 }
420 if (next != STRCUR_DELIM_NONE) {
421 if ((next <= prev) || (prev == STRCUR_DELIM_NONE)) {
422 /* Expand forward if we are between similar content. */
424 }
425 }
426}
#define BLI_assert_unreachable()
Definition BLI_assert.h:93
#define BLI_assert(a)
Definition BLI_assert.h:46
eStrCursorJumpDirection
@ STRCUR_DIR_NEXT
@ STRCUR_DIR_PREV
@ STRCUR_JUMP_ALL
@ STRCUR_JUMP_NONE
@ STRCUR_JUMP_DELIM
const char const char * BLI_str_find_next_char_utf8(const char *p, const char *str_end) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1
int BLI_str_utf8_char_width_or_error(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
unsigned int unsigned int BLI_str_utf8_as_unicode_step_or_error(const char *__restrict p, size_t p_len, size_t *__restrict index) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
const char * BLI_str_find_prev_char_utf8(const char *p, const char *str_start) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1
size_t size_t int BLI_wcwidth_or_error(char32_t ucs) ATTR_WARN_UNUSED_RESULT
unsigned int uint
#define UNUSED_VARS_NDEBUG(...)
void jump(const btVector3 &v=btVector3(0, 0, 0))
#define str(s)
uint pos
static ulong * next
void BLI_str_cursor_step_utf32(const char32_t *str, const int str_maxlen, int *pos, eStrCursorJumpDirection direction, eStrCursorJumpType jump, bool use_init_step)
static eStrCursorDelimType cursor_delim_type_unicode(const uint uch)
void BLI_str_cursor_step_bounds_utf32(const char32_t *str, const int str_maxlen, const int pos, int *r_start, int *r_end)
bool BLI_str_cursor_step_next_utf8(const char *str, const int str_maxlen, int *pos)
eStrCursorDelimType
@ STRCUR_DELIM_OTHER
@ STRCUR_DELIM_BRACE
@ STRCUR_DELIM_QUOTE
@ STRCUR_DELIM_WHITESPACE
@ STRCUR_DELIM_ALPHANUMERIC
@ STRCUR_DELIM_PUNCT
@ STRCUR_DELIM_NONE
@ STRCUR_DELIM_OPERATOR
static eStrCursorDelimType cursor_delim_type_utf8(const char *ch_utf8, const int ch_utf8_len, const int pos)
void BLI_str_cursor_step_utf8(const char *str, const int str_maxlen, int *pos, eStrCursorJumpDirection direction, eStrCursorJumpType jump, bool use_init_step)
bool BLI_str_cursor_step_prev_utf32(const char32_t *str, const int str_maxlen, int *pos)
void BLI_str_cursor_step_bounds_utf8(const char *str, const int str_maxlen, const int pos, int *r_start, int *r_end)
bool BLI_str_cursor_step_next_utf32(const char32_t *str, const int str_maxlen, int *pos)
bool BLI_str_cursor_step_prev_utf8(const char *str, const int str_maxlen, int *pos)