Blender V4.3
string_utf8.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 1999 Tom Tromey
2 * SPDX-FileCopyrightText: 2000 Red Hat, Inc. All rights reserved.
3 * SPDX-FileCopyrightText: 2011 Blender Authors
4 *
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 *
7 * Code from `gutf8.c` by Tom Tromey & Red Hat, Inc. */
8
13#include <algorithm>
14#include <cstdio>
15#include <cstdlib>
16#include <cstring>
17#include <cwchar>
18#include <cwctype>
19#include <wcwidth.h>
20
21#include "BLI_utildefines.h"
22
23#include "BLI_string.h" /* #BLI_string_debug_size. */
24#include "BLI_string_utf8.h" /* own include */
25#ifdef WIN32
26# include "utfconv.hh"
27#endif
28#ifdef __GNUC__
29# pragma GCC diagnostic error "-Wsign-conversion"
30#endif
31
32#include "BLI_strict_flags.h" /* Keep last. */
33
34/* -------------------------------------------------------------------- */
57{
58 if (UNLIKELY(c >= 192)) {
59 if ((c & 0xe0) == 0xc0) {
60 return 2;
61 }
62 if ((c & 0xf0) == 0xe0) {
63 return 3;
64 }
65 if ((c & 0xf8) == 0xf0) {
66 return 4;
67 }
68 if ((c & 0xfc) == 0xf8) {
69 return 5;
70 }
71 if ((c & 0xfe) == 0xfc) {
72 return 6;
73 }
74 }
75 return 1;
76}
77
79{
80 if (c < 128) {
81 return 1;
82 }
83 if ((c & 0xe0) == 0xc0) {
84 return 2;
85 }
86 if ((c & 0xf0) == 0xe0) {
87 return 3;
88 }
89 if ((c & 0xf8) == 0xf0) {
90 return 4;
91 }
92 if ((c & 0xfc) == 0xf8) {
93 return 5;
94 }
95 if ((c & 0xfe) == 0xfc) {
96 return 6;
97 }
98 return -1;
99}
100
102{
103 /* Originally from GLIB `UTF8_COMPUTE` macro. */
104 if (c < 128) {
105 *r_mask = 0x7f;
106 return 1;
107 }
108 if ((c & 0xe0) == 0xc0) {
109 *r_mask = 0x1f;
110 return 2;
111 }
112 if ((c & 0xf0) == 0xe0) {
113 *r_mask = 0x0f;
114 return 3;
115 }
116 if ((c & 0xf8) == 0xf0) {
117 *r_mask = 0x07;
118 return 4;
119 }
120 if ((c & 0xfc) == 0xf8) {
121 *r_mask = 0x03;
122 return 5;
123 }
124 if ((c & 0xfe) == 0xfc) {
125 *r_mask = 0x01;
126 return 6;
127 }
128 return -1;
129}
130
134BLI_INLINE uint utf8_char_decode(const char *p, const char mask, const int len, const uint err)
135{
136 /* Originally from GLIB `UTF8_GET` macro, added an 'err' argument. */
137 uint result = p[0] & mask;
138 for (int count = 1; count < len; count++) {
139 if ((p[count] & 0xc0) != 0x80) {
140 return err;
141 }
142 result <<= 6;
143 result |= p[count] & 0x3f;
144 }
145 return result;
146}
147
/**
 * Scan `length` bytes starting at `str` and report the first invalid UTF-8 sequence.
 *
 * \param str: Start of the bytes to validate.
 * \param length: Number of bytes to check.
 * \return The byte offset (relative to `str`) of the first byte of the first invalid sequence,
 * or -1 when the whole range passed every check below.
 *
 * \note Sequences of up to 6 bytes are accepted, and a nil byte inside the range is
 * reported as invalid.
 */
ptrdiff_t BLI_str_utf8_invalid_byte(const char *str, size_t length)
{
  /* NOTE(@ideasman42): from libswish3, originally called u8_isvalid(),
   * modified to return the index of the bad character (byte index not UTF).
   * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044.
   *
   * Comment from code in: `libswish3`.
   * Based on the `valid_utf8` routine from the PCRE library by Philip Hazel
   *
   * length is in bytes, since without knowing whether the string is valid
   * it's hard to know how many characters there are! */

  const uchar *p, *perr, *pend = (const uchar *)str + length;
  uchar c;
  int ab;

  /* `length` is kept in sync with `p`: it always holds the number of bytes from `p` to `pend`. */
  for (p = (const uchar *)str; p < pend; p++, length--) {
    c = *p;
    perr = p; /* Erroneous char is always the first of an invalid utf8 sequence... */
    if (ELEM(c, 0xfe, 0xff, 0x00)) {
      /* Those three values are not allowed in utf8 string. */
      goto utf8_error;
    }
    /* Plain 7-bit character, nothing more to check. */
    if (c < 128) {
      continue;
    }
    /* A leading byte must have its two top bits set, `10xxxxxx` is a stray continuation byte. */
    if ((c & 0xc0) != 0xc0) {
      goto utf8_error;
    }

    /* Note that since we always increase p (and decrease length) by one byte in main loop,
     * we only add/subtract extra utf8 bytes in code below
     * (ab number, aka number of bytes remaining in the utf8 sequence after the initial one). */
    ab = utf8_char_compute_skip(c) - 1;
    /* The sequence claims more bytes than remain in the range. */
    if (length <= size_t(ab)) {
      goto utf8_error;
    }

    /* Check top bits in the second byte */
    p++;
    length--;
    if ((*p & 0xc0) != 0x80) {
      goto utf8_error;
    }

    /* Check for overlong sequences for each different length */
    switch (ab) {
      case 1:
        /* Check for: `XX00 000X`. */
        if ((c & 0x3e) == 0) {
          goto utf8_error;
        }
        continue; /* We know there aren't any more bytes to check */

      case 2:
        /* Check for: `1110 0000, XX0X XXXX`. */
        if (c == 0xe0 && (*p & 0x20) == 0) {
          goto utf8_error;
        }
        /* Some special cases, see section 5 of utf-8 decoder stress-test by Markus Kuhn
         * (https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt). */
        /* From section 5.1 (and 5.2) */
        /* NOTE: `*(p + 1)` is the third byte, the length check above guarantees it is in range.
         * Only the specific byte triples listed here are rejected. */
        if (c == 0xed) {
          if (*p == 0xa0 && *(p + 1) == 0x80) {
            goto utf8_error;
          }
          if (*p == 0xad && *(p + 1) == 0xbf) {
            goto utf8_error;
          }
          if (*p == 0xae && *(p + 1) == 0x80) {
            goto utf8_error;
          }
          if (*p == 0xaf && *(p + 1) == 0xbf) {
            goto utf8_error;
          }
          if (*p == 0xb0 && *(p + 1) == 0x80) {
            goto utf8_error;
          }
          if (*p == 0xbe && *(p + 1) == 0x80) {
            goto utf8_error;
          }
          if (*p == 0xbf && *(p + 1) == 0xbf) {
            goto utf8_error;
          }
        }
        /* From section 5.3 */
        if (c == 0xef) {
          if (*p == 0xbf && *(p + 1) == 0xbe) {
            goto utf8_error;
          }
          if (*p == 0xbf && *(p + 1) == 0xbf) {
            goto utf8_error;
          }
        }
        break;

      case 3:
        /* Check for: `1111 0000, XX00 XXXX`. */
        if (c == 0xf0 && (*p & 0x30) == 0) {
          goto utf8_error;
        }
        break;

      case 4:
        /* Check for `1111 1000, XX00 0XXX`. */
        if (c == 0xf8 && (*p & 0x38) == 0) {
          goto utf8_error;
        }
        break;

      case 5:
        /* Check for: `1111 1100, XX00 00XX`. */
        if (c == 0xfc && (*p & 0x3c) == 0) {
          goto utf8_error;
        }
        break;
    }

    /* Check for valid bytes after the 2nd, if any; all must start 10. */
    while (--ab > 0) {
      p++;
      length--;
      if ((*p & 0xc0) != 0x80) {
        goto utf8_error;
      }
    }
  }

  /* Reached the end of the range without finding an error. */
  return -1;

utf8_error:

  /* `perr` was set to the first byte of the sequence being checked when the error was found. */
  return ((const char *)perr - (const char *)str);
}
284
285int BLI_str_utf8_invalid_strip(char *str, size_t length)
286{
287 ptrdiff_t bad_char;
288 int tot = 0;
289
290 BLI_assert(str[length] == '\0');
291
292 while ((bad_char = BLI_str_utf8_invalid_byte(str, length)) != -1) {
293 str += bad_char;
294 length -= size_t(bad_char + 1);
295
296 if (length == 0) {
297 /* last character bad, strip it */
298 *str = '\0';
299 tot++;
300 break;
301 }
302 /* strip, keep looking */
303 memmove(str, str + 1, length + 1); /* +1 for NULL char! */
304 tot++;
305 }
306
307 return tot;
308}
309
318BLI_INLINE char *str_utf8_copy_max_bytes_impl(char *dst, const char *src, size_t dst_maxncpy)
319{
320 /* Cast to `uint8_t` is a no-op, quiets array subscript of type `char` warning.
321 * No need to check `src` points to a nil byte as this will return from the switch statement. */
322 size_t utf8_size;
323 while ((utf8_size = size_t(utf8_char_compute_skip(*src))) < dst_maxncpy) {
324 dst_maxncpy -= utf8_size;
325 /* Prefer more compact block. */
326 /* NOLINTBEGIN: bugprone-assignment-in-if-condition */
327 /* clang-format off */
328 switch (utf8_size) {
329 case 6: if (UNLIKELY(!(*dst = *src++))) { return dst; } dst++; ATTR_FALLTHROUGH;
330 case 5: if (UNLIKELY(!(*dst = *src++))) { return dst; } dst++; ATTR_FALLTHROUGH;
331 case 4: if (UNLIKELY(!(*dst = *src++))) { return dst; } dst++; ATTR_FALLTHROUGH;
332 case 3: if (UNLIKELY(!(*dst = *src++))) { return dst; } dst++; ATTR_FALLTHROUGH;
333 case 2: if (UNLIKELY(!(*dst = *src++))) { return dst; } dst++; ATTR_FALLTHROUGH;
334 case 1: if (UNLIKELY(!(*dst = *src++))) { return dst; } dst++;
335 }
336 /* clang-format on */
337 /* NOLINTEND: bugprone-assignment-in-if-condition */
338 }
339 *dst = '\0';
340 return dst;
341}
342
/**
 * Copy `src` into `dst` using #str_utf8_copy_max_bytes_impl.
 *
 * \param dst_maxncpy: Passed on as the byte limit of the copy, asserted to be non-zero.
 * \return `dst`.
 */
char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t dst_maxncpy)
{
  BLI_assert(dst_maxncpy != 0);
  BLI_string_debug_size(dst, dst_maxncpy);

  /* The end pointer returned by the helper is not needed here. */
  str_utf8_copy_max_bytes_impl(dst, src, dst_maxncpy);
  return dst;
}
351
352size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t dst_maxncpy)
353{
354 BLI_assert(dst_maxncpy != 0);
355 BLI_string_debug_size(dst, dst_maxncpy);
356
357 char *r_dst = dst;
358 dst = str_utf8_copy_max_bytes_impl(dst, src, dst_maxncpy);
359
360 return size_t(dst - r_dst);
361}
362
363/* -------------------------------------------------------------------- */
364/* wchar_t / utf8 functions */
365
366size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst,
367 const wchar_t *__restrict src,
368 const size_t dst_maxncpy)
369{
370 BLI_assert(dst_maxncpy != 0);
371 BLI_string_debug_size(dst, dst_maxncpy);
372
373 size_t len = 0;
374 while (*src && len < dst_maxncpy) {
375 len += BLI_str_utf8_from_unicode(uint(*src++), dst + len, dst_maxncpy - len);
376 }
377 dst[len] = '\0';
378 /* Return the correct length when part of the final byte did not fit into the string. */
379 while ((len > 0) && UNLIKELY(dst[len - 1] == '\0')) {
380 len--;
381 }
382 return len;
383}
384
385size_t BLI_wstrlen_utf8(const wchar_t *src)
386{
387 size_t len = 0;
388
389 while (*src) {
391 }
392
393 return len;
394}
395
396size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes)
397{
398 size_t len = 0;
399 const char *strc_orig = strc;
400
401 while (*strc) {
402 int step = BLI_str_utf8_size_safe(strc);
403
404 /* Detect null bytes within multi-byte sequences.
405 * This matches the behavior of #BLI_strncpy_utf8 for incomplete byte sequences. */
406 for (int i = 1; i < step; i++) {
407 if (UNLIKELY(strc[i] == '\0')) {
408 step = i;
409 break;
410 }
411 }
412
413 strc += step;
414 len++;
415 }
416
417 *r_len_bytes = size_t(strc - strc_orig);
418 return len;
419}
420
/**
 * Count the characters in the nil terminated string `strc`.
 * Wrapper for #BLI_strlen_utf8_ex that discards the byte length.
 */
size_t BLI_strlen_utf8(const char *strc)
{
  /* Required output argument, unused here. */
  size_t len_bytes;
  return BLI_strlen_utf8_ex(strc, &len_bytes);
}
426
427size_t BLI_strnlen_utf8_ex(const char *strc, const size_t strc_maxlen, size_t *r_len_bytes)
428{
429 size_t len = 0;
430 const char *strc_orig = strc;
431 const char *strc_end = strc + strc_maxlen;
432
433 while (*strc) {
434 int step = BLI_str_utf8_size_safe(strc);
435 if (strc + step > strc_end) {
436 break;
437 }
438
439 /* Detect null bytes within multi-byte sequences.
440 * This matches the behavior of #BLI_strncpy_utf8 for incomplete byte sequences. */
441 for (int i = 1; i < step; i++) {
442 if (UNLIKELY(strc[i] == '\0')) {
443 step = i;
444 break;
445 }
446 }
447 strc += step;
448 len++;
449 }
450
451 *r_len_bytes = size_t(strc - strc_orig);
452 return len;
453}
454
/**
 * Count the characters in `strc`, reading at most `strc_maxlen` bytes.
 * Wrapper for #BLI_strnlen_utf8_ex that discards the byte length.
 */
size_t BLI_strnlen_utf8(const char *strc, const size_t strc_maxlen)
{
  /* Required output argument, unused here. */
  size_t len_bytes;
  return BLI_strnlen_utf8_ex(strc, strc_maxlen, &len_bytes);
}
460
/**
 * Convert the UTF-8 string `src_c` into the wide string `dst_w`.
 *
 * With `WIN32` defined the conversion is done by `conv_utf_8_to_16` and the resulting
 * `wcslen(dst_w)` is returned, otherwise `dst_w` is reinterpreted as `char32_t`
 * and the call is forwarded to #BLI_str_utf8_as_utf32.
 *
 * \param dst_w_maxncpy: Capacity of `dst_w`, passed on unchanged to the conversion function.
 */
size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w,
                                   const char *__restrict src_c,
                                   const size_t dst_w_maxncpy)
{
#ifdef WIN32
  BLI_string_debug_size(dst_w, dst_w_maxncpy);
  conv_utf_8_to_16(src_c, dst_w, dst_w_maxncpy);
  /* NOTE: it would be more efficient to calculate the length as part of #conv_utf_8_to_16. */
  return wcslen(dst_w);
#else
  /* NOTE(review): this cast presumably relies on `wchar_t` being 32 bits wide
   * on non-WIN32 platforms - confirm. */
  return BLI_str_utf8_as_utf32((char32_t *)dst_w, src_c, dst_w_maxncpy);
#endif
}
474
475/* end wchar_t / utf8 functions */
476/* -------------------------------------------------------------------- */
477
478int BLI_wcwidth_or_error(char32_t ucs)
479{
480 /* Treat private use areas (icon fonts), symbols, and emoticons as double-width. */
481 if (ucs >= 0xf0000 || (ucs >= 0xe000 && ucs < 0xf8ff) || (ucs >= 0x1f300 && ucs < 0x1fbff)) {
482 return 2;
483 }
484 return mk_wcwidth(ucs);
485}
486
487int BLI_wcwidth_safe(char32_t ucs)
488{
489 const int columns = BLI_wcwidth_or_error(ucs);
490 if (columns >= 0) {
491 return columns;
492 }
493 return 1;
494}
495
/**
 * Column width of the UTF-32 string `pwcs`, forwarded unchanged to `mk_wcswidth`.
 *
 * \param n: Passed through to `mk_wcswidth` (presumably the maximum number of
 * characters to measure - confirm against `wcwidth.h`).
 */
int BLI_wcswidth_or_error(const char32_t *pwcs, size_t n)
{
  return mk_wcswidth(pwcs, n);
}
500
502{
504 if (unicode == BLI_UTF8_ERR) {
505 return -1;
506 }
507
508 return BLI_wcwidth_or_error(char32_t(unicode));
509}
510
512{
514 if (unicode == BLI_UTF8_ERR) {
515 return 1;
516 }
517
518 return BLI_wcwidth_safe(char32_t(unicode));
519}
520
521/* -------------------------------------------------------------------- */
/**
 * Convert a single code point to upper case.
 *
 * Contiguous alphabets are handled arithmetically, everything else is looked up
 * in a pair of sorted tables (`from` -> `to`).
 *
 * \return The upper case code point, or `wc` itself when no mapping is known.
 */
char32_t BLI_str_utf32_char_to_upper(const char32_t wc)
{
  /* Latin. */
  if (wc < U'\xFF') {
    if (wc >= U'a' && wc <= U'z') {
      return wc - 32;
    }
    if (wc >= U'\xE0' && wc <= U'\xF6') {
      return wc - 32;
    }
    /* The full range is `0xF8..0xFE`, the upper bound is already known from the test above. */
    if (wc >= U'\xF8') {
      return wc - 32;
    }
    return wc;
  }

  /* Latin Extended: lower case letters are the odd code points. */
  if ((wc >= U'\x101' && wc <= U'\x137') || (wc >= U'\x1E01' && wc <= U'\x1E95')) {
    if (wc & 1) {
      return wc - 1;
    }
    return wc;
  }
  /* Armenian and Georgian. */
  if ((wc >= U'\x561' && wc <= U'\x586') || (wc >= U'\x10D0' && wc <= U'\x10F5')) {
    return wc - 48;
  }
  /* Enclosed Numerals. */
  if (wc >= U'\x24D0' && wc <= U'\x24E9') {
    return wc - 26;
  }
  /* Full-width Forms. */
  if (wc >= U'\xFF41' && wc <= U'\xFF5A') {
    return wc - 32;
  }

  /* There are only three remaining ranges that contain capitalization. */
  const int in_range_a = (wc >= U'\x00FF' && wc <= U'\x0292');
  const int in_range_b = (wc >= U'\x03AC' && wc <= U'\x04F9');
  const int in_range_c = (wc >= U'\x1E01' && wc <= U'\x1FE1');
  if (!in_range_a && !in_range_b && !in_range_c) {
    return wc;
  }

  static const char32_t from[] =
      U"\x00FF\x013A\x013C\x013E\x0140\x0142\x0144\x0146\x0148\x014B\x014D\x014F\x0151\x0153\x0155"
      U"\x0157\x0159\x015B\x015D\x015F\x0161\x0163\x0165\x0167\x0169\x016B\x016D\x016F\x0171\x0173"
      U"\x0175\x0177\x017A\x017C\x017E\x0183\x0185\x0188\x018C\x0192\x0199\x01A1\x01A3\x01A5\x01A8"
      U"\x01AD\x01B0\x01B4\x01B6\x01B9\x01BD\x01C6\x01C9\x01CC\x01CE\x01D0\x01D2\x01D4\x01D6\x01D8"
      U"\x01DA\x01DC\x01DF\x01E1\x01E3\x01E5\x01E7\x01E9\x01EB\x01ED\x01EF\x01F3\x01F5\x01FB\x01FD"
      U"\x01FF\x0201\x0203\x0205\x0207\x0209\x020B\x020D\x020F\x0211\x0213\x0215\x0217\x0253\x0254"
      U"\x0257\x0258\x0259\x025B\x0260\x0263\x0268\x0269\x026F\x0272\x0275\x0283\x0288\x028A\x028B"
      U"\x0292\x03AC\x03AD\x03AE\x03AF\x03B1\x03B2\x03B3\x03B4\x03B5\x03B6\x03B7\x03B8\x03B9\x03BA"
      U"\x03BB\x03BC\x03BD\x03BE\x03BF\x03C0\x03C1\x03C3\x03C4\x03C5\x03C6\x03C7\x03C8\x03C9\x03CA"
      U"\x03CB\x03CC\x03CD\x03CE\x03E3\x03E5\x03E7\x03E9\x03EB\x03ED\x03EF\x0430\x0431\x0432\x0433"
      U"\x0434\x0435\x0436\x0437\x0438\x0439\x043A\x043B\x043C\x043D\x043E\x043F\x0440\x0441\x0442"
      U"\x0443\x0444\x0445\x0446\x0447\x0448\x0449\x044A\x044B\x044C\x044D\x044E\x044F\x0451\x0452"
      U"\x0453\x0454\x0455\x0456\x0457\x0458\x0459\x045A\x045B\x045C\x045E\x045F\x0461\x0463\x0465"
      U"\x0467\x0469\x046B\x046D\x046F\x0471\x0473\x0475\x0477\x0479\x047B\x047D\x047F\x0481\x0491"
      U"\x0493\x0495\x0497\x0499\x049B\x049D\x049F\x04A1\x04A3\x04A5\x04A7\x04A9\x04AB\x04AD\x04AF"
      U"\x04B1\x04B3\x04B5\x04B7\x04B9\x04BB\x04BD\x04BF\x04C2\x04C4\x04C8\x04CC\x04D1\x04D3\x04D5"
      U"\x04D7\x04D9\x04DB\x04DD\x04DF\x04E1\x04E3\x04E5\x04E7\x04E9\x04EB\x04EF\x04F1\x04F3\x04F5"
      U"\x04F9\x1EA1\x1EA3\x1EA5\x1EA7\x1EA9\x1EAB\x1EAD\x1EAF\x1EB1\x1EB3\x1EB5\x1EB7\x1EB9\x1EBB"
      U"\x1EBD\x1EBF\x1EC1\x1EC3\x1EC5\x1EC7\x1EC9\x1ECB\x1ECD\x1ECF\x1ED1\x1ED3\x1ED5\x1ED7\x1ED9"
      U"\x1EDB\x1EDD\x1EDF\x1EE1\x1EE3\x1EE5\x1EE7\x1EE9\x1EEB\x1EED\x1EEF\x1EF1\x1EF3\x1EF5\x1EF7"
      U"\x1EF9\x1F00\x1F01\x1F02\x1F03\x1F04\x1F05\x1F06\x1F07\x1F10\x1F11\x1F12\x1F13\x1F14\x1F15"
      U"\x1F20\x1F21\x1F22\x1F23\x1F24\x1F25\x1F26\x1F27\x1F30\x1F31\x1F32\x1F33\x1F34\x1F35\x1F36"
      U"\x1F37\x1F40\x1F41\x1F42\x1F43\x1F44\x1F45\x1F51\x1F53\x1F55\x1F57\x1F60\x1F61\x1F62\x1F63"
      U"\x1F64\x1F65\x1F66\x1F67\x1F80\x1F81\x1F82\x1F83\x1F84\x1F85\x1F86\x1F87\x1F90\x1F91\x1F92"
      U"\x1F93\x1F94\x1F95\x1F96\x1F97\x1FA0\x1FA1\x1FA2\x1FA3\x1FA4\x1FA5\x1FA6\x1FA7\x1FB0\x1FB1"
      U"\x1FD0\x1FD1\x1FE0\x1FE1";
  static const char32_t to[] =
      U"\x0178\x0139\x013B\x013D\x013F\x0141\x0143\x0145\x0147\x014A\x014C\x014E\x0150\x0152\x0154"
      U"\x0156\x0158\x015A\x015C\x015E\x0160\x0162\x0164\x0166\x0168\x016A\x016C\x016E\x0170\x0172"
      U"\x0174\x0176\x0179\x017B\x017D\x0182\x0184\x0187\x018B\x0191\x0198\x01A0\x01A2\x01A4\x01A7"
      U"\x01AC\x01AF\x01B3\x01B5\x01B8\x01BC\x01C4\x01C7\x01CA\x01CD\x01CF\x01D1\x01D3\x01D5\x01D7"
      U"\x01D9\x01DB\x01DE\x01E0\x01E2\x01E4\x01E6\x01E8\x01EA\x01EC\x01EE\x01F1\x01F4\x01FA\x01FC"
      U"\x01FE\x0200\x0202\x0204\x0206\x0208\x020A\x020C\x020E\x0210\x0212\x0214\x0216\x0181\x0186"
      U"\x018A\x018E\x018F\x0190\x0193\x0194\x0197\x0196\x019C\x019D\x019F\x01A9\x01AE\x01B1\x01B2"
      U"\x01B7\x0386\x0388\x0389\x038A\x0391\x0392\x0393\x0394\x0395\x0396\x0397\x0398\x0399\x039A"
      U"\x039B\x039C\x039D\x039E\x039F\x03A0\x03A1\x03A3\x03A4\x03A5\x03A6\x03A7\x03A8\x03A9\x03AA"
      U"\x03AB\x038C\x038E\x038F\x03E2\x03E4\x03E6\x03E8\x03EA\x03EC\x03EE\x0410\x0411\x0412\x0413"
      U"\x0414\x0415\x0416\x0417\x0418\x0419\x041A\x041B\x041C\x041D\x041E\x041F\x0420\x0421\x0422"
      U"\x0423\x0424\x0425\x0426\x0427\x0428\x0429\x042A\x042B\x042C\x042D\x042E\x042F\x0401\x0402"
      U"\x0403\x0404\x0405\x0406\x0407\x0408\x0409\x040A\x040B\x040C\x040E\x040F\x0460\x0462\x0464"
      U"\x0466\x0468\x046A\x046C\x046E\x0470\x0472\x0474\x0476\x0478\x047A\x047C\x047E\x0480\x0490"
      U"\x0492\x0494\x0496\x0498\x049A\x049C\x049E\x04A0\x04A2\x04A4\x04A6\x04A8\x04AA\x04AC\x04AE"
      U"\x04B0\x04B2\x04B4\x04B6\x04B8\x04BA\x04BC\x04BE\x04C1\x04C3\x04C7\x04CB\x04D0\x04D2\x04D4"
      U"\x04D6\x04D8\x04DA\x04DC\x04DE\x04E0\x04E2\x04E4\x04E6\x04E8\x04EA\x04EE\x04F0\x04F2\x04F4"
      U"\x04F8\x1EA0\x1EA2\x1EA4\x1EA6\x1EA8\x1EAA\x1EAC\x1EAE\x1EB0\x1EB2\x1EB4\x1EB6\x1EB8\x1EBA"
      U"\x1EBC\x1EBE\x1EC0\x1EC2\x1EC4\x1EC6\x1EC8\x1ECA\x1ECC\x1ECE\x1ED0\x1ED2\x1ED4\x1ED6\x1ED8"
      U"\x1EDA\x1EDC\x1EDE\x1EE0\x1EE2\x1EE4\x1EE6\x1EE8\x1EEA\x1EEC\x1EEE\x1EF0\x1EF2\x1EF4\x1EF6"
      U"\x1EF8\x1F08\x1F09\x1F0A\x1F0B\x1F0C\x1F0D\x1F0E\x1F0F\x1F18\x1F19\x1F1A\x1F1B\x1F1C\x1F1D"
      U"\x1F28\x1F29\x1F2A\x1F2B\x1F2C\x1F2D\x1F2E\x1F2F\x1F38\x1F39\x1F3A\x1F3B\x1F3C\x1F3D\x1F3E"
      U"\x1F3F\x1F48\x1F49\x1F4A\x1F4B\x1F4C\x1F4D\x1F59\x1F5B\x1F5D\x1F5F\x1F68\x1F69\x1F6A\x1F6B"
      U"\x1F6C\x1F6D\x1F6E\x1F6F\x1F88\x1F89\x1F8A\x1F8B\x1F8C\x1F8D\x1F8E\x1F8F\x1F98\x1F99\x1F9A"
      U"\x1F9B\x1F9C\x1F9D\x1F9E\x1F9F\x1FA8\x1FA9\x1FAA\x1FAB\x1FAC\x1FAD\x1FAE\x1FAF\x1FB8\x1FB9"
      U"\x1FD8\x1FD9\x1FE8\x1FE9";

  /* Number of table entries, the nil terminator of the string literal is not an entry. */
  const size_t from_num = (sizeof(from) / sizeof(from[0])) - 1;
  if (wc < from[0] || wc > from[from_num - 1]) {
    return wc;
  }

  /* Binary search over the half-open interval `[lo, hi)`, the `from` table is sorted. */
  size_t lo = 0;
  size_t hi = from_num;
  while (lo < hi) {
    const size_t mid = lo + (hi - lo) / 2;
    if (from[mid] < wc) {
      lo = mid + 1;
    }
    else if (from[mid] > wc) {
      hi = mid;
    }
    else {
      return to[mid];
    }
  }

  return wc;
}
641
/**
 * Convert a single code point to lower case.
 *
 * Contiguous alphabets are handled arithmetically, everything else is looked up
 * in a pair of sorted tables (`from` -> `to`).
 *
 * \return The lower case code point, or `wc` itself when no mapping is known.
 */
char32_t BLI_str_utf32_char_to_lower(const char32_t wc)
{
  /* Latin. */
  if (wc < U'\xD8') {
    if (wc >= U'A' && wc <= U'Z') {
      return wc + 32;
    }
    if (wc >= U'\xC0' && wc <= U'\xD6') {
      return wc + 32;
    }
    return wc;
  }
  /* Latin Extended: upper case letters are the even code points. */
  if ((wc >= U'\x100' && wc <= U'\x136') || (wc >= U'\x1E00' && wc <= U'\x1E94')) {
    if (wc % 2 == 0) {
      return wc + 1;
    }
    return wc;
  }
  /* Armenian and Georgian. */
  if ((wc >= U'\x531' && wc <= U'\x556') || (wc >= U'\x10A0' && wc <= U'\x10C5')) {
    return wc + 48;
  }
  /* Enclosed Numerals. */
  if (wc >= U'\x24B6' && wc <= U'\x24CF') {
    return wc + 26;
  }
  /* Full-width Forms. */
  if (wc >= U'\xFF21' && wc <= U'\xFF3A') {
    return wc + 32;
  }

  /* There are only three remaining ranges that contain capitalization. */
  const int in_range_a = (wc >= U'\x00D8' && wc <= U'\x0216');
  const int in_range_b = (wc >= U'\x0386' && wc <= U'\x04F8');
  const int in_range_c = (wc >= U'\x1E00' && wc <= U'\x1FE9');
  if (!in_range_a && !in_range_b && !in_range_c) {
    return wc;
  }

  static const char32_t from[] =
      U"\x00D8\x00D9\x00DA\x00DB\x00DC\x00DD\x00DE\x0139\x013B\x013D\x013F\x0141\x0143\x0145\x0147"
      U"\x014A\x014C\x014E\x0150\x0152\x0154\x0156\x0158\x015A\x015C\x015E\x0160\x0162\x0164\x0166"
      U"\x0168\x016A\x016C\x016E\x0170\x0172\x0174\x0176\x0178\x0179\x017B\x017D\x0181\x0182\x0184"
      U"\x0186\x0187\x018A\x018B\x018E\x018F\x0190\x0191\x0193\x0194\x0196\x0197\x0198\x019C\x019D"
      U"\x019F\x01A0\x01A2\x01A4\x01A7\x01A9\x01AC\x01AE\x01AF\x01B1\x01B2\x01B3\x01B5\x01B7\x01B8"
      U"\x01BC\x01C4\x01C5\x01C7\x01C8\x01CA\x01CB\x01CD\x01CF\x01D1\x01D3\x01D5\x01D7\x01D9\x01DB"
      U"\x01DE\x01E0\x01E2\x01E4\x01E6\x01E8\x01EA\x01EC\x01EE\x01F1\x01F4\x01FA\x01FC\x01FE\x0200"
      U"\x0202\x0204\x0206\x0208\x020A\x020C\x020E\x0210\x0212\x0214\x0216\x0386\x0388\x0389\x038A"
      U"\x038C\x038E\x038F\x0391\x0392\x0393\x0394\x0395\x0396\x0397\x0398\x0399\x039A\x039B\x039C"
      U"\x039D\x039E\x039F\x03A0\x03A1\x03A3\x03A4\x03A5\x03A6\x03A7\x03A8\x03A9\x03AA\x03AB\x03E2"
      U"\x03E4\x03E6\x03E8\x03EA\x03EC\x03EE\x0401\x0402\x0403\x0404\x0405\x0406\x0407\x0408\x0409"
      U"\x040A\x040B\x040C\x040E\x040F\x0410\x0411\x0412\x0413\x0414\x0415\x0416\x0417\x0418\x0419"
      U"\x041A\x041B\x041C\x041D\x041E\x041F\x0420\x0421\x0422\x0423\x0424\x0425\x0426\x0427\x0428"
      U"\x0429\x042A\x042B\x042C\x042D\x042E\x042F\x0460\x0462\x0464\x0466\x0468\x046A\x046C\x046E"
      U"\x0470\x0472\x0474\x0476\x0478\x047A\x047C\x047E\x0480\x0490\x0492\x0494\x0496\x0498\x049A"
      U"\x049C\x049E\x04A0\x04A2\x04A4\x04A6\x04A8\x04AA\x04AC\x04AE\x04B0\x04B2\x04B4\x04B6\x04B8"
      U"\x04BA\x04BC\x04BE\x04C1\x04C3\x04C7\x04CB\x04D0\x04D2\x04D4\x04D6\x04D8\x04DA\x04DC\x04DE"
      U"\x04E0\x04E2\x04E4\x04E6\x04E8\x04EA\x04EE\x04F0\x04F2\x04F4\x04F8\x1EA0\x1EA2\x1EA4\x1EA6"
      U"\x1EA8\x1EAA\x1EAC\x1EAE\x1EB0\x1EB2\x1EB4\x1EB6\x1EB8\x1EBA\x1EBC\x1EBE\x1EC0\x1EC2\x1EC4"
      U"\x1EC6\x1EC8\x1ECA\x1ECC\x1ECE\x1ED0\x1ED2\x1ED4\x1ED6\x1ED8\x1EDA\x1EDC\x1EDE\x1EE0\x1EE2"
      U"\x1EE4\x1EE6\x1EE8\x1EEA\x1EEC\x1EEE\x1EF0\x1EF2\x1EF4\x1EF6\x1EF8\x1F08\x1F09\x1F0A\x1F0B"
      U"\x1F0C\x1F0D\x1F0E\x1F0F\x1F18\x1F19\x1F1A\x1F1B\x1F1C\x1F1D\x1F28\x1F29\x1F2A\x1F2B\x1F2C"
      U"\x1F2D\x1F2E\x1F2F\x1F38\x1F39\x1F3A\x1F3B\x1F3C\x1F3D\x1F3E\x1F3F\x1F48\x1F49\x1F4A\x1F4B"
      U"\x1F4C\x1F4D\x1F59\x1F5B\x1F5D\x1F5F\x1F68\x1F69\x1F6A\x1F6B\x1F6C\x1F6D\x1F6E\x1F6F\x1F88"
      U"\x1F89\x1F8A\x1F8B\x1F8C\x1F8D\x1F8E\x1F8F\x1F98\x1F99\x1F9A\x1F9B\x1F9C\x1F9D\x1F9E\x1F9F"
      U"\x1FA8\x1FA9\x1FAA\x1FAB\x1FAC\x1FAD\x1FAE\x1FAF\x1FB8\x1FB9\x1FD8\x1FD9\x1FE8\x1FE9";
  static const char32_t to[] =
      U"\x00F8\x00F9\x00FA\x00FB\x00FC\x00FD\x00FE\x013A\x013C\x013E\x0140\x0142\x0144\x0146\x0148"
      U"\x014B\x014D\x014F\x0151\x0153\x0155\x0157\x0159\x015B\x015D\x015F\x0161\x0163\x0165\x0167"
      U"\x0169\x016B\x016D\x016F\x0171\x0173\x0175\x0177\x00FF\x017A\x017C\x017E\x0253\x0183\x0185"
      U"\x0254\x0188\x0257\x018C\x0258\x0259\x025B\x0192\x0260\x0263\x0269\x0268\x0199\x026f\x0272"
      U"\x0275\x01A1\x01A3\x01A5\x01A8\x0283\x01AD\x0288\x01B0\x028A\x028B\x01B4\x01B6\x0292\x01B9"
      U"\x01BD\x01C6\x01C6\x01C9\x01C9\x01CC\x01CC\x01CE\x01D0\x01D2\x01D4\x01D6\x01D8\x01DA\x01DC"
      U"\x01DF\x01E1\x01E3\x01E5\x01E7\x01E9\x01EB\x01ED\x01EF\x01F3\x01F5\x01FB\x01FD\x01FF\x0201"
      U"\x0203\x0205\x0207\x0209\x020B\x020D\x020F\x0211\x0213\x0215\x0217\x03AC\x03AD\x03AE\x03AF"
      U"\x03CC\x03CD\x03CE\x03B1\x03B2\x03B3\x03B4\x03B5\x03B6\x03B7\x03B8\x03B9\x03BA\x03BB\x03BC"
      U"\x03BD\x03BE\x03BF\x03C0\x03C1\x03C3\x03C4\x03C5\x03C6\x03C7\x03C8\x03C9\x03CA\x03CB\x03E3"
      U"\x03E5\x03E7\x03E9\x03EB\x03ED\x03EF\x0451\x0452\x0453\x0454\x0455\x0456\x0457\x0458\x0459"
      U"\x045A\x045B\x045C\x045E\x045F\x0430\x0431\x0432\x0433\x0434\x0435\x0436\x0437\x0438\x0439"
      U"\x043A\x043B\x043C\x043D\x043E\x043F\x0440\x0441\x0442\x0443\x0444\x0445\x0446\x0447\x0448"
      U"\x0449\x044A\x044B\x044C\x044D\x044E\x044F\x0461\x0463\x0465\x0467\x0469\x046B\x046D\x046F"
      U"\x0471\x0473\x0475\x0477\x0479\x047B\x047D\x047F\x0481\x0491\x0493\x0495\x0497\x0499\x049B"
      U"\x049D\x049F\x04A1\x04A3\x04A5\x04A7\x04A9\x04AB\x04AD\x04AF\x04B1\x04B3\x04B5\x04B7\x04B9"
      U"\x04BB\x04BD\x04BF\x04C2\x04C4\x04C8\x04CC\x04D1\x04D3\x04D5\x04D7\x04D9\x04DB\x04DD\x04DF"
      U"\x04E1\x04E3\x04E5\x04E7\x04E9\x04EB\x04EF\x04F1\x04F3\x04F5\x04F9\x1EA1\x1EA3\x1EA5\x1EA7"
      U"\x1EA9\x1EAB\x1EAD\x1EAF\x1EB1\x1EB3\x1EB5\x1EB7\x1EB9\x1EBB\x1EBD\x1EBF\x1EC1\x1EC3\x1EC5"
      U"\x1EC7\x1EC9\x1ECB\x1ECD\x1ECF\x1ED1\x1ED3\x1ED5\x1ED7\x1ED9\x1EDB\x1EDD\x1EDF\x1EE1\x1EE3"
      U"\x1EE5\x1EE7\x1EE9\x1EEB\x1EED\x1EEF\x1EF1\x1EF3\x1EF5\x1EF7\x1EF9\x1F00\x1F01\x1F02\x1F03"
      U"\x1F04\x1F05\x1F06\x1F07\x1F10\x1F11\x1F12\x1F13\x1F14\x1F15\x1F20\x1F21\x1F22\x1F23\x1F24"
      U"\x1F25\x1F26\x1F27\x1F30\x1F31\x1F32\x1F33\x1F34\x1F35\x1F36\x1F37\x1F40\x1F41\x1F42\x1F43"
      U"\x1F44\x1F45\x1F51\x1F53\x1F55\x1F57\x1F60\x1F61\x1F62\x1F63\x1F64\x1F65\x1F66\x1F67\x1F80"
      U"\x1F81\x1F82\x1F83\x1F84\x1F85\x1F86\x1F87\x1F90\x1F91\x1F92\x1F93\x1F94\x1F95\x1F96\x1F97"
      U"\x1FA0\x1FA1\x1FA2\x1FA3\x1FA4\x1FA5\x1FA6\x1FA7\x1FB0\x1FB1\x1FD0\x1FD1\x1FE0\x1FE1";

  /* Number of table entries, the nil terminator of the string literal is not an entry. */
  const size_t from_num = (sizeof(from) / sizeof(from[0])) - 1;
  if (wc < from[0] || wc > from[from_num - 1]) {
    return wc;
  }

  /* Binary search over the half-open interval `[lo, hi)`, the `from` table is sorted. */
  size_t lo = 0;
  size_t hi = from_num;
  while (lo < hi) {
    const size_t mid = lo + (hi - lo) / 2;
    if (from[mid] < wc) {
      lo = mid + 1;
    }
    else if (from[mid] > wc) {
      hi = mid;
    }
    else {
      return to[mid];
    }
  }

  return wc;
}
747
748 /* -------------------------------------------------------------------- */
749
751{
753}
754
/**
 * Size in bytes of the UTF-8 sequence announced by the leading byte `*p`,
 * as computed by #utf8_char_compute_skip. Only the first byte is read.
 */
int BLI_str_utf8_size_safe(const char *p)
{
  return utf8_char_compute_skip(*p);
}
759
761{
762 /* Originally `g_utf8_get_char` in GLIB. */
763
764 const uchar c = uchar(*p);
765
766 char mask = 0;
768 if (UNLIKELY(len == -1)) {
769 return BLI_UTF8_ERR;
770 }
771 return utf8_char_decode(p, mask, len, BLI_UTF8_ERR);
772}
773
775{
776 const uint result = BLI_str_utf8_as_unicode_or_error(p);
777 if (UNLIKELY(result == BLI_UTF8_ERR)) {
778 return *p;
779 }
780 return result;
781}
782
784 const size_t p_len,
785 size_t *__restrict index)
786{
787 const uchar c = uchar(*(p += *index));
788
789 BLI_assert(*index < p_len);
790 BLI_assert(c != '\0');
791
792 char mask = 0;
794 if (UNLIKELY(len == -1) || (*index + size_t(len) > p_len)) {
795 return BLI_UTF8_ERR;
796 }
797
798 const uint result = utf8_char_decode(p, mask, len, BLI_UTF8_ERR);
799 if (UNLIKELY(result == BLI_UTF8_ERR)) {
800 return BLI_UTF8_ERR;
801 }
802 *index += size_t(len);
803 BLI_assert(*index <= p_len);
804 return result;
805}
806
808 const size_t p_len,
809 size_t *__restrict index)
810{
811 uint result = BLI_str_utf8_as_unicode_step_or_error(p, p_len, index);
812 if (UNLIKELY(result == BLI_UTF8_ERR)) {
813 result = uint(p[*index]);
814 *index += 1;
815 }
816 BLI_assert(*index <= p_len);
817 return result;
818}
819
820/* was g_unichar_to_utf8 */
821
/**
 * Compute the UTF-8 encoding parameters for the code point `Char`:
 * `Len` is assigned the number of bytes in the sequence (1 to 6) and
 * `First` the marker bits of the leading byte (0 for a single byte).
 *
 * \note Expands to an `if`/`else` chain followed by `(void)0` so it can be used as a statement.
 * The arguments are not parenthesized and `Char` is evaluated more than once:
 * pass plain variables only.
 */
#define UTF8_VARS_FROM_CHAR32(Char, First, Len) \
  if (Char < 0x80) { \
    First = 0; \
    Len = 1; \
  } \
  else if (Char < 0x800) { \
    First = 0xc0; \
    Len = 2; \
  } \
  else if (Char < 0x10000) { \
    First = 0xe0; \
    Len = 3; \
  } \
  else if (Char < 0x200000) { \
    First = 0xf0; \
    Len = 4; \
  } \
  else if (Char < 0x4000000) { \
    First = 0xf8; \
    Len = 5; \
  } \
  else { \
    First = 0xfc; \
    Len = 6; \
  } \
  (void)0
848
850{
851 /* If this gets modified, also update the copy in g_string_insert_unichar() */
852 uint len = 0;
853 uint first;
854
855 UTF8_VARS_FROM_CHAR32(c, first, len);
856 (void)first;
857
858 return len;
859}
860
861size_t BLI_str_utf8_from_unicode(uint c, char *dst, const size_t dst_maxncpy)
862
863{
864 BLI_string_debug_size(dst, dst_maxncpy);
865
866 /* If this gets modified, also update the copy in g_string_insert_unichar() */
867 uint len = 0;
868 uint first;
869
870 UTF8_VARS_FROM_CHAR32(c, first, len);
871
872 if (UNLIKELY(dst_maxncpy < len)) {
873 /* NULL terminate instead of writing a partial byte. */
874 memset(dst, 0x0, dst_maxncpy);
875 return dst_maxncpy;
876 }
877
878 for (uint i = len - 1; i > 0; i--) {
879 dst[i] = char((c & 0x3f) | 0x80);
880 c >>= 6;
881 }
882 dst[0] = char(c | first);
883
884 return len;
885}
886
887size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
888 const char *__restrict src_c,
889 const size_t dst_w_maxncpy)
890{
891 BLI_assert(dst_w_maxncpy != 0);
892 BLI_string_debug_size(dst_w, dst_w_maxncpy);
893
894 const size_t maxlen = dst_w_maxncpy - 1;
895 size_t len = 0;
896
897 const size_t src_c_len = strlen(src_c);
898 const char *src_c_end = src_c + src_c_len;
899 size_t index = 0;
900 while ((index < src_c_len) && (len != maxlen)) {
901 const uint unicode = BLI_str_utf8_as_unicode_step_or_error(src_c, src_c_len, &index);
902 if (unicode != BLI_UTF8_ERR) {
903 *dst_w = unicode;
904 }
905 else {
906 *dst_w = '?';
907 const char *src_c_next = BLI_str_find_next_char_utf8(src_c + index, src_c_end);
908 index = size_t(src_c_next - src_c);
909 }
910 dst_w++;
911 len++;
912 }
913
914 *dst_w = 0;
915
916 return len;
917}
918
919size_t BLI_str_utf32_as_utf8(char *__restrict dst,
920 const char32_t *__restrict src,
921 const size_t dst_maxncpy)
922{
923 BLI_assert(dst_maxncpy != 0);
924 BLI_string_debug_size(dst, dst_maxncpy);
925
926 size_t len = 0;
927 while (*src && len < dst_maxncpy) {
928 len += BLI_str_utf8_from_unicode(uint(*src++), dst + len, dst_maxncpy - len);
929 }
930 dst[len] = '\0';
931 /* Return the correct length when part of the final byte did not fit into the string. */
932 while ((len > 0) && UNLIKELY(dst[len - 1] == '\0')) {
933 len--;
934 }
935 return len;
936}
937
938size_t BLI_str_utf32_as_utf8_len_ex(const char32_t *src, const size_t src_maxlen)
939{
940 size_t len = 0;
941 const char32_t *src_end = src + src_maxlen;
942
943 while ((src < src_end) && *src) {
945 }
946
947 return len;
948}
949
950size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
951{
952 size_t len = 0;
953
954 while (*src) {
956 }
957
958 return len;
959}
960
961const char *BLI_str_find_prev_char_utf8(const char *p, const char *str_start)
962{
963 /* Originally `g_utf8_find_prev_char` in GLIB. */
964
965 BLI_assert(p >= str_start);
966 if (str_start < p) {
967 for (--p; p >= str_start; p--) {
968 if ((*p & 0xc0) != 0x80) {
969 return (char *)p;
970 }
971 }
972 }
973 return p;
974}
975
976const char *BLI_str_find_next_char_utf8(const char *p, const char *str_end)
977{
978 /* Originally `g_utf8_find_next_char` in GLIB. */
979
980 BLI_assert(p <= str_end);
981 if ((p < str_end) && (*p != '\0')) {
982 for (++p; p < str_end && (*p & 0xc0) == 0x80; p++) {
983 /* do nothing */
984 }
985 }
986 return p;
987}
988
/**
 * Partition the nil terminated string `str` at a delimiter.
 * Wrapper for #BLI_str_partition_ex_utf8 with no explicit end and `from_right` set to false.
 */
size_t BLI_str_partition_utf8(const char *str,
                              const uint delim[],
                              const char **r_sep,
                              const char **r_suf)
{
  return BLI_str_partition_ex_utf8(str, nullptr, delim, r_sep, r_suf, false);
}
996
/**
 * Partition the nil terminated string `str` at a delimiter, searching from the right.
 * Wrapper for #BLI_str_partition_ex_utf8 with no explicit end and `from_right` set to true.
 */
size_t BLI_str_rpartition_utf8(const char *str,
                               const uint delim[],
                               const char **r_sep,
                               const char **r_suf)
{
  return BLI_str_partition_ex_utf8(str, nullptr, delim, r_sep, r_suf, true);
}
1004
1006 const char *end,
1007 const uint delim[],
1008 const char **r_sep,
1009 const char **r_suf,
1010 const bool from_right)
1011{
1012 const size_t str_len = end ? size_t(end - str) : strlen(str);
1013 if (end == nullptr) {
1014 end = str + str_len;
1015 }
1016
1017 /* Note that here, we assume end points to a valid utf8 char! */
1019
1020 char *suf = (char *)(str + str_len);
1021 size_t index = 0;
1022 for (char *sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(end, str) : str);
1023 from_right ? (sep > str) : ((sep < end) && (*sep != '\0'));
1024 sep = (char *)(from_right ? (str != sep ? BLI_str_find_prev_char_utf8(sep, str) : nullptr) :
1025 str + index))
1026 {
1027 size_t index_ofs = 0;
1028 const uint c = BLI_str_utf8_as_unicode_step_or_error(sep, size_t(end - sep), &index_ofs);
1029 if (UNLIKELY(c == BLI_UTF8_ERR)) {
1030 break;
1031 }
1032 index += index_ofs;
1033
1034 for (const uint *d = delim; *d != '\0'; d++) {
1035 if (*d == c) {
1036 /* `suf` is already correct in case from_right is true. */
1037 *r_sep = sep;
1038 *r_suf = from_right ? suf : (char *)(str + index);
1039 return size_t(sep - str);
1040 }
1041 }
1042
1043 suf = sep; /* Useful in 'from_right' case! */
1044 }
1045
1046 *r_suf = *r_sep = nullptr;
1047 return str_len;
1048}
1049
1050/* -------------------------------------------------------------------- */
1060int BLI_str_utf8_offset_to_index(const char *str, const size_t str_len, const int offset_target)
1061{
1062 BLI_assert(offset_target >= 0);
1063 const size_t offset_target_as_size = size_t(offset_target);
1064 size_t offset = 0;
1065 int index = 0;
1066 /* Note that `offset != offset_target_as_size` works for valid utf8 strings. */
1067 while ((offset < str_len) && (offset < offset_target_as_size)) {
1068 /* Use instead of #BLI_str_utf8_size_safe to match behavior when limiting the string length. */
1069 const uint code = BLI_str_utf8_as_unicode_step_safe(str, str_len, &offset);
1070 UNUSED_VARS(code);
1071 index++;
1072 BLI_assert(offset <= offset_target_as_size); /* See DOXY section comment. */
1073 }
1074 return index;
1075}
1076
1077int BLI_str_utf8_offset_from_index(const char *str, const size_t str_len, const int index_target)
1078{
1079 BLI_assert(index_target >= 0);
1080 size_t offset = 0;
1081 int index = 0;
1082 while ((offset < str_len) && (index < index_target)) {
1083 /* Use instead of #BLI_str_utf8_size_safe to match behavior when limiting the string length. */
1084 const uint code = BLI_str_utf8_as_unicode_step_safe(str, str_len, &offset);
1085 UNUSED_VARS(code);
1086 index++;
1087 }
1088 return int(offset);
1089}
1090
1091int BLI_str_utf8_offset_to_column(const char *str, const size_t str_len, const int offset_target)
1092{
1093 BLI_assert(offset_target >= 0);
1094 const size_t offset_target_clamp = std::min(size_t(offset_target), str_len);
1095 size_t offset = 0;
1096 int column = 0;
1097 while (offset < offset_target_clamp) {
1098 const uint code = BLI_str_utf8_as_unicode_step_safe(str, str_len, &offset);
1099 column += BLI_wcwidth_safe(code);
1100 BLI_assert(offset <= size_t(offset_target)); /* See DOXY section comment. */
1101 }
1102 return column;
1103}
1104
1105int BLI_str_utf8_offset_from_column(const char *str, const size_t str_len, const int column_target)
1106{
1107 size_t offset = 0, offset_next = 0;
1108 int column = 0;
1109 while ((offset < str_len) && (column < column_target)) {
1110 const uint code = BLI_str_utf8_as_unicode_step_safe(str, str_len, &offset_next);
1111 column += BLI_wcwidth_safe(code);
1112 if (column > column_target) {
1113 break;
1114 }
1115 offset = offset_next;
1116 }
1117 return int(offset);
1118}
1119
1121 const size_t str_len,
1122 const int offset_target,
1123 const int tab_width)
1124{
1125 BLI_assert(offset_target >= 0);
1126 const size_t offset_target_clamp = std::min(size_t(offset_target), str_len);
1127 size_t offset = 0;
1128 int column = 0;
1129 while (offset < offset_target_clamp) {
1130 const uint code = BLI_str_utf8_as_unicode_step_safe(str, str_len, &offset);
1131 /* The following line is the only change compared with #BLI_str_utf8_offset_to_column. */
1132 column += (code == '\t') ? (tab_width - (column % tab_width)) : BLI_wcwidth_safe(code);
1133 BLI_assert(offset <= size_t(offset_target)); /* See DOXY section comment. */
1134 }
1135 return column;
1136}
1137
1139 const size_t str_len,
1140 const int column_target,
1141 const int tab_width)
1142{
1143 size_t offset = 0, offset_next = 0;
1144 int column = 0;
1145 while ((offset < str_len) && (column < column_target)) {
1146 const uint code = BLI_str_utf8_as_unicode_step_safe(str, str_len, &offset_next);
1147 /* The following line is the only change compared with #BLI_str_utf8_offset_from_column. */
1148 column += (code == '\t') ? (tab_width - (column % tab_width)) : BLI_wcwidth_safe(code);
1149 if (column > column_target) {
1150 break;
1151 }
1152 offset = offset_next;
1153 }
1154 return int(offset);
1155}
1156
#define BLI_assert(a)
Definition BLI_assert.h:50
#define ATTR_FALLTHROUGH
#define BLI_INLINE
#define BLI_string_debug_size(str, str_maxncpy)
Definition BLI_string.h:668
#define BLI_UTF8_ERR
unsigned char uchar
unsigned int uint
#define ARRAY_SIZE(arr)
#define UNUSED_VARS(...)
#define UNLIKELY(x)
#define ELEM(...)
unsigned int U
Definition btGjkEpa3.h:78
int len
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
#define str(s)
int count
ccl_device_inline float4 mask(const int4 mask, const float4 a)
#define min(a, b)
Definition sort.c:32
BLI_INLINE int utf8_char_compute_skip_or_error(const char c)
ptrdiff_t BLI_str_utf8_invalid_byte(const char *str, size_t length)
uint BLI_str_utf8_as_unicode_or_error(const char *p)
int BLI_wcswidth_or_error(const char32_t *pwcs, size_t n)
int BLI_str_utf8_offset_from_column(const char *str, const size_t str_len, const int column_target)
int BLI_str_utf8_char_width_or_error(const char *p)
BLI_INLINE int utf8_char_compute_skip_or_error_with_mask(const char c, char *r_mask)
size_t BLI_str_utf8_from_unicode_len(const uint c)
BLI_INLINE int utf8_char_compute_skip(const char c)
size_t BLI_str_partition_ex_utf8(const char *str, const char *end, const uint delim[], const char **r_sep, const char **r_suf, const bool from_right)
size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes)
char * BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t dst_maxncpy)
size_t BLI_strlen_utf8(const char *strc)
char32_t BLI_str_utf32_char_to_lower(const char32_t wc)
#define UTF8_VARS_FROM_CHAR32(Char, First, Len)
size_t BLI_strnlen_utf8(const char *strc, const size_t strc_maxlen)
size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
char32_t BLI_str_utf32_char_to_upper(const char32_t wc)
uint BLI_str_utf8_as_unicode_safe(const char *p)
BLI_INLINE uint utf8_char_decode(const char *p, const char mask, const int len, const uint err)
size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, const wchar_t *__restrict src, const size_t dst_maxncpy)
size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, const char *__restrict src_c, const size_t dst_w_maxncpy)
const char * BLI_str_find_next_char_utf8(const char *p, const char *str_end)
size_t BLI_str_utf32_as_utf8_len_ex(const char32_t *src, const size_t src_maxlen)
const char * BLI_str_find_prev_char_utf8(const char *p, const char *str_start)
size_t BLI_wstrlen_utf8(const wchar_t *src)
int BLI_wcwidth_safe(char32_t ucs)
int BLI_str_utf8_offset_to_index(const char *str, const size_t str_len, const int offset_target)
int BLI_str_utf8_offset_to_column_with_tabs(const char *str, const size_t str_len, const int offset_target, const int tab_width)
size_t BLI_str_utf32_as_utf8(char *__restrict dst, const char32_t *__restrict src, const size_t dst_maxncpy)
int BLI_str_utf8_offset_to_column(const char *str, const size_t str_len, const int offset_target)
BLI_INLINE char * str_utf8_copy_max_bytes_impl(char *dst, const char *src, size_t dst_maxncpy)
uint BLI_str_utf8_as_unicode_step_safe(const char *__restrict p, const size_t p_len, size_t *__restrict index)
size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, const char *__restrict src_c, const size_t dst_w_maxncpy)
size_t BLI_str_utf8_from_unicode(uint c, char *dst, const size_t dst_maxncpy)
int BLI_str_utf8_invalid_strip(char *str, size_t length)
int BLI_str_utf8_size_safe(const char *p)
size_t BLI_str_rpartition_utf8(const char *str, const uint delim[], const char **r_sep, const char **r_suf)
int BLI_wcwidth_or_error(char32_t ucs)
uint BLI_str_utf8_as_unicode_step_or_error(const char *__restrict p, const size_t p_len, size_t *__restrict index)
size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t dst_maxncpy)
size_t BLI_str_partition_utf8(const char *str, const uint delim[], const char **r_sep, const char **r_suf)
int BLI_str_utf8_offset_from_index(const char *str, const size_t str_len, const int index_target)
int BLI_str_utf8_char_width_safe(const char *p)
int BLI_str_utf8_size_or_error(const char *p)
int BLI_str_utf8_offset_from_column_with_tabs(const char *str, const size_t str_len, const int column_target, const int tab_width)
size_t BLI_strnlen_utf8_ex(const char *strc, const size_t strc_maxlen, size_t *r_len_bytes)
float max
int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
Definition utfconv.cc:182