|
Blender V4.3
|
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <cwctype>
#include <wcwidth.h>
#include "BLI_utildefines.h"
#include "BLI_string.h"
#include "BLI_string_utf8.h"
#include "BLI_strict_flags.h"
Go to the source code of this file.
Macros | |
| #define | UTF8_VARS_FROM_CHAR32(Char, First, Len) |
Functions | |
| ptrdiff_t | BLI_str_utf8_invalid_byte (const char *str, size_t length) |
| int | BLI_str_utf8_invalid_strip (char *str, size_t length) |
| BLI_INLINE char * | str_utf8_copy_max_bytes_impl (char *dst, const char *src, size_t dst_maxncpy) |
| char * | BLI_strncpy_utf8 (char *__restrict dst, const char *__restrict src, size_t dst_maxncpy) |
| size_t | BLI_strncpy_utf8_rlen (char *__restrict dst, const char *__restrict src, size_t dst_maxncpy) |
| size_t | BLI_strncpy_wchar_as_utf8 (char *__restrict dst, const wchar_t *__restrict src, const size_t dst_maxncpy) |
| size_t | BLI_wstrlen_utf8 (const wchar_t *src) |
| size_t | BLI_strlen_utf8_ex (const char *strc, size_t *r_len_bytes) |
| size_t | BLI_strlen_utf8 (const char *strc) |
| size_t | BLI_strnlen_utf8_ex (const char *strc, const size_t strc_maxlen, size_t *r_len_bytes) |
| size_t | BLI_strnlen_utf8 (const char *strc, const size_t strc_maxlen) |
| size_t | BLI_strncpy_wchar_from_utf8 (wchar_t *__restrict dst_w, const char *__restrict src_c, const size_t dst_w_maxncpy) |
| int | BLI_wcwidth_or_error (char32_t ucs) |
| int | BLI_wcwidth_safe (char32_t ucs) |
| int | BLI_wcswidth_or_error (const char32_t *pwcs, size_t n) |
| int | BLI_str_utf8_char_width_or_error (const char *p) |
| int | BLI_str_utf8_char_width_safe (const char *p) |
| int | BLI_str_utf8_size_or_error (const char *p) |
| int | BLI_str_utf8_size_safe (const char *p) |
| uint | BLI_str_utf8_as_unicode_or_error (const char *p) |
| uint | BLI_str_utf8_as_unicode_safe (const char *p) |
| uint | BLI_str_utf8_as_unicode_step_or_error (const char *__restrict p, const size_t p_len, size_t *__restrict index) |
| uint | BLI_str_utf8_as_unicode_step_safe (const char *__restrict p, const size_t p_len, size_t *__restrict index) |
| size_t | BLI_str_utf8_from_unicode_len (const uint c) |
| size_t | BLI_str_utf8_from_unicode (uint c, char *dst, const size_t dst_maxncpy) |
| size_t | BLI_str_utf8_as_utf32 (char32_t *__restrict dst_w, const char *__restrict src_c, const size_t dst_w_maxncpy) |
| size_t | BLI_str_utf32_as_utf8 (char *__restrict dst, const char32_t *__restrict src, const size_t dst_maxncpy) |
| size_t | BLI_str_utf32_as_utf8_len_ex (const char32_t *src, const size_t src_maxlen) |
| size_t | BLI_str_utf32_as_utf8_len (const char32_t *src) |
| const char * | BLI_str_find_prev_char_utf8 (const char *p, const char *str_start) |
| const char * | BLI_str_find_next_char_utf8 (const char *p, const char *str_end) |
| size_t | BLI_str_partition_utf8 (const char *str, const uint delim[], const char **r_sep, const char **r_suf) |
| size_t | BLI_str_rpartition_utf8 (const char *str, const uint delim[], const char **r_sep, const char **r_suf) |
| size_t | BLI_str_partition_ex_utf8 (const char *str, const char *end, const uint delim[], const char **r_sep, const char **r_suf, const bool from_right) |
UTF8 Character Decoding (Skip & Mask Lookup) | |
Derived from GLIB. Ranges (zero based, inclusive):
Invalid values fall back to 1 byte or -1 (for an error value).
| |
| BLI_INLINE int | utf8_char_compute_skip (const char c) |
| BLI_INLINE int | utf8_char_compute_skip_or_error (const char c) |
| BLI_INLINE int | utf8_char_compute_skip_or_error_with_mask (const char c, char *r_mask) |
| BLI_INLINE uint | utf8_char_decode (const char *p, const char mask, const int len, const uint err) |
UTF32 Case Conversion | |
| |
| char32_t | BLI_str_utf32_char_to_upper (const char32_t wc) |
| char32_t | BLI_str_utf32_char_to_lower (const char32_t wc) |
Offset Conversion in Strings | |
| |
| int | BLI_str_utf8_offset_to_index (const char *str, const size_t str_len, const int offset_target) |
| int | BLI_str_utf8_offset_from_index (const char *str, const size_t str_len, const int index_target) |
| int | BLI_str_utf8_offset_to_column (const char *str, const size_t str_len, const int offset_target) |
| int | BLI_str_utf8_offset_from_column (const char *str, const size_t str_len, const int column_target) |
| int | BLI_str_utf8_offset_to_column_with_tabs (const char *str, const size_t str_len, const int offset_target, const int tab_width) |
| int | BLI_str_utf8_offset_from_column_with_tabs (const char *str, const size_t str_len, const int column_target, const int tab_width) |
| #define UTF8_VARS_FROM_CHAR32 | ( | Char, | |
| First, | |||
| Len ) |
Definition at line 822 of file string_utf8.cc.
Referenced by BLI_str_utf8_from_unicode(), and BLI_str_utf8_from_unicode_len().
| const char * BLI_str_find_next_char_utf8 | ( | const char * | p, |
| const char * | str_end ) |
Definition at line 976 of file string_utf8.cc.
References BLI_assert.
Referenced by BLI_str_utf8_as_utf32().
| const char * BLI_str_find_prev_char_utf8 | ( | const char * | p, |
| const char * | str_start ) |
Definition at line 961 of file string_utf8.cc.
References BLI_assert.
Referenced by BLI_str_partition_ex_utf8().
| size_t BLI_str_partition_ex_utf8 | ( | const char * | str, |
| const char * | end, | ||
| const uint | delim[], | ||
| const char ** | r_sep, | ||
| const char ** | r_suf, | ||
| const bool | from_right ) |
Definition at line 1005 of file string_utf8.cc.
References BLI_assert, BLI_str_find_prev_char_utf8(), BLI_str_utf8_as_unicode_or_error(), BLI_str_utf8_as_unicode_step_or_error(), BLI_UTF8_ERR, str, and UNLIKELY.
Referenced by BLI_str_partition_utf8(), and BLI_str_rpartition_utf8().
| size_t BLI_str_partition_utf8 | ( | const char * | str, |
| const uint | delim[], | ||
| const char ** | r_sep, | ||
| const char ** | r_suf ) |
Definition at line 989 of file string_utf8.cc.
References BLI_str_partition_ex_utf8(), and str.
| size_t BLI_str_rpartition_utf8 | ( | const char * | str, |
| const uint | delim[], | ||
| const char ** | r_sep, | ||
| const char ** | r_suf ) |
Definition at line 997 of file string_utf8.cc.
References BLI_str_partition_ex_utf8(), and str.
| size_t BLI_str_utf32_as_utf8 | ( | char *__restrict | dst, |
| const char32_t *__restrict | src, | ||
| const size_t | dst_maxncpy ) |
Definition at line 919 of file string_utf8.cc.
References BLI_assert, BLI_str_utf8_from_unicode(), BLI_string_debug_size, len, and UNLIKELY.
| size_t BLI_str_utf32_as_utf8_len | ( | const char32_t * | src | ) |
Definition at line 950 of file string_utf8.cc.
References BLI_str_utf8_from_unicode_len(), and len.
Referenced by BKE_vfont_clipboard_set(), and ED_curve_editfont_load().
| size_t BLI_str_utf32_as_utf8_len_ex | ( | const char32_t * | src, |
| size_t | src_maxlen ) |
Definition at line 938 of file string_utf8.cc.
References BLI_str_utf8_from_unicode_len(), and len.
Referenced by font_select_to_buffer().
| char32_t BLI_str_utf32_char_to_lower | ( | char32_t | wc | ) |
Return the lowercase of a 32-bit character or the character when no case change is needed.
Definition at line 642 of file string_utf8.cc.
References ARRAY_SIZE, max, and min.
Referenced by set_case().
| char32_t BLI_str_utf32_char_to_upper | ( | char32_t | wc | ) |
Return the uppercase of a 32-bit character or the character when no case change is needed.
Definition at line 531 of file string_utf8.cc.
References ARRAY_SIZE, max, and min.
Referenced by set_case().
| uint BLI_str_utf8_as_unicode_or_error | ( | const char * | p | ) |
| p | a pointer to a Unicode character encoded as UTF-8 |
Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If p does not point to a valid UTF-8 encoded character, results are undefined. If you are not sure that the bytes are complete valid Unicode characters, you should use g_utf8_get_char_validated() instead.
Return value: the resulting character
Definition at line 760 of file string_utf8.cc.
References BLI_UTF8_ERR, len, UNLIKELY, utf8_char_compute_skip_or_error_with_mask(), and utf8_char_decode().
Referenced by BLI_str_partition_ex_utf8(), BLI_str_utf8_as_unicode_safe(), BLI_str_utf8_char_width_or_error(), BLI_str_utf8_char_width_safe(), insert_text_invoke(), key_event_glyph_or_text(), text_autocomplete_build(), and text_insert_invoke().
| uint BLI_str_utf8_as_unicode_safe | ( | const char * | p | ) |
Definition at line 774 of file string_utf8.cc.
References BLI_str_utf8_as_unicode_or_error(), BLI_UTF8_ERR, result, and UNLIKELY.
Referenced by blender::string_search::extract_normalized_words(), and blender::string_search::get_fuzzy_match_errors().
| uint BLI_str_utf8_as_unicode_step_or_error | ( | const char *__restrict | p, |
| const size_t | p_len, | ||
| size_t *__restrict | index ) |
Definition at line 783 of file string_utf8.cc.
References BLI_assert, BLI_UTF8_ERR, len, result, UNLIKELY, utf8_char_compute_skip_or_error_with_mask(), and utf8_char_decode().
Referenced by BLI_str_partition_ex_utf8(), BLI_str_utf8_as_unicode_step_safe(), and BLI_str_utf8_as_utf32().
| uint BLI_str_utf8_as_unicode_step_safe | ( | const char *__restrict | p, |
| const size_t | p_len, | ||
| size_t *__restrict | index ) |
Definition at line 807 of file string_utf8.cc.
References BLI_assert, BLI_str_utf8_as_unicode_step_or_error(), BLI_UTF8_ERR, result, and UNLIKELY.
Referenced by BLI_str_utf8_offset_from_column(), BLI_str_utf8_offset_from_column_with_tabs(), BLI_str_utf8_offset_from_index(), BLI_str_utf8_offset_to_column(), BLI_str_utf8_offset_to_column_with_tabs(), and BLI_str_utf8_offset_to_index().
| size_t BLI_str_utf8_as_utf32 | ( | char32_t *__restrict | dst_w, |
| const char *__restrict | src_c, | ||
| const size_t | dst_w_maxncpy ) |
Definition at line 887 of file string_utf8.cc.
References BLI_assert, BLI_str_find_next_char_utf8(), BLI_str_utf8_as_unicode_step_or_error(), BLI_string_debug_size, BLI_UTF8_ERR, and len.
Referenced by BLI_strncpy_wchar_from_utf8().
| int BLI_str_utf8_char_width_or_error | ( | const char * | p | ) |
Definition at line 501 of file string_utf8.cc.
References BLI_str_utf8_as_unicode_or_error(), BLI_UTF8_ERR, and BLI_wcwidth_or_error().
Referenced by blf_str_offset_from_cursor_position(), BLI_str_cursor_step_next_utf8(), and BLI_str_cursor_step_prev_utf8().
| int BLI_str_utf8_char_width_safe | ( | const char * | p | ) |
Definition at line 511 of file string_utf8.cc.
References BLI_str_utf8_as_unicode_or_error(), BLI_UTF8_ERR, and BLI_wcwidth_safe().
Referenced by console_cursor_wrap_offset(), flatten_column_to_offset(), flatten_width(), space_text_cursor_set_to_pos_wrapped(), space_text_get_cursor_rel(), textview_wrap_offsets(), txt_wrap_move_bol(), and txt_wrap_move_eol().
| size_t BLI_str_utf8_from_unicode | ( | unsigned int | c, |
| char * | dst, | ||
| size_t | dst_maxncpy ) |
BLI_str_utf8_from_unicode:
| c | a Unicode character code |
| dst | output buffer, must have at least dst_maxncpy bytes of space. If the length required by c exceeds dst_maxncpy, the available bytes will be zeroed and dst_maxncpy returned. |
Converts a single character to UTF-8.
Definition at line 861 of file string_utf8.cc.
References BLI_string_debug_size, len, UNLIKELY, and UTF8_VARS_FROM_CHAR32.
Referenced by BLI_str_utf32_as_utf8(), BLI_strncpy_wchar_as_utf8(), find_family_object(), blender::io::usd::make_safe_name(), txt_add_char_intern(), txt_extended_ascii_as_utf8(), txt_replace_char(), and wm_event_add_ghostevent().
| size_t BLI_str_utf8_from_unicode_len | ( | const uint | c | ) |
Definition at line 849 of file string_utf8.cc.
References len, and UTF8_VARS_FROM_CHAR32.
Referenced by BLI_str_utf32_as_utf8_len(), BLI_str_utf32_as_utf8_len_ex(), BLI_wstrlen_utf8(), and text_insert_invoke().
| ptrdiff_t BLI_str_utf8_invalid_byte | ( | const char * | str, |
| size_t | length ) |
Find first UTF-8 invalid byte in given str, of length bytes.
Definition at line 150 of file string_utf8.cc.
References ELEM, str, and utf8_char_compute_skip().
Referenced by BLI_str_utf8_invalid_strip(), and txt_extended_ascii_as_utf8().
| int BLI_str_utf8_invalid_strip | ( | char * | str, |
| size_t | length ) |
Remove any invalid UTF-8 byte (taking multi-byte sequences into account, of course).
Definition at line 285 of file string_utf8.cc.
References BLI_assert, BLI_str_utf8_invalid_byte(), and str.
Referenced by BKE_id_new_name_validate(), BKE_vfontdata_from_freetypefont(), id_name_final_build(), outputNumInput(), SEQ_edit_sequence_name_set(), TEST(), ui_textedit_end(), and wm_clipboard_text_get_ex().
| int BLI_str_utf8_offset_from_column | ( | const char * | str, |
| const size_t | str_len, | ||
| const int | column_target ) |
Definition at line 1105 of file string_utf8.cc.
References BLI_str_utf8_as_unicode_step_safe(), BLI_wcwidth_safe(), int, and str.
| int BLI_str_utf8_offset_from_column_with_tabs | ( | const char * | str, |
| const size_t | str_len, | ||
| const int | column_target, | ||
| const int | tab_width ) |
Definition at line 1138 of file string_utf8.cc.
References BLI_str_utf8_as_unicode_step_safe(), BLI_wcwidth_safe(), int, and str.
Referenced by text_convert_whitespace_exec(), textview_draw_string(), txt_move_down(), txt_move_up(), and txt_wrap_move_bol().
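| int BLI_str_utf8_offset_from_index | ( | const char * | str, |
| const size_t | str_len, | ||
| const int | index_target ) |
Return the byte offset in str from index_target.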
| index_target | The unicode index, where multi-byte characters are counted once. There is no need to clamp this value, the index is logically clamped to BLI_strlen_utf8(str) or below. |
Definition at line 1077 of file string_utf8.cc.
References BLI_assert, BLI_str_utf8_as_unicode_step_safe(), int, str, and UNUSED_VARS.
Referenced by blender::nodes::node_geo_string_to_curves_cc::get_text_layout(), blender::nodes::node_fn_slice_string_cc::node_build_multi_function(), TEST(), and txt_sel_set().
| int BLI_str_utf8_offset_to_column | ( | const char * | str, |
| const size_t | str_len, | ||
| const int | offset_target ) |
Definition at line 1091 of file string_utf8.cc.
References BLI_assert, BLI_str_utf8_as_unicode_step_safe(), BLI_wcwidth_safe(), and str.
| int BLI_str_utf8_offset_to_column_with_tabs | ( | const char * | str, |
| const size_t | str_len, | ||
| const int | offset_target, | ||
| const int | tab_width ) |
Definition at line 1120 of file string_utf8.cc.
References BLI_assert, BLI_str_utf8_as_unicode_step_safe(), BLI_wcwidth_safe(), and str.
Referenced by text_convert_whitespace_exec(), textview_draw_sel(), txt_move_down(), and txt_move_up().
| int BLI_str_utf8_offset_to_index | ( | const char * | str, |
| const size_t | str_len, | ||
| const int | offset_target ) |
Definition at line 1060 of file string_utf8.cc.
References BLI_assert, BLI_str_utf8_as_unicode_step_safe(), str, and UNUSED_VARS.
Referenced by text_jump_to_file_at_point_exec().
| int BLI_str_utf8_size_or_error | ( | const char * | p | ) |
Definition at line 750 of file string_utf8.cc.
References utf8_char_compute_skip_or_error().
Referenced by handleNumInput(), ui_do_but_textedit(), ui_handle_menu_letter_press_search(), wm_event_add_ghostevent(), WM_event_print(), and WM_event_utf8_to_ascii().
| int BLI_str_utf8_size_safe | ( | const char * | p | ) |
Use when we want to skip errors.
Definition at line 755 of file string_utf8.cc.
References utf8_char_compute_skip().
Referenced by BLI_strlen_utf8_ex(), BLI_strnlen_utf8_ex(), console_cursor_wrap_offset(), console_indent_or_autocomplete_exec(), console_insert_invoke(), flatten_column_to_offset(), flatten_string(), flatten_width(), blender::string_search::get_fuzzy_match_errors(), space_text_cursor_set_to_pos_wrapped(), space_text_get_cursor_rel(), text_convert_whitespace_exec(), text_font_draw_character_utf8(), text_format_fill(), text_insert_invoke(), textview_wrap_offsets(), txt_wrap_move_bol(), txt_wrap_move_eol(), txtfmt_glsl_format_line(), txtfmt_osl_format_line(), txtfmt_pov_format_line(), txtfmt_pov_ini_format_line(), txtfmt_py_format_line(), and ui_text_clip_cursor().
| size_t BLI_strlen_utf8 | ( | const char * | strc | ) |
Definition at line 421 of file string_utf8.cc.
References BLI_strlen_utf8_ex().
Referenced by blo_do_versions_260(), insert_text_exec(), key_event_glyph_or_text(), blender::nodes::node_fn_string_length_cc::node_build_multi_function(), radial_control_paint_cursor(), TEST(), txt_sel_set(), ui_but_text_password_hide(), and blender::ed::object::voxel_size_edit_draw().
| size_t BLI_strlen_utf8_ex | ( | const char * | strc, |
| size_t * | r_len_bytes ) |
Definition at line 396 of file string_utf8.cc.
References BLI_str_utf8_size_safe(), len, and UNLIKELY.
Referenced by BLI_strlen_utf8().
| char * BLI_strncpy_utf8 | ( | char *__restrict | dst, |
| const char *__restrict | src, | ||
| size_t | dst_maxncpy ) |
Definition at line 343 of file string_utf8.cc.
References BLI_assert, BLI_string_debug_size, and str_utf8_copy_max_bytes_impl().
| size_t BLI_strncpy_utf8_rlen | ( | char *__restrict | dst, |
| const char *__restrict | src, | ||
| size_t | dst_maxncpy ) |
Definition at line 352 of file string_utf8.cc.
References BLI_assert, BLI_string_debug_size, and str_utf8_copy_max_bytes_impl().
| size_t BLI_strncpy_wchar_as_utf8 | ( | char *__restrict | dst, |
| const wchar_t *__restrict | src, | ||
| const size_t | dst_maxncpy ) |
Definition at line 366 of file string_utf8.cc.
References BLI_assert, BLI_str_utf8_from_unicode(), BLI_string_debug_size, len, and UNLIKELY.
| size_t BLI_strncpy_wchar_from_utf8 | ( | wchar_t *__restrict | dst_w, |
| const char *__restrict | src_c, | ||
| const size_t | dst_w_maxncpy ) |
Definition at line 461 of file string_utf8.cc.
References BLI_str_utf8_as_utf32(), BLI_string_debug_size, and conv_utf_8_to_16().
| size_t BLI_strnlen_utf8 | ( | const char * | strc, |
| size_t | strc_maxlen ) |
| strc | the string to measure the length. |
| strc_maxlen | the string length (in bytes) |
Definition at line 455 of file string_utf8.cc.
References BLI_strnlen_utf8_ex().
Referenced by blender::string_search::count_utf8_code_points(), TEST(), and ui_text_position_to_hidden().
| size_t BLI_strnlen_utf8_ex | ( | const char * | strc, |
| const size_t | strc_maxlen, | ||
| size_t * | r_len_bytes ) |
Definition at line 427 of file string_utf8.cc.
References BLI_str_utf8_size_safe(), len, and UNLIKELY.
Referenced by BLI_strnlen_utf8().
| int BLI_wcswidth_or_error | ( | const char32_t * | pwcs, |
| size_t | n ) |
Definition at line 496 of file string_utf8.cc.
| int BLI_wcwidth_or_error | ( | char32_t | ucs | ) |
Count columns that character/string occupies (based on wcwidth.c).
Definition at line 478 of file string_utf8.cc.
Referenced by blf_glyph_render(), BLI_str_cursor_step_next_utf32(), BLI_str_cursor_step_prev_utf32(), BLI_str_utf8_char_width_or_error(), and BLI_wcwidth_safe().
| int BLI_wcwidth_safe | ( | char32_t | ucs | ) |
Definition at line 487 of file string_utf8.cc.
References BLI_wcwidth_or_error().
Referenced by blf_font_draw_mono(), BLI_str_utf8_char_width_safe(), BLI_str_utf8_offset_from_column(), BLI_str_utf8_offset_from_column_with_tabs(), BLI_str_utf8_offset_to_column(), and BLI_str_utf8_offset_to_column_with_tabs().
| size_t BLI_wstrlen_utf8 | ( | const wchar_t * | src | ) |
wchar_t length in UTF-8.
Definition at line 385 of file string_utf8.cc.
References BLI_str_utf8_from_unicode_len(), and len.
| BLI_INLINE char * str_utf8_copy_max_bytes_impl | ( | char * | dst, |
| const char * | src, | ||
| size_t | dst_maxncpy ) |
Internal utility for implementing BLI_strncpy_utf8 / BLI_strncpy_utf8_rlen.
Compatible with BLI_strncpy, but ensure no partial UTF8 chars.
Definition at line 318 of file string_utf8.cc.
References ATTR_FALLTHROUGH, UNLIKELY, and utf8_char_compute_skip().
Referenced by BLI_strncpy_utf8(), and BLI_strncpy_utf8_rlen().
| BLI_INLINE int utf8_char_compute_skip | ( | const char | c | ) |
Definition at line 56 of file string_utf8.cc.
References UNLIKELY.
Referenced by BLI_str_utf8_invalid_byte(), BLI_str_utf8_size_safe(), and str_utf8_copy_max_bytes_impl().
| BLI_INLINE int utf8_char_compute_skip_or_error | ( | const char | c | ) |
Definition at line 78 of file string_utf8.cc.
Referenced by BLI_str_utf8_size_or_error().
| BLI_INLINE int utf8_char_compute_skip_or_error_with_mask | ( | const char | c, |
| char * | r_mask ) |
Definition at line 101 of file string_utf8.cc.
Referenced by BLI_str_utf8_as_unicode_or_error(), and BLI_str_utf8_as_unicode_step_or_error().
| BLI_INLINE uint utf8_char_decode | ( | const char * | p, |
| const char | mask, | ||
| const int | len, | ||
| const uint | err ) |
Decode a UTF8 code-point, use in combination with utf8_char_compute_skip_or_error_with_mask.
Definition at line 134 of file string_utf8.cc.
References count, len, mask(), and result.
Referenced by BLI_str_utf8_as_unicode_or_error(), and BLI_str_utf8_as_unicode_step_or_error().