Blender V4.3
BLI_string_utf8.h File Reference
#include "BLI_compiler_attrs.h"
#include "BLI_sys_types.h"

Go to the source code of this file.

Macros

#define BLI_UTF8_MAX   6
 
#define BLI_UTF8_WIDTH_MAX   2 /* columns */
 
#define BLI_UTF8_ERR   ((unsigned int)-1)
 
String Copy/Format Macros

Avoid repeating destination with sizeof(..).

Note
ARRAY_SIZE allows pointers on some platforms.
#define STRNCPY_UTF8(dst, src)   BLI_strncpy_utf8(dst, src, ARRAY_SIZE(dst))
 
#define STRNCPY_UTF8_RLEN(dst, src)   BLI_strncpy_utf8_rlen(dst, src, ARRAY_SIZE(dst))
 

Functions

char * BLI_strncpy_utf8 (char *__restrict dst, const char *__restrict src, size_t dst_maxncpy) ATTR_NONNULL(1
 
size_t BLI_strncpy_utf8_rlen (char *__restrict dst, const char *__restrict src, size_t dst_maxncpy) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
 
ptrdiff_t BLI_str_utf8_invalid_byte (const char *str, size_t length) ATTR_NONNULL(1)
 
int BLI_str_utf8_invalid_strip (char *str, size_t length) ATTR_NONNULL(1)
 
int BLI_str_utf8_size_or_error (const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
int BLI_str_utf8_size_safe (const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
unsigned int BLI_str_utf8_as_unicode_or_error (const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
unsigned int BLI_str_utf8_as_unicode_safe (const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
unsigned int BLI_str_utf8_as_unicode_step_safe (const char *__restrict p, size_t p_len, size_t *__restrict index) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
 
unsigned int BLI_str_utf8_as_unicode_step_or_error (const char *__restrict p, size_t p_len, size_t *__restrict index) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
 
size_t BLI_str_utf8_from_unicode_len (unsigned int c) ATTR_WARN_UNUSED_RESULT
 
size_t BLI_str_utf8_from_unicode (unsigned int c, char *dst, size_t dst_maxncpy) ATTR_NONNULL(2)
 
size_t BLI_str_utf8_as_utf32 (char32_t *__restrict dst_w, const char *__restrict src_c, size_t dst_w_maxncpy) ATTR_NONNULL(1
 
size_t BLI_str_utf32_as_utf8 (char *__restrict dst, const char32_t *__restrict src, size_t dst_maxncpy) ATTR_NONNULL(1
 
size_t BLI_str_utf32_as_utf8_len_ex (const char32_t *src, size_t src_maxlen) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
size_t BLI_str_utf32_as_utf8_len (const char32_t *src) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
const char * BLI_str_find_prev_char_utf8 (const char *p, const char *str_start) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1
 
const char * BLI_str_find_next_char_utf8 (const char *p, const char *str_end) ATTR_WARN_UNUSED_RESULT ATTR_RETURNS_NONNULL ATTR_NONNULL(1
 
size_t BLI_wstrlen_utf8 (const wchar_t *src) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT
 
size_t BLI_strlen_utf8_ex (const char *strc, size_t *r_len_bytes) ATTR_NONNULL(1
 
size_t BLI_strlen_utf8 (const char *strc) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT
 
size_t BLI_strnlen_utf8_ex (const char *strc, size_t strc_maxlen, size_t *r_len_bytes) ATTR_NONNULL(1
 
size_t BLI_strnlen_utf8 (const char *strc, size_t strc_maxlen) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT
 
size_t BLI_strncpy_wchar_as_utf8 (char *__restrict dst, const wchar_t *__restrict src, size_t dst_maxncpy) ATTR_NONNULL(1
 
size_t BLI_strncpy_wchar_from_utf8 (wchar_t *__restrict dst_w, const char *__restrict src_c, size_t dst_w_maxncpy) ATTR_NONNULL(1
 
int BLI_wcwidth_or_error (char32_t ucs) ATTR_WARN_UNUSED_RESULT
 
int BLI_wcwidth_safe (char32_t ucs) ATTR_WARN_UNUSED_RESULT
 
int BLI_wcswidth_or_error (const char32_t *pwcs, size_t n) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
char32_t BLI_str_utf32_char_to_upper (char32_t wc)
 
char32_t BLI_str_utf32_char_to_lower (char32_t wc)
 
int BLI_str_utf8_char_width_or_error (const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
int BLI_str_utf8_char_width_safe (const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
size_t BLI_str_partition_utf8 (const char *str, const unsigned int delim[], const char **r_sep, const char **r_suf) ATTR_NONNULL(1
 
size_t BLI_str_rpartition_utf8 (const char *str, const unsigned int delim[], const char **r_sep, const char **r_suf) ATTR_NONNULL(1
 
size_t BLI_str_partition_ex_utf8 (const char *str, const char *end, const unsigned int delim[], const char **r_sep, const char **r_suf, bool from_right) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
 
int BLI_str_utf8_offset_to_index (const char *str, size_t str_len, int offset_target) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
int BLI_str_utf8_offset_from_index (const char *str, size_t str_len, int index_target) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
int BLI_str_utf8_offset_to_column (const char *str, size_t str_len, int offset_target) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
int BLI_str_utf8_offset_from_column (const char *str, size_t str_len, int column_target) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
int BLI_str_utf8_offset_to_column_with_tabs (const char *str, size_t str_len, int offset_target, int tab_width) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 
int BLI_str_utf8_offset_from_column_with_tabs (const char *str, size_t str_len, int column_target, int tab_width) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
 

Variables

size_t ATTR_WARN_UNUSED_RESULT
 

Macro Definition Documentation

◆ BLI_UTF8_ERR

◆ BLI_UTF8_MAX

#define BLI_UTF8_MAX   6

◆ BLI_UTF8_WIDTH_MAX

#define BLI_UTF8_WIDTH_MAX   2 /* columns */

Definition at line 245 of file BLI_string_utf8.h.

Referenced by textview_wrap_offsets().

◆ STRNCPY_UTF8

#define STRNCPY_UTF8 ( dst,
src )   BLI_strncpy_utf8(dst, src, ARRAY_SIZE(dst))

Definition at line 254 of file BLI_string_utf8.h.

Referenced by action_groups_add_new(), ANIM_armature_bonecoll_name_set(), ANIM_bonecoll_new(), arg_handle_engine_set(), asset_metadata_tag_add(), BKE_attribute_rename(), BKE_gpencil_layer_addnew(), BKE_id_new_name_validate(), BKE_keyblock_add(), BKE_keyingset_add(), BKE_object_defgroup_set_name(), BKE_preferences_asset_library_name_set(), BKE_preferences_extension_repo_name_set(), BKE_shaderfx_new(), BKE_view_layer_add(), BKE_view_layer_add_aov(), BKE_view_layer_add_lightgroup(), BKE_view_layer_rename(), BKE_view_layer_rename_lightgroup(), blo_do_versions_userdef(), boid_new_rule(), calculatePropRatio(), blender::animrig::ChannelBag::channel_group_create(), blender::animrig::convert_to_layered_action(), CustomData_set_layer_unique_name(), driver_add_new_variable(), dynamicPaintSurface_setUniqueName(), ED_info_draw_stats(), ED_node_shader_default(), blender::init_data(), blender::ed::asset::index::init_indexer_entry_from_value(), blender::animrig::Action::layer_add(), blender::bke::greasepencil::convert::legacy_object_modifier_common(), blender::ed::object::modifier_add(), modifier_allocate_and_init(), blender::bke::node_init(), ntreeCompositOutputFileAddSocket(), ntreeCompositOutputFileSetLayer(), ntreeCompositOutputFileSetPath(), blender::ed::object::object_speaker_add_exec(), preferences_extension_repo_add_exec(), proj_paint_add_slot(), SEQ_ensure_unique_name(), blender::ed::object::shaderfx_add(), blender::animrig::Action::slot_add(), blender::animrig::Action::slot_name_define(), blender::animrig::Action::slot_name_propagate(), blender::animrig::tests::TEST_F(), ui_but_anim_expression_create(), ui_but_anim_expression_set(), blender::ed::space_node::ui_node_menu_column(), version_node_add_empty(), view_layer_add(), WM_dropboxmap_find(), WM_keyconfig_new(), and wm_keymap_new().

◆ STRNCPY_UTF8_RLEN

#define STRNCPY_UTF8_RLEN ( dst,
src )   BLI_strncpy_utf8_rlen(dst, src, ARRAY_SIZE(dst))

Definition at line 255 of file BLI_string_utf8.h.

Function Documentation

◆ BLI_str_find_next_char_utf8()

const char * BLI_str_find_next_char_utf8 ( const char * p,
const char * str_end )
Parameters
p: a pointer to a position within a UTF-8 encoded string.
str_end: a pointer to the byte following the end of the string.

Finds the start of the next UTF-8 character in the string after p.

p does not have to be at the beginning of a UTF-8 character. No check is made to see if the character found is actually valid other than it starts with an appropriate byte.

Returns
a pointer to the found character or a pointer to the null terminating character '\0'.

Referenced by BLI_str_cursor_step_next_utf8(), ui_text_clip_give_next_off(), and ui_text_position_from_hidden().

◆ BLI_str_find_prev_char_utf8()

const char * BLI_str_find_prev_char_utf8 ( const char * p,
const char * str_start )

BLI_str_find_prev_char_utf8:

Parameters
p: pointer to some position within str.
str_start: pointer to the beginning of a UTF-8 encoded string.

Given a position p within a UTF-8 encoded string str, find the start of the previous UTF-8 character starting before p. Returns str_start if no UTF-8 characters are present in str_start before p.

p does not have to be at the beginning of a UTF-8 character. No check is made to see if the character found is actually valid other than it starts with an appropriate byte.

Returns
A pointer to the found character.

Referenced by blf_font_width_to_rstrlen(), BLI_str_cursor_step_prev_utf8(), text_delete_exec(), txt_wrap_move_eol(), ui_text_clip_cursor(), ui_text_clip_give_prev_off(), ui_text_clip_right_label(), and unit_find_str().

◆ BLI_str_partition_ex_utf8()

size_t BLI_str_partition_ex_utf8 ( const char * str,
const char * end,
const unsigned int delim[],
const char ** r_sep,
const char ** r_suf,
bool from_right )

◆ BLI_str_partition_utf8()

size_t BLI_str_partition_utf8 ( const char * str,
const unsigned int delim[],
const char ** r_sep,
const char ** r_suf )

Referenced by TEST().

◆ BLI_str_rpartition_utf8()

size_t BLI_str_rpartition_utf8 ( const char * str,
const unsigned int delim[],
const char ** r_sep,
const char ** r_suf )

Referenced by TEST().

◆ BLI_str_utf32_as_utf8()

size_t BLI_str_utf32_as_utf8 ( char *__restrict dst,
const char32_t *__restrict src,
size_t dst_maxncpy )

◆ BLI_str_utf32_as_utf8_len()

size_t BLI_str_utf32_as_utf8_len ( const char32_t * src)
Returns
The length of the UTF-32 string src when converted to UTF-8.

Definition at line 950 of file string_utf8.cc.

References BLI_str_utf8_from_unicode_len(), and len.

Referenced by BKE_vfont_clipboard_set(), and ED_curve_editfont_load().

◆ BLI_str_utf32_as_utf8_len_ex()

size_t BLI_str_utf32_as_utf8_len_ex ( const char32_t * src,
size_t src_maxlen )
Returns
The length of the UTF-32 string src when converted to UTF-8, with the source length clamped to src_maxlen.

Definition at line 938 of file string_utf8.cc.

References BLI_str_utf8_from_unicode_len(), and len.

Referenced by font_select_to_buffer().

◆ BLI_str_utf32_char_to_lower()

char32_t BLI_str_utf32_char_to_lower ( char32_t wc)

Return the lowercase of a 32-bit character or the character when no case change is needed.

Note
A 1:1 mapping doesn't account for multiple characters as part of conversion in some cases.

Definition at line 642 of file string_utf8.cc.

References ARRAY_SIZE, max, and min.

Referenced by set_case().

◆ BLI_str_utf32_char_to_upper()

char32_t BLI_str_utf32_char_to_upper ( char32_t wc)

Return the uppercase of a 32-bit character or the character when no case change is needed.

Note
A 1:1 mapping doesn't account for multiple characters as part of conversion in some cases.

Definition at line 531 of file string_utf8.cc.

References ARRAY_SIZE, max, and min.

Referenced by set_case().

◆ BLI_str_utf8_as_unicode_or_error()

unsigned int BLI_str_utf8_as_unicode_or_error ( const char * p)
Parameters
p: a pointer to a Unicode character encoded as UTF-8.

Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If p does not point to a valid UTF-8 encoded character, results are undefined. If you are not sure that the bytes are complete valid Unicode characters, you should use g_utf8_get_char_validated() instead.

Return value: the resulting character

Definition at line 760 of file string_utf8.cc.

References BLI_UTF8_ERR, len, UNLIKELY, utf8_char_compute_skip_or_error_with_mask(), and utf8_char_decode().

Referenced by BLI_str_partition_ex_utf8(), BLI_str_utf8_as_unicode_safe(), BLI_str_utf8_char_width_or_error(), BLI_str_utf8_char_width_safe(), insert_text_invoke(), key_event_glyph_or_text(), text_autocomplete_build(), and text_insert_invoke().

◆ BLI_str_utf8_as_unicode_safe()

unsigned int BLI_str_utf8_as_unicode_safe ( const char * p)

◆ BLI_str_utf8_as_unicode_step_or_error()

unsigned int BLI_str_utf8_as_unicode_step_or_error ( const char *__restrict p,
size_t p_len,
size_t *__restrict index )

UTF8 decoding that steps over the index (unless an error is encountered).

Parameters
p: The text to step over.
p_len: The length of p.
index: Index of p to step over.
Returns
the code-point or BLI_UTF8_ERR if there is a decoding error.
Note
The behavior for clipped text (where p_len limits decoding trailing bytes) must be the same as the behavior when encountering a nil byte, so that functions which only use the first part of a string have matching behavior to functions that null terminate the text.

Referenced by cursor_delim_type_utf8().

◆ BLI_str_utf8_as_unicode_step_safe()

unsigned int BLI_str_utf8_as_unicode_step_safe ( const char *__restrict p,
size_t p_len,
size_t *__restrict index )

UTF8 decoding that steps over the index. When an error is encountered fall back to LATIN1, stepping over a single byte.

Parameters
p: The text to step over.
p_len: The length of p.
index: Index of p to step over.
Returns
the code-point, or the single byte at (p + *index) interpreted as LATIN1 if there is a decoding error.

Referenced by blf_glyph_from_utf8_and_step(), blender::string_search::damerau_levenshtein_distance(), blender::string_search::extract_normalized_words(), blender::string_search::match_word_initials(), text_autocomplete_build(), text_insert_exec(), txt_insert_buf(), txt_replace_char(), and utf8_as_char32().

◆ BLI_str_utf8_as_utf32()

size_t BLI_str_utf8_as_utf32 ( char32_t *__restrict dst_w,
const char *__restrict src_c,
size_t dst_w_maxncpy )

◆ BLI_str_utf8_char_width_or_error()

int BLI_str_utf8_char_width_or_error ( const char * p)

◆ BLI_str_utf8_char_width_safe()

◆ BLI_str_utf8_from_unicode()

size_t BLI_str_utf8_from_unicode ( unsigned int c,
char * dst,
size_t dst_maxncpy )

BLI_str_utf8_from_unicode:

Parameters
c: a Unicode character code.
dst: output buffer, must have at least dst_maxncpy bytes of space. If the length required by c exceeds dst_maxncpy, the available bytes will be zeroed and dst_maxncpy returned.

Converts a single character to UTF-8.

Returns
number of bytes written.

Definition at line 861 of file string_utf8.cc.

References BLI_string_debug_size, len, UNLIKELY, and UTF8_VARS_FROM_CHAR32.

Referenced by BLI_str_utf32_as_utf8(), BLI_strncpy_wchar_as_utf8(), find_family_object(), blender::io::usd::make_safe_name(), txt_add_char_intern(), txt_extended_ascii_as_utf8(), txt_replace_char(), and wm_event_add_ghostevent().

◆ BLI_str_utf8_from_unicode_len()

size_t BLI_str_utf8_from_unicode_len ( unsigned int c)

◆ BLI_str_utf8_invalid_byte()

ptrdiff_t BLI_str_utf8_invalid_byte ( const char * str,
size_t length )

Find first UTF-8 invalid byte in given str, of length bytes.

Returns
the offset of the first invalid byte.

Definition at line 150 of file string_utf8.cc.

References ELEM, str, and utf8_char_compute_skip().

Referenced by BLI_str_utf8_invalid_strip(), and txt_extended_ascii_as_utf8().

◆ BLI_str_utf8_invalid_strip()

int BLI_str_utf8_invalid_strip ( char * str,
size_t length )

Remove any invalid UTF-8 bytes (taking multi-byte sequences into account).

Returns
number of stripped bytes.

Definition at line 285 of file string_utf8.cc.

References BLI_assert, BLI_str_utf8_invalid_byte(), and str.

Referenced by BKE_id_new_name_validate(), BKE_vfontdata_from_freetypefont(), id_name_final_build(), outputNumInput(), SEQ_edit_sequence_name_set(), TEST(), ui_textedit_end(), and wm_clipboard_text_get_ex().

◆ BLI_str_utf8_offset_from_column()

int BLI_str_utf8_offset_from_column ( const char * str,
size_t str_len,
int column_target )

Definition at line 1105 of file string_utf8.cc.

References BLI_str_utf8_as_unicode_step_safe(), BLI_wcwidth_safe(), int, and str.

◆ BLI_str_utf8_offset_from_column_with_tabs()

int BLI_str_utf8_offset_from_column_with_tabs ( const char * str,
size_t str_len,
int column_target,
int tab_width )

◆ BLI_str_utf8_offset_from_index()

int BLI_str_utf8_offset_from_index ( const char * str,
size_t str_len,
int index_target )

Return the byte offset in str from index_target.

Parameters
index_target: The unicode index, where multi-byte characters are counted once. There is no need to clamp this value, the index is logically clamped to BLI_strlen_utf8(str) or below.

Definition at line 1077 of file string_utf8.cc.

References BLI_assert, BLI_str_utf8_as_unicode_step_safe(), int, str, and UNUSED_VARS.

Referenced by blender::nodes::node_geo_string_to_curves_cc::get_text_layout(), blender::nodes::node_fn_slice_string_cc::node_build_multi_function(), TEST(), and txt_sel_set().

◆ BLI_str_utf8_offset_to_column()

int BLI_str_utf8_offset_to_column ( const char * str,
size_t str_len,
int offset_target )

◆ BLI_str_utf8_offset_to_column_with_tabs()

int BLI_str_utf8_offset_to_column_with_tabs ( const char * str,
size_t str_len,
int offset_target,
int tab_width )

◆ BLI_str_utf8_offset_to_index()

int BLI_str_utf8_offset_to_index ( const char * str,
size_t str_len,
int offset_target )

◆ BLI_str_utf8_size_or_error()

int BLI_str_utf8_size_or_error ( const char * p)
Returns
The size (in bytes) of a single UTF-8 char.
Warning
Can return -1 on bad chars.

Definition at line 750 of file string_utf8.cc.

References utf8_char_compute_skip_or_error().

Referenced by handleNumInput(), ui_do_but_textedit(), ui_handle_menu_letter_press_search(), wm_event_add_ghostevent(), WM_event_print(), and WM_event_utf8_to_ascii().

◆ BLI_str_utf8_size_safe()

◆ BLI_strlen_utf8()

◆ BLI_strlen_utf8_ex()

size_t BLI_strlen_utf8_ex ( const char * strc,
size_t * r_len_bytes )

◆ BLI_strncpy_utf8()

◆ BLI_strncpy_utf8_rlen()

size_t BLI_strncpy_utf8_rlen ( char *__restrict dst,
const char *__restrict src,
size_t dst_maxncpy )

Referenced by BLI_uniquename_cb().

◆ BLI_strncpy_wchar_as_utf8()

size_t BLI_strncpy_wchar_as_utf8 ( char *__restrict dst,
const wchar_t *__restrict src,
size_t dst_maxncpy )

◆ BLI_strncpy_wchar_from_utf8()

size_t BLI_strncpy_wchar_from_utf8 ( wchar_t *__restrict dst_w,
const char *__restrict src_c,
size_t dst_w_maxncpy )

Referenced by fsmenu_read_system().

◆ BLI_strnlen_utf8()

size_t BLI_strnlen_utf8 ( const char * strc,
size_t strc_maxlen )
Parameters
strc: the string to measure the length of.
strc_maxlen: the string length (in bytes).
Returns
the unicode length (not in bytes!)

Definition at line 455 of file string_utf8.cc.

References BLI_strnlen_utf8_ex().

Referenced by blender::string_search::count_utf8_code_points(), TEST(), and ui_text_position_to_hidden().

◆ BLI_strnlen_utf8_ex()

size_t BLI_strnlen_utf8_ex ( const char * strc,
size_t strc_maxlen,
size_t * r_len_bytes )

Referenced by ui_textedit_insert_buf().

◆ BLI_wcswidth_or_error()

int BLI_wcswidth_or_error ( const char32_t * pwcs,
size_t n )

Definition at line 496 of file string_utf8.cc.

◆ BLI_wcwidth_or_error()

int BLI_wcwidth_or_error ( char32_t ucs)

Count the columns that a character/string occupies (based on wcwidth.c).

Definition at line 478 of file string_utf8.cc.

Referenced by blf_glyph_render(), BLI_str_cursor_step_next_utf32(), BLI_str_cursor_step_prev_utf32(), BLI_str_utf8_char_width_or_error(), and BLI_wcwidth_safe().

◆ BLI_wcwidth_safe()

◆ BLI_wstrlen_utf8()

size_t BLI_wstrlen_utf8 ( const wchar_t * src)
Returns
the length of the wchar_t string src when converted to UTF-8.

Definition at line 385 of file string_utf8.cc.

References BLI_str_utf8_from_unicode_len(), and len.

Variable Documentation

◆ ATTR_WARN_UNUSED_RESULT

size_t ATTR_WARN_UNUSED_RESULT

Definition at line 156 of file BLI_string_utf8.h.