Blender V4.3
utfconv.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2012 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
9#include "utfconv.hh"
10
/**
 * Count the bytes needed to hold \a string16 once converted to UTF-8.
 *
 * Unpaired low surrogates add nothing to the total. A high surrogate always consumes the unit
 * that follows it and only adds to the total when that unit is a low surrogate.
 *
 * \param string16: Null-terminated UTF-16 string, may be NULL.
 * \return Size in bytes including the null terminator, or 0 when \a string16 is NULL.
 */
size_t count_utf_8_from_16(const wchar_t *string16)
{
  if (!string16) {
    return 0;
  }

  /* Start at one so the null terminator is accounted for. */
  size_t bytes = 1;

  for (const wchar_t *cursor = string16; *cursor != 0; cursor++) {
    const wchar_t unit = *cursor;

    if (unit < 0x0080) {
      bytes += 1;
    }
    else if (unit < 0x0800) {
      bytes += 2;
    }
    else if (unit < 0xD800 || unit >= 0xE000) {
      bytes += 3;
    }
    else if (unit < 0xDC00) {
      /* High surrogate: step onto the next unit, stopping if the string ends here. */
      const wchar_t low = *++cursor;
      if (low == 0) {
        break;
      }
      if (low >= 0xDC00 && low < 0xE000) {
        bytes += 4;
      }
    }
    /* Remaining case is a lone low surrogate: illegal, nothing is counted. */
  }

  return bytes;
}
57
/**
 * Count the UTF-16 code units needed to hold \a string8 once converted from UTF-8.
 *
 * Bytes that cannot start a sequence are skipped. A sequence interrupted by a non-continuation
 * byte is discarded together with that byte. Decoded values that are zero, inside the surrogate
 * range or at/above 0x110000 add nothing to the total.
 *
 * \param string8: Null-terminated UTF-8 string, may be NULL.
 * \return Number of `wchar_t` units including the null terminator, or 0 when \a string8 is NULL.
 */
size_t count_utf_16_from_8(const char *string8)
{
  if (!string8) {
    return 0;
  }

  /* Start at one so the null terminator is accounted for. */
  size_t units = 1;
  /* Continuation bytes still expected for the sequence being decoded. */
  int pending = 0;
  /* Code point accumulated so far. */
  unsigned int code = 0;

  for (const char *cursor = string8; *cursor != '\0'; cursor++) {
    const unsigned int byte = (unsigned char)*cursor;

    if (pending == 0) {
      if (byte < 0x80) {
        /* Single byte sequence. */
        units++;
      }
      else if ((byte & 0xE0) == 0xC0) {
        /* Lead byte of a 2 byte sequence. */
        pending = 1;
        code = byte & 0x1F;
      }
      else if ((byte & 0xF0) == 0xE0) {
        /* Lead byte of a 3 byte sequence. */
        pending = 2;
        code = byte & 0x0F;
      }
      else if ((byte & 0xF8) == 0xF0) {
        /* Lead byte of a 4 byte sequence. */
        pending = 3;
        code = byte & 0x07;
      }
      /* Anything else cannot start a sequence and is skipped. */
      continue;
    }

    if ((byte & 0xC0) != 0x80) {
      /* Not a continuation byte: drop the partial sequence and this byte. */
      pending = 0;
      code = 0;
      continue;
    }

    code = (code << 6) | (byte & 0x3F);
    pending--;
    if (pending != 0) {
      continue;
    }

    /* Sequence complete: one unit for the BMP, two for a surrogate pair. */
    if ((0 < code && code < 0xD800) || (0xE000 <= code && code < 0x10000)) {
      units++;
    }
    else if (0x10000 <= code && code < 0x110000) {
      units += 2;
    }
    code = 0;
  }

  return units;
}
115
116int conv_utf_16_to_8(const wchar_t *in16, char *out8, size_t size8)
117{
118 char *out8end = out8 + size8;
119 wchar_t u = 0;
120 int err = 0;
121 if (!size8 || !in16 || !out8) {
122 return UTF_ERROR_NULL_IN;
123 }
124 out8end--;
125
126 for (; out8 < out8end && (u = *in16); in16++, out8++) {
127 if (u < 0x0080) {
128 *out8 = u;
129 }
130 else if (u < 0x0800) {
131 if (out8 + 1 >= out8end) {
132 break;
133 }
134 *out8++ = (0x3 << 6) | (0x1F & (u >> 6));
135 *out8 = (0x1 << 7) | (0x3F & (u));
136 }
137 else if (u < 0xD800 || u >= 0xE000) {
138 if (out8 + 2 >= out8end) {
139 break;
140 }
141 *out8++ = (0x7 << 5) | (0xF & (u >> 12));
142 *out8++ = (0x1 << 7) | (0x3F & (u >> 6));
143 *out8 = (0x1 << 7) | (0x3F & (u));
144 }
145 else if (u < 0xDC00) {
146 wchar_t u2 = *++in16;
147
148 if (!u2) {
149 break;
150 }
151 if (u2 >= 0xDC00 && u2 < 0xE000) {
152 if (out8 + 3 >= out8end) {
153 break;
154 }
155 unsigned int uc = 0x10000 + (u2 - 0xDC00) + ((u - 0xD800) << 10);
156
157 *out8++ = (0xF << 4) | (0x7 & (uc >> 18));
158 *out8++ = (0x1 << 7) | (0x3F & (uc >> 12));
159 *out8++ = (0x1 << 7) | (0x3F & (uc >> 6));
160 *out8 = (0x1 << 7) | (0x3F & (uc));
161 }
162 else {
163 out8--;
164 err |= UTF_ERROR_ILLCHAR;
165 }
166 }
167 else if (u < 0xE000) {
168 out8--;
169 err |= UTF_ERROR_ILLCHAR;
170 }
171 }
172
173 *out8 = *out8end = 0;
174
175 if (*in16) {
176 err |= UTF_ERROR_SMALL;
177 }
178
179 return err;
180}
181
182int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
183{
184 char u;
185 char type = 0;
186 unsigned int u32 = 0;
187 wchar_t *out16end = out16 + size16;
188 int err = 0;
189 if (!size16 || !in8 || !out16) {
190 return UTF_ERROR_NULL_IN;
191 }
192 out16end--;
193
194 for (; out16 < out16end && (u = *in8); in8++) {
195 if (type == 0) {
196 if ((u & 0x01 << 7) == 0) {
197 *out16 = u;
198 out16++;
199 u32 = 0;
200 continue;
201 } // 1 utf-8 char
202 if ((u & 0x07 << 5) == 0xC0) {
203 type = 1;
204 u32 = u & 0x1F;
205 continue;
206 } // 2 utf-8 char
207 if ((u & 0x0F << 4) == 0xE0) {
208 type = 2;
209 u32 = u & 0x0F;
210 continue;
211 } // 3 utf-8 char
212 if ((u & 0x1F << 3) == 0xF0) {
213 type = 3;
214 u32 = u & 0x07;
215 continue;
216 } // 4 utf-8 char
217 err |= UTF_ERROR_ILLCHAR;
218 continue;
219 }
220 if ((u & 0xC0) == 0x80) {
221 u32 = (u32 << 6) | (u & 0x3F);
222 type--;
223 }
224 else {
225 u32 = 0;
226 type = 0;
227 err |= UTF_ERROR_ILLSEQ;
228 }
229
230 if (type == 0) {
231 if ((0 < u32 && u32 < 0xD800) || (0xE000 <= u32 && u32 < 0x10000)) {
232 *out16 = u32;
233 out16++;
234 }
235 else if (0x10000 <= u32 && u32 < 0x110000) {
236 if (out16 + 1 >= out16end) {
237 break;
238 }
239 u32 -= 0x10000;
240 *out16 = 0xD800 + (u32 >> 10);
241 out16++;
242 *out16 = 0xDC00 + (u32 & 0x3FF);
243 out16++;
244 }
245 u32 = 0;
246 }
247 }
248
249 *out16 = *out16end = 0;
250
251 if (*in8) {
252 err |= UTF_ERROR_SMALL;
253 }
254
255 return err;
256}
257
/* UNUSED FUNCTIONS */
#if 0
/** Return 1 when no byte of the null-terminated string \a in8 has its high bit set, else 0. */
static int is_ascii(const char *in8)
{
  while (*in8) {
    if (0x80 & *in8) {
      return 0;
    }
    in8++;
  }

  return 1;
}

/**
 * Unfinished helper: it only reads the byte at \a maxcutpoint and modifies nothing.
 */
static void utf_8_cut_end(char *inout8, size_t maxcutpoint)
{
  char *cur = inout8 + maxcutpoint;
  char cc;
  if (!inout8) {
    return;
  }

  cc = *cur;
}
#endif
279
280char *alloc_utf_8_from_16(const wchar_t *in16, size_t add)
281{
282 size_t bsize = count_utf_8_from_16(in16);
283 char *out8 = NULL;
284 if (!bsize) {
285 return NULL;
286 }
287 out8 = (char *)malloc(sizeof(char) * (bsize + add));
288 conv_utf_16_to_8(in16, out8, bsize);
289 return out8;
290}
291
292wchar_t *alloc_utf16_from_8(const char *in8, size_t add)
293{
294 size_t bsize = count_utf_16_from_8(in8);
295 wchar_t *out16 = NULL;
296 if (!bsize) {
297 return NULL;
298 }
299 out16 = (wchar_t *)malloc(sizeof(wchar_t) * (bsize + add));
300 conv_utf_8_to_16(in8, out16, bsize);
301 return out16;
302}
#define NULL
int count
static void add(blender::Map< std::string, std::string > &messages, Message &msg)
Definition msgfmt.cc:227
wchar_t * alloc_utf16_from_8(const char *in8, size_t add)
Definition utfconv.cc:292
size_t count_utf_8_from_16(const wchar_t *string16)
Definition utfconv.cc:11
char * alloc_utf_8_from_16(const wchar_t *in16, size_t add)
Definition utfconv.cc:280
int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
Definition utfconv.cc:182
int conv_utf_16_to_8(const wchar_t *in16, char *out8, size_t size8)
Definition utfconv.cc:116
size_t count_utf_16_from_8(const char *string8)
Definition utfconv.cc:58
#define UTF_ERROR_ILLSEQ
Definition utfconv.hh:41
#define UTF_ERROR_ILLCHAR
Definition utfconv.hh:37
#define UTF_ERROR_NULL_IN
Definition utfconv.hh:35
#define UTF_ERROR_SMALL
Definition utfconv.hh:39