Blender V5.0
messages.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2009-2015 Artyom Beilis (Tonkikh)
2 * SPDX-FileCopyrightText: 2021-2023 Alexander Grund
3 * SPDX-FileCopyrightText: 2025 Blender Authors
4 *
5 * SPDX-License-Identifier: BSL-1.0
6 *
7 * Adapted from boost::locale */
8
12
13#include "messages.hh"
14
15#include <algorithm>
16#include <cstdint>
17#include <cstdio>
18#include <memory>
19#include <string>
20#include <string_view>
21
22#include "BLI_assert.h"
23#include "BLI_fileops.h"
24#include "BLI_hash.hh"
25#include "BLI_map.hh"
26#include "BLI_path_utils.hh"
27#include "BLI_string_ref.hh"
28#include "BLI_vector.hh"
29
30#ifdef _WIN32
31# include "BLI_winstuff.h"
32#endif
33
34#include "CLG_log.h"
35
36namespace blender::locale {
37
38static CLG_LogRef LOG = {"translation"};
39
40/* Upper/lower case, intentionally restricted to ASCII. */
41
/** Return true when \a c is one of the ASCII capital letters `A`..`Z`. */
static constexpr bool is_upper_ascii(const char c)
{
  return c >= 'A' && c <= 'Z';
}
46
/** Return true when \a c is one of the ASCII small letters `a`..`z`. */
static constexpr bool is_lower_ascii(const char c)
{
  return c >= 'a' && c <= 'z';
}
51
52static bool make_lower_ascii(char &c)
53{
54 if (is_upper_ascii(c)) {
55 c += 'a' - 'A';
56 return true;
57 }
58 return false;
59}
60
61static bool make_upper_ascii(char &c)
62{
63 if (is_lower_ascii(c)) {
64 c += 'A' - 'a';
65 return true;
66 }
67 return false;
68}
69
/** Return true when \a c is one of the ASCII decimal digits `0`..`9`. */
static constexpr bool is_numeric_ascii(const char c)
{
  return c >= '0' && c <= '9';
}
74
75/* Info about a locale. */
76
77class Info {
78 public:
79 std::string language = "C";
80 std::string script;
81 std::string country;
82 std::string variant;
83
84 Info(const StringRef locale_full_name)
85 {
86 std::string locale_name(locale_full_name);
87
88 /* If locale name not specified, try to get the appropriate one from the system. */
89#if defined(__APPLE__) && !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL)
90 if (locale_name.empty()) {
91 locale_name = macos_user_locale();
92 }
93#endif
94
95 if (locale_name.empty()) {
96 const char *lc_all = BLI_getenv("LC_ALL");
97 if (lc_all) {
98 locale_name = lc_all;
99 }
100 }
101 if (locale_name.empty()) {
102 const char *lang = BLI_getenv("LANG");
103 if (lang) {
104 locale_name = lang;
105 }
106 }
107
108#ifdef _WIN32
109 if (locale_name.empty()) {
110 char buf[128] = {};
111 if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME, buf, sizeof(buf)) != 0) {
112 locale_name = buf;
113 if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME, buf, sizeof(buf)) != 0) {
114 std::string region = buf;
115 if (locale_name == "zh") {
116 if (region == "TW" || region == "HK" || region == "MO") {
117 /* Traditional for Taiwan, Hong Kong, Macau. */
118 locale_name += "_HANT";
119 }
120 else {
121 /* Simplified for all other areas. */
122 locale_name += "_HANS";
123 }
124 }
125 else {
126 locale_name += "_" + region;
127 }
128 }
129 }
130 }
131#endif
132
133 parse_from_lang(locale_name);
134 }
135
136 std::string to_full_name() const
137 {
138 std::string result = language;
139 if (!script.empty()) {
140 result += '_' + script;
141 }
142 if (!country.empty()) {
143 result += '_' + country;
144 }
145 if (!variant.empty()) {
146 result += '@' + variant;
147 }
148 return result;
149 }
150
151 private:
152 /* Locale parsing. */
153 bool parse_from_variant(const std::string_view input)
154 {
155 if (language == "C" || input.empty()) {
156 return false;
157 }
158 variant = input;
159 /* No assumptions, just make it lowercase. */
160 for (char &c : variant) {
162 }
163 return true;
164 }
165
166 bool parse_from_encoding(const std::string_view input)
167 {
168 const int64_t end = input.find_first_of('@');
169 std::string tmp(input.substr(0, end));
170 if (tmp.empty()) {
171 return false;
172 }
173 /* tmp contains encoding, we ignore it. */
174 if (end >= input.size()) {
175 return true;
176 }
177 BLI_assert(input[end] == '@');
178 return parse_from_variant(input.substr(end + 1));
179 }
180
181 bool parse_from_country(const std::string_view input)
182 {
183 if (language == "C") {
184 return false;
185 }
186
187 const int64_t end = input.find_first_of("@.");
188 std::string tmp(input.substr(0, end));
189 if (tmp.empty()) {
190 return false;
191 }
192
193 for (char &c : tmp) {
195 }
196
197 /* If it's ALL uppercase ASCII, assume ISO 3166 country id. */
198 if (std::find_if_not(tmp.begin(), tmp.end(), is_upper_ascii) != tmp.end()) {
199 /* else handle special cases:
200 * - en_US_POSIX is an alias for C
201 * - M49 country code: 3 digits */
202 if (language == "en" && tmp == "US_POSIX") {
203 language = "C";
204 tmp.clear();
205 }
206 else if (tmp.size() != 3u ||
207 std::find_if_not(tmp.begin(), tmp.end(), is_numeric_ascii) != tmp.end())
208 {
209 return false;
210 }
211 }
212
213 country = tmp;
214 if (end >= input.size()) {
215 return true;
216 }
217 if (input[end] == '.') {
218 return parse_from_encoding(input.substr(end + 1));
219 }
220 BLI_assert(input[end] == '@');
221 return parse_from_variant(input.substr(end + 1));
222 }
223
224 bool parse_from_script(const std::string_view input)
225 {
226 const int64_t end = input.find_first_of("-_@.");
227 std::string tmp(input.substr(0, end));
228 /* Script is exactly 4 ASCII characters, otherwise it is not present. */
229 if (tmp.length() != 4) {
230 return parse_from_country(input);
231 }
232
233 for (char &c : tmp) {
234 if (!is_lower_ascii(c) && !make_lower_ascii(c)) {
235 return parse_from_country(input);
236 }
237 }
238 make_upper_ascii(tmp[0]); /* Capitalize first letter only. */
239 script = tmp;
240
241 if (end >= input.size()) {
242 return true;
243 }
244 if (ELEM(input[end], '-', '_')) {
245 return parse_from_country(input.substr(end + 1));
246 }
247 if (input[end] == '.') {
248 return parse_from_encoding(input.substr(end + 1));
249 }
250 BLI_assert(input[end] == '@');
251 return parse_from_variant(input.substr(end + 1));
252 }
253
254 bool parse_from_lang(const std::string_view input)
255 {
256 const int64_t end = input.find_first_of("-_@.");
257 std::string tmp(input.substr(0, end));
258 if (tmp.empty()) {
259 return false;
260 }
261 for (char &c : tmp) {
262 if (!is_lower_ascii(c) && !make_lower_ascii(c)) {
263 return false;
264 }
265 }
266 if (!ELEM(tmp, "c", "posix")) { /* Keep default if C or POSIX. */
267 language = tmp;
268 }
269
270 if (end >= input.size()) {
271 return true;
272 }
273 if (ELEM(input[end], '-', '_')) {
274 return parse_from_script(input.substr(end + 1));
275 }
276 if (input[end] == '.') {
277 return parse_from_encoding(input.substr(end + 1));
278 }
279 BLI_assert(input[end] == '@');
280 return parse_from_variant(input.substr(end + 1));
281 }
282};
283
284/* .mo file reader. */
285
286class MOFile {
287 uint32_t keys_offset_ = 0;
288 uint32_t translations_offset_ = 0;
289
290 Vector<char> data_;
291 bool native_byteorder_ = false;
292 size_t size_ = false;
293
294 std::string error_;
295
296 public:
297 MOFile(const std::string &filepath)
298 {
299 FILE *file = BLI_fopen(filepath.c_str(), "rb");
300 if (!file) {
301 return;
302 }
303
304 fseek(file, 0, SEEK_END);
305 const int64_t len = BLI_ftell(file);
306 if (len >= 0) {
307 fseek(file, 0, SEEK_SET);
308 data_.resize(len);
309 if (fread(data_.data(), 1, len, file) != len) {
310 data_.clear();
311 error_ = "Failed to read file";
312 }
313 }
314 else {
315 error_ = "Wrong file object";
316 }
317
318 fclose(file);
319
320 if (error_.empty()) {
321 read_data();
322 }
323 }
324
325 const char *key(int id)
326 {
327 const uint32_t off = get(keys_offset_ + id * 8 + 4);
328 return data_.data() + off;
329 }
330
332 {
333 const uint32_t len = get(translations_offset_ + id * 8);
334 const uint32_t off = get(translations_offset_ + id * 8 + 4);
335 if (len > data_.size() || off > data_.size() - len) {
336 error_ = "Bad mo-file format";
337 return "";
338 }
339 return StringRef(&data_[off], len);
340 }
341
342 size_t size() const
343 {
344 return size_;
345 }
346
347 bool empty() const
348 {
349 return size_ == 0;
350 }
351
352 const std::string &error() const
353 {
354 return error_;
355 }
356
357 private:
358 void read_data()
359 {
360 if (data_.size() < 4) {
361 error_ = "Invalid 'mo' file format - the file is too short";
362 return;
363 }
364
365 uint32_t magic;
366 memcpy(&magic, data_.data(), sizeof(magic));
367 if (magic == 0x950412de) {
368 native_byteorder_ = true;
369 }
370 else if (magic == 0xde120495) {
371 native_byteorder_ = false;
372 }
373 else {
374 error_ = "Invalid file format - invalid magic number";
375 return;
376 }
377
378 // Read all format sizes
379 size_ = get(8);
380 keys_offset_ = get(12);
381 translations_offset_ = get(16);
382 }
383
384 uint32_t get(int offset)
385 {
386 if (offset > data_.size() - 4) {
387 error_ = "Bad mo-file format";
388 return 0;
389 }
390 uint32_t v;
391 memcpy(&v, &data_[offset], 4);
392 if (!native_byteorder_) {
393 v = ((v & 0xFF) << 24) | ((v & 0xFF00) << 8) | ((v & 0xFF0000) >> 8) |
394 ((v & 0xFF000000) >> 24);
395 }
396
397 return v;
398 }
399};
400
401/* Message lookup key. */
402
406
408 {
409 return get_default_hash(this->context, this->str);
410 }
411};
412
414 std::string context;
415 std::string str;
416
418 {
419 const size_t pos = c.find(char(4));
420 if (pos == StringRef::not_found) {
421 this->str = c;
422 }
423 else {
424 this->context = c.substr(0, pos);
425 this->str = c.substr(pos + 1);
426 }
427 }
428
430 {
431 return get_default_hash(this->context, this->str);
432 }
433
434 static uint64_t hash_as(const MessageKeyRef &key)
435 {
436 return key.hash();
437 }
438};
439
440inline bool operator==(const MessageKey &a, const MessageKey &b)
441{
442 return a.context == b.context && a.str == b.str;
443}
444
445inline bool operator==(const MessageKeyRef &a, const MessageKey &b)
446{
447 return a.context == b.context && a.str == b.str;
448}
449
450/* Messages translation based on .mo files. */
451
453 using Catalog = Map<MessageKey, std::string>;
454 Vector<Catalog> catalogs_;
455 std::string error_;
456
457 public:
458 MOMessages(const Info &info,
459 const Vector<std::string> &domains,
460 const Vector<std::string> &paths)
461 {
462 const Vector<std::string> catalog_paths = get_catalog_paths(info, paths);
463 for (size_t i = 0; i < domains.size(); i++) {
464 const std::string &domain_name = domains[i];
465 const std::string filename = domain_name + ".mo";
466 Catalog catalog;
467 for (const std::string &path : catalog_paths) {
468 if (load_file(path + "/" + filename, catalog)) {
469 break;
470 }
471 }
472 catalogs_.append(std::move(catalog));
473 }
474 }
475
476 std::optional<StringRefNull> translate(const int domain,
477 const StringRef context,
478 const StringRef str) const
479 {
480 if (domain < 0 || domain >= catalogs_.size()) {
481 return std::nullopt;
482 }
483 const MessageKeyRef key{context, str};
484 const std::string *result = catalogs_[domain].lookup_ptr_as(key);
485 if (!result) {
486 return std::nullopt;
487 }
488 return *result;
489 }
490
491 const std::string &error()
492 {
493 return error_;
494 }
495
496 private:
497 Vector<std::string> get_catalog_paths(const Info &info, const Vector<std::string> &paths)
498 {
499 /* Find language folders. */
500 Vector<std::string> lang_folders;
501 if (info.language.empty()) {
502 return {};
503 }
504
505 /* Blender uses non-standard uppercase script zh_HANS instead of zh_Hans, try both. */
506 Vector<std::string> scripts = {info.script};
507 if (!info.script.empty()) {
508 std::string script_uppercase = info.script;
509 for (char &c : script_uppercase) {
511 }
512 scripts.append(script_uppercase);
513 }
514
515 for (const std::string &script : scripts) {
516 std::string language = info.language;
517 if (!script.empty()) {
518 language += "_" + script;
519 }
520 if (!info.variant.empty() && !info.country.empty()) {
521 lang_folders.append(language + "_" + info.country + "@" + info.variant);
522 }
523 if (!info.variant.empty()) {
524 lang_folders.append(language + "@" + info.variant);
525 }
526 if (!info.country.empty()) {
527 lang_folders.append(language + "_" + info.country);
528 }
529 lang_folders.append(language);
530 }
531
532 /* Find catalogs in language folders. */
533 Vector<std::string> result;
534 result.reserve(lang_folders.size() * paths.size());
535 for (const std::string &lang_folder : lang_folders) {
536 for (const std::string &search_path : paths) {
537 result.append(search_path + "/" + lang_folder + "/LC_MESSAGES");
538 }
539 }
540 return result;
541 }
542
543 bool load_file(const std::string &filepath, Catalog &catalog)
544 {
545 MOFile mo(filepath);
546 if (!mo.error().empty()) {
547 error_ = mo.error();
548 return false;
549 }
550 if (mo.empty()) {
551 return false;
552 }
553
554 /* Only support UTF8 encoded files, as created by our msgfmt tool. */
555 const std::string mo_encoding = extract(mo.value(0), "charset=", " \r\n;");
556 if (mo_encoding.empty()) {
557 error_ = "Invalid mo-format, encoding is not specified";
558 return false;
559 }
560 if (mo_encoding != "UTF-8") {
561 error_ = "supported mo-format, encoding must be UTF-8";
562 return false;
563 }
564
565 CLOG_INFO(&LOG, "Load messages from \"%s\"", filepath.c_str());
566
567 /* Create context + key to translated string mapping. */
568 for (size_t i = 0; i < mo.size(); i++) {
569 const MessageKey key(mo.key(i));
570 catalog.add(std::move(key), std::string(mo.value(i)));
571 }
572
573 return true;
574 }
575
576 static std::string extract(StringRef meta, const std::string &key, const StringRef separators)
577 {
578 const size_t pos = meta.find(key);
579 if (pos == StringRef::not_found) {
580 return "";
581 }
582 meta = meta.substr(pos + key.size());
583 const size_t end_pos = meta.find_first_of(separators);
584 return std::string(meta.substr(0, end_pos));
585 }
586};
587
588/* Public API */
589
590static std::unique_ptr<MOMessages> global_messages;
591static std::string global_full_name;
592
593void init(const StringRef locale_full_name,
594 const Vector<std::string> &domains,
595 const Vector<std::string> &paths)
596{
597 Info info(locale_full_name);
598 if (global_full_name == info.to_full_name()) {
599 return;
600 }
601
602 global_messages = std::make_unique<MOMessages>(info, domains, paths);
604
605 if (global_messages->error().empty()) {
606 CLOG_INFO(&LOG, "Locale %s used for translation", global_full_name.c_str());
607 }
608 else {
609 CLOG_ERROR(&LOG, "Locale %s: %s", global_full_name.c_str(), global_messages->error().c_str());
610 free();
611 }
612}
613
614void free()
615{
616 global_messages.reset();
617 global_full_name = "";
618}
619
620std::optional<StringRefNull> translate(const int domain,
621 const StringRef context,
622 const StringRef key)
623{
624 if (!global_messages) {
625 return std::nullopt;
626 }
627
628 return global_messages->translate(domain, context, key);
629}
630
631const char *full_name()
632{
633 return global_full_name.c_str();
634}
635
636} // namespace blender::locale
#define BLI_assert(a)
Definition BLI_assert.h:46
File and directory operations.
FILE * BLI_fopen(const char *filepath, const char *mode) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
int64_t BLI_ftell(FILE *stream) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition storage.cc:190
const char * BLI_getenv(const char *env) ATTR_NONNULL(1) ATTR_WARN_UNUSED_RESULT
#define ELEM(...)
Compatibility-like things for windows.
#define CLOG_ERROR(clg_ref,...)
Definition CLG_log.h:188
#define CLOG_INFO(clg_ref,...)
Definition CLG_log.h:190
ATTR_WARN_UNUSED_RESULT const BMVert * v
void init()
long long int int64_t
unsigned long long int uint64_t
void append(const T &value)
void reserve(const int64_t min_capacity)
static constexpr int64_t not_found
constexpr int64_t find(char c, int64_t pos=0) const
constexpr StringRef substr(int64_t start, int64_t size) const
constexpr int64_t find_first_of(StringRef chars, int64_t pos=0) const
int64_t size() const
void append(const T &value)
std::string script
Definition messages.cc:80
std::string variant
Definition messages.cc:82
Info(const StringRef locale_full_name)
Definition messages.cc:84
std::string to_full_name() const
Definition messages.cc:136
std::string country
Definition messages.cc:81
std::string language
Definition messages.cc:79
MOFile(const std::string &filepath)
Definition messages.cc:297
const std::string & error() const
Definition messages.cc:352
StringRef value(int id)
Definition messages.cc:331
size_t size() const
Definition messages.cc:342
const char * key(int id)
Definition messages.cc:325
MOMessages(const Info &info, const Vector< std::string > &domains, const Vector< std::string > &paths)
Definition messages.cc:458
std::optional< StringRefNull > translate(const int domain, const StringRef context, const StringRef str) const
Definition messages.cc:476
const std::string & error()
Definition messages.cc:491
#define str(s)
uint pos
#define input
static constexpr bool is_upper_ascii(const char c)
Definition messages.cc:42
static std::unique_ptr< MOMessages > global_messages
Definition messages.cc:590
std::optional< StringRefNull > translate(const int domain, const StringRef context, const StringRef key)
Definition messages.cc:620
static bool make_upper_ascii(char &c)
Definition messages.cc:61
static bool make_lower_ascii(char &c)
Definition messages.cc:52
bool operator==(const MessageKey &a, const MessageKey &b)
Definition messages.cc:440
static constexpr bool is_lower_ascii(const char c)
Definition messages.cc:47
const char * full_name()
Definition messages.cc:631
static std::string global_full_name
Definition messages.cc:591
static constexpr bool is_numeric_ascii(const char c)
Definition messages.cc:70
static CLG_LogRef LOG
Definition messages.cc:38
std::string macos_user_locale()
uint64_t get_default_hash(const T &v, const Args &...args)
Definition BLI_hash.hh:233
uint64_t hash() const
Definition messages.cc:429
static uint64_t hash_as(const MessageKeyRef &key)
Definition messages.cc:434
MessageKey(const StringRef c)
Definition messages.cc:417
i
Definition text_draw.cc:230
static int magic(const Tex *tex, const float texvec[3], TexResult *texres)
uint len