Blender V5.0
msgfmt.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2017 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5/*
6 * Based on C++ version by `Sergey Sharybin <sergey.vfx@gmail.com>`.
7 * Based on Python script `msgfmt.py` from Python source code tree, which was written by
8 * `Martin v. Löwis <loewis@informatik.hu-berlin.de>`.
9 *
10 * Generate binary message catalog from textual translation description.
11 *
12 * This program converts a textual Uniforum-style message catalog (.po file)
13 * into a binary GNU catalog (.mo file).
14 * This is essentially the same function as the GNU msgfmt program,
15 * however, it is a simpler implementation.
16 *
17 * Usage: msgfmt input.po output.mo
18 */
19
20#include <algorithm>
21#include <cstdlib>
22#include <cstring>
23#include <string>
24#include <utility>
25
26#include "BLI_fileops.h"
27#include "BLI_linklist.h"
28#include "BLI_map.hh"
29#include "BLI_string_ref.hh"
30#include "BLI_utildefines.h"
31#include "BLI_vector.hh"
32
33#include "MEM_guardedalloc.h"
34
/* Stub required on Windows only: some BLI files include `winstuff.h`, which makes some use
 * of `G`, so a placeholder `Global` type and a `G` instance are defined here. */
#if defined(WIN32)
struct Global {
  void *dummy;
};

Global G;
#endif
43
50
/** One catalog entry, accumulated section by section while the `.po` file is parsed. */
struct Message {
  /** Text gathered from the `msgctxt` section (empty when the entry has no context). */
  std::string ctxt;
  /** Text gathered from the `msgid` section. */
  std::string id;
  /** Text gathered from the `msgstr` section. */
  std::string str;

  /** True once a `#,` comment mentioning "fuzzy" has been recorded for the entry. */
  bool is_fuzzy{false};
};
58
59static blender::StringRef unescape(std::string &str)
60{
61 int curr, next;
62 for (curr = next = 0; next < str.size(); curr++, next++) {
63 if (str[next] == '\\') {
64 /* Get rid of trailing escape char. */
65 if (next == str.size() - 1) {
66 curr--;
67 continue;
68 }
69 switch (str[next + 1]) {
70 case '\\':
71 str[curr] = '\\';
72 next++;
73 break;
74 case 'n':
75 str[curr] = '\n';
76 next++;
77 break;
78 case 't':
79 str[curr] = '\t';
80 next++;
81 break;
82 default:
83 /* Get rid of useless escape char. */
84 next++;
85 str[curr] = str[next];
86 }
87 }
88 else if (curr != next) {
89 str[curr] = str[next];
90 }
91 }
92 blender::StringRef ret_str = str;
93 BLI_assert(curr <= str.size());
94
95 if (ret_str[0] == '"' && ret_str[curr - 1] == '"') {
96 return ret_str.substr(1, curr - 2);
97 }
98 return ret_str.substr(0, curr);
99}
100
101BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes)
102{
103 size_t i;
104 for (i = 0; i < sizeof(value); i++) {
105 bytes[i] = char((value >> (int(i) * 8)) & 0xff);
106 }
107 return i;
108}
109
110BLI_INLINE size_t msg_to_bytes(const std::string &msg, char *bytes, uint32_t size)
111{
112 BLI_assert(msg.size() == size - 1);
113 memcpy(bytes, msg.c_str(), size);
114 return size;
115}
116
/**
 * Length and position of one key/value pair.
 * The offsets are relative to the start of the key strings and of the value strings
 * respectively; the lengths do not count the null terminator.
 */
struct Offset {
  uint32_t key_offset;
  uint32_t key_len;
  uint32_t val_offset;
  uint32_t val_len;
};
120
121/* Return the generated binary output. */
122static char *generate(blender::Map<std::string, std::string> &messages, size_t *r_output_size)
123{
125 struct Item {
127 blender::StringRef value;
128
129 Item(const MapItem &other) : key(other.key), value(other.value) {}
130 Item(const Item &other) = default;
131 Item &operator=(const Item &other) = default;
132 };
133 const uint32_t num_keys = messages.size();
134
135 /* Get a vector of (key, value) pairs sorted by their keys. */
136 blender::Vector<Item> items = {};
137 for (const auto message_items_iter : messages.items()) {
138 items.append(Item(message_items_iter));
139 }
140 std::sort(items.begin(), items.end(), [](const Item &a, const Item &b) -> bool {
141 return a.key < b.key;
142 });
143
144 Offset *offsets = MEM_calloc_arrayN<Offset>(num_keys, __func__);
145 uint32_t tot_keys_len = 0;
146 uint32_t tot_vals_len = 0;
147
148 for (int i = 0; i < num_keys; i++) {
149 Offset &off = offsets[i];
150
151 /* For each string, we need size and file offset.
152 * Each string is null terminated; the null does not count into the size. */
153 off.key_offset = tot_keys_len;
154 off.key_len = uint32_t(items[i].key.size());
155 tot_keys_len += off.key_len + 1;
156
157 off.val_offset = tot_vals_len;
158 off.val_len = uint32_t(items[i].value.size());
159 tot_vals_len += off.val_len + 1;
160 }
161
162 /* The header is 7 32-bit unsigned integers.
163 * Then comes the keys index table, then the values index table. */
164 const uint32_t idx_keystart = 7 * 4;
165 const uint32_t idx_valstart = idx_keystart + 8 * num_keys;
166 /* We don't use hash tables, so the keys start right after the index tables. */
167 const uint32_t keystart = idx_valstart + 8 * num_keys;
168 /* and the values start after the keys */
169 const uint32_t valstart = keystart + tot_keys_len;
170
171 /* Final buffer representing the binary MO file. */
172 *r_output_size = valstart + tot_vals_len;
173 char *output = MEM_calloc_arrayN<char>(*r_output_size, __func__);
174 char *h = output;
175 char *ik = output + idx_keystart;
176 char *iv = output + idx_valstart;
177 char *k = output + keystart;
178 char *v = output + valstart;
179
180 h += uint32_to_bytes(0x950412de, h); /* Magic */
181 h += uint32_to_bytes(0x0, h); /* Version */
182 h += uint32_to_bytes(num_keys, h); /* Number of entries */
183 h += uint32_to_bytes(idx_keystart, h); /* Start of key index */
184 h += uint32_to_bytes(idx_valstart, h); /* Start of value index */
185 h += uint32_to_bytes(0, h); /* Size of hash table */
186 h += uint32_to_bytes(0, h); /* Offset of hash table */
187
188 BLI_assert(h == ik);
189
190 for (int i = 0; i < num_keys; i++) {
191 const Offset &off = offsets[i];
192
193 /* The index table first has the list of keys, then the list of values.
194 * Each entry has first the size of the string, then the file offset. */
195 ik += uint32_to_bytes(off.key_len, ik);
196 ik += uint32_to_bytes(off.key_offset + keystart, ik);
197 iv += uint32_to_bytes(off.val_len, iv);
198 iv += uint32_to_bytes(off.val_offset + valstart, iv);
199
200 k += msg_to_bytes(items[i].key, k, off.key_len + 1);
201 v += msg_to_bytes(items[i].value, v, off.val_len + 1);
202 }
203
204 BLI_assert(ik == output + idx_valstart);
205 BLI_assert(iv == output + keystart);
206 BLI_assert(k == output + valstart);
207
208 MEM_freeN(offsets);
209
210 return output;
211}
212
213static void clear(Message &msg)
214{
215 msg.ctxt.clear();
216 msg.id.clear();
217 msg.str.clear();
218 msg.is_fuzzy = false;
219}
220
221/* Add a non-fuzzy translation to the dictionary. */
223{
224 if (!msg.is_fuzzy && !msg.str.empty()) {
225 std::string msgkey;
226 if (msg.ctxt.empty()) {
227 msgkey = std::move(msg.id);
228 }
229 else {
230 /* '\x04' is the context/msgid separator. */
231 msgkey = msg.ctxt + "\x04" + msg.id;
232 }
233
234 messages.add(std::move(msgkey), std::move(msg.str));
235 }
236 clear(msg);
237}
238
239static int make(const char *input_file_name, const char *output_file_name)
240{
242
243 const char *msgctxt_kw = "msgctxt";
244 const char *msgid_kw = "msgid";
245 const char *msgid_plural_kw = "msgid_plural";
246 const char *msgstr_kw = "msgstr";
247 const size_t msgctxt_len = strlen(msgctxt_kw);
248 const size_t msgid_len = strlen(msgid_kw);
249 const size_t msgid_plural_len = strlen(msgid_plural_kw);
250 const size_t msgstr_len = strlen(msgstr_kw);
251
252 /* NOTE: For now, we assume file encoding is always UTF8. */
253
254 eSectionType section = SECTION_NONE;
255 bool is_plural = false;
256
257 Message msg{};
258
259 LinkNode *input_file_lines = BLI_file_read_as_lines(input_file_name);
260 LinkNode *ifl = input_file_lines;
261
262 /* Parse the catalog. */
263 for (int lno = 1; ifl; ifl = ifl->next, lno++) {
264 std::string line = static_cast<char *>(ifl->link);
265 blender::StringRef l = line;
266 if (l.is_empty()) {
267 continue;
268 }
269 const bool is_comment = (l[0] == '#');
270 /* If we get a comment line after a msgstr, this is a new entry. */
271 if (is_comment) {
272 if (section == SECTION_STR) {
273 add(messages, msg);
274 section = SECTION_NONE;
275 }
276 /* Record a fuzzy mark. */
277 if (l[1] == ',' && l.find("fuzzy") != blender::StringRef::not_found) {
278 msg.is_fuzzy = true;
279 }
280 /* Skip comments */
281 continue;
282 }
283 if (l.startswith(msgctxt_kw)) {
284 if (section == SECTION_STR) {
285 /* New message, output previous section. */
286 add(messages, msg);
287 }
288 if (!ELEM(section, SECTION_NONE, SECTION_STR)) {
289 printf("msgctxt not at start of new message on %s:%d\n", input_file_name, lno);
290 return EXIT_FAILURE;
291 }
292 section = SECTION_CTX;
293 l = l.substr(msgctxt_len);
294 clear(msg);
295 }
296 else if (l.startswith(msgid_plural_kw)) {
297 /* This is a message with plural forms. */
298 if (section != SECTION_ID) {
299 printf("msgid_plural not preceded by msgid on %s:%d\n", input_file_name, lno);
300 return EXIT_FAILURE;
301 }
302 l = l.substr(msgid_plural_len);
303 msg.id += "\0"; /* separator of singular and plural */
304 is_plural = true;
305 }
306 else if (l.startswith(msgid_kw)) {
307 if (section == SECTION_STR) {
308 add(messages, msg);
309 }
310 if (section != SECTION_CTX) {
311 clear(msg);
312 }
313 section = SECTION_ID;
314 l = l.substr(msgid_len);
315 is_plural = false;
316 }
317 else if (l.startswith(msgstr_kw)) {
318 l = l.substr(msgstr_len);
319 /* Now we are in a `msgstr` section. */
320 section = SECTION_STR;
321 if (l[0] == '[') {
322 if (!is_plural) {
323 printf("plural without msgid_plural on %s:%d\n", input_file_name, lno);
324 return EXIT_FAILURE;
325 }
326 int64_t close_bracket_idx = l.find(']');
327 if (close_bracket_idx == blender::StringRef::not_found) {
328 printf("Syntax error on %s:%d\n", input_file_name, lno);
329 return EXIT_FAILURE;
330 }
331 l = l.substr(close_bracket_idx + 1);
332 if (!msg.str.empty()) {
333 msg.str += "\0"; /* Separator of the various plural forms. */
334 }
335 }
336 else {
337 if (is_plural) {
338 printf("indexed msgstr required for plural on %s:%d\n", input_file_name, lno);
339 return EXIT_FAILURE;
340 }
341 }
342 }
343 /* Skip empty lines. */
344 l = l.trim();
345 if (l.is_empty()) {
346 if (section == SECTION_STR) {
347 add(messages, msg);
348 }
349 section = SECTION_NONE;
350 continue;
351 }
352 line = l;
353 l = unescape(line);
354 if (section == SECTION_CTX) {
355 msg.ctxt += l;
356 }
357 else if (section == SECTION_ID) {
358 msg.id += l;
359 }
360 else if (section == SECTION_STR) {
361 msg.str += l;
362 }
363 else {
364 printf("Syntax error on %s:%d\n", input_file_name, lno);
365 return EXIT_FAILURE;
366 }
367 }
368 /* Add last entry */
369 if (section == SECTION_STR) {
370 add(messages, msg);
371 }
372
373 BLI_file_free_lines(input_file_lines);
374
375 /* Compute output */
376 size_t output_size;
377 char *output = generate(messages, &output_size);
378
379 FILE *fp = BLI_fopen(output_file_name, "wb");
380 fwrite(output, 1, output_size, fp);
381 fclose(fp);
382
384
385 return EXIT_SUCCESS;
386}
387
388int main(int argc, char **argv)
389{
390 if (argc != 3) {
391 printf("Usage: %s <input.po> <output.mo>\n", argv[0]);
392 return EXIT_FAILURE;
393 }
394 const char *input_file = argv[1];
395 const char *output_file = argv[2];
396
397 return make(input_file, output_file);
398}
#define BLI_assert(a)
Definition BLI_assert.h:46
#define BLI_INLINE
File and directory operations.
FILE * BLI_fopen(const char *filepath, const char *mode) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
struct LinkNode * BLI_file_read_as_lines(const char *filepath) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition storage.cc:563
void BLI_file_free_lines(struct LinkNode *lines)
Definition storage.cc:609
#define ELEM(...)
Read Guarded memory(de)allocation.
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert * v
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
btGeneric6DofConstraint & operator=(btGeneric6DofConstraint &other)
MutableMapItem< Key, Value > MutableItem
Definition BLI_map.hh:133
bool add(const Key &key, const Value &value)
Definition BLI_map.hh:295
int64_t size() const
Definition BLI_map.hh:976
ItemIterator items() const &
Definition BLI_map.hh:902
static constexpr int64_t not_found
constexpr StringRef substr(int64_t start, int64_t size) const
int64_t size() const
void append(const T &value)
#define str(s)
#define main()
#define printf(...)
#define output
void * MEM_calloc_arrayN(size_t len, size_t size, const char *str)
Definition mallocn.cc:123
void MEM_freeN(void *vmemh)
Definition mallocn.cc:113
static ulong * next
#define G(x, y, z)
BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes)
Definition msgfmt.cc:101
static void clear(Message &msg)
Definition msgfmt.cc:213
static int make(const char *input_file_name, const char *output_file_name)
Definition msgfmt.cc:239
eSectionType
Definition msgfmt.cc:44
@ SECTION_CTX
Definition msgfmt.cc:46
@ SECTION_STR
Definition msgfmt.cc:48
@ SECTION_NONE
Definition msgfmt.cc:45
@ SECTION_ID
Definition msgfmt.cc:47
static void add(blender::Map< std::string, std::string > &messages, Message &msg)
Definition msgfmt.cc:222
static char * generate(blender::Map< std::string, std::string > &messages, size_t *r_output_size)
Definition msgfmt.cc:122
static blender::StringRef unescape(std::string &str)
Definition msgfmt.cc:59
BLI_INLINE size_t msg_to_bytes(const std::string &msg, char *bytes, uint32_t size)
Definition msgfmt.cc:110
void * link
struct LinkNode * next
std::string id
Definition msgfmt.cc:53
std::string str
Definition msgfmt.cc:54
bool is_fuzzy
Definition msgfmt.cc:56
std::string ctxt
Definition msgfmt.cc:52
uint32_t key_len
Definition msgfmt.cc:118
uint32_t val_offset
Definition msgfmt.cc:118
uint32_t val_len
Definition msgfmt.cc:118
uint32_t key_offset
Definition msgfmt.cc:118
i
Definition text_draw.cc:230