Blender V4.3
msgfmt.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2017 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5/*
6 * Based on C++ version by `Sergey Sharybin <sergey.vfx@gmail.com>`.
7 * Based on Python script `msgfmt.py` from Python source code tree, which was written by
8 * `Martin v. Löwis <loewis@informatik.hu-berlin.de>`.
9 *
10 * Generate binary message catalog from textual translation description.
11 *
12 * This program converts a textual Uniforum-style message catalog (.po file)
13 * into a binary GNU catalog (.mo file).
14 * This is essentially the same function as the GNU msgfmt program,
15 * however, it is a simpler implementation.
16 *
17 * Usage: msgfmt input.po output.mo
18 */
19
20#include <algorithm>
21#include <cstdlib>
22#include <cstring>
23#include <string>
24#include <utility>
25
26#include "BLI_fileops.h"
27#include "BLI_linklist.h"
28#include "BLI_map.hh"
29#include "BLI_string_ref.hh"
30#include "BLI_utildefines.h"
31#include "BLI_vector.hh"
32
33#include "MEM_guardedalloc.h"
34
/* Stub needed on Windows: some BLI files include `winstuff.h`, which refers to `G`. */
#ifdef WIN32
struct Global {
  void *dummy;
} G;
#endif
43
50
/* One catalog entry while it is being parsed. */
struct Message {
  /* Message context (`msgctxt`), empty when the entry has none. */
  std::string ctxt;
  /* Message identifier (`msgid`). */
  std::string id;
  /* Translated text (`msgstr`). */
  std::string str;

  /* Set when a `#,` comment containing "fuzzy" was seen for this entry. */
  bool is_fuzzy = false;
};
58
59static blender::StringRef unescape(std::string &str)
60{
61 int curr, next;
62 for (curr = next = 0; next < str.size(); curr++, next++) {
63 if (str[next] == '\\') {
64 /* Get rid of trailing escape char. */
65 if (next == str.size() - 1) {
66 curr--;
67 continue;
68 }
69 switch (str[next + 1]) {
70 case '\\':
71 str[curr] = '\\';
72 next++;
73 break;
74 case 'n':
75 str[curr] = '\n';
76 next++;
77 break;
78 case 't':
79 str[curr] = '\t';
80 next++;
81 break;
82 default:
83 /* Get rid of useless escape char. */
84 next++;
85 str[curr] = str[next];
86 }
87 }
88 else if (curr != next) {
89 str[curr] = str[next];
90 }
91 }
92 blender::StringRef ret_str = str;
93 BLI_assert(curr <= str.size());
94
95 if (ret_str[0] == '"' && ret_str[curr - 1] == '"') {
96 return ret_str.substr(1, curr - 2);
97 }
98 return ret_str.substr(0, curr);
99}
100
/**
 * Write \a value to \a bytes as 4 bytes, least significant byte first.
 *
 * \param bytes: Destination with room for at least `sizeof(uint32_t)` bytes.
 * \return The number of bytes written.
 */
BLI_INLINE size_t uint32_to_bytes(const uint32_t value, char *bytes)
{
  /* The value is unsigned so that constants above `INT_MAX` (such as the MO magic number) are
   * neither narrowed nor shifted as negative numbers. */
  for (size_t i = 0; i < sizeof(value); i++) {
    bytes[i] = char((value >> (i * 8)) & 0xff);
  }
  return sizeof(value);
}
109
/**
 * Copy \a msg, including its terminating NUL, to \a bytes.
 *
 * \param size: Number of bytes to copy, expected to be the length of \a msg plus one.
 * \return \a size.
 */
BLI_INLINE size_t msg_to_bytes(const std::string &msg, char *bytes, uint32_t size)
{
  BLI_assert(size - 1 == msg.size());
  const char *src = msg.c_str();
  memcpy(bytes, src, size);
  return size;
}
116
120
121/* Return the generated binary output. */
122static char *generate(blender::Map<std::string, std::string> &messages, size_t *r_output_size)
123{
125 struct Item {
127 blender::StringRef value;
128
129 Item(const MapItem &other) : key(other.key), value(other.value) {}
130 Item(const Item &other) : key(other.key), value(other.value) {}
131 Item &operator=(const Item &other)
132 {
133 this->key = other.key;
134 this->value = other.value;
135 return *this;
136 }
137 };
138 const uint32_t num_keys = messages.size();
139
140 /* Get a vector of (key, value) pairs sorted by their keys. */
141 blender::Vector<Item> items = {};
142 for (const auto message_items_iter : messages.items()) {
143 items.append(Item(message_items_iter));
144 }
145 std::sort(items.begin(), items.end(), [](const Item &a, const Item &b) -> bool {
146 return a.key < b.key;
147 });
148
149 Offset *offsets = MEM_cnew_array<Offset>(num_keys, __func__);
150 uint32_t tot_keys_len = 0;
151 uint32_t tot_vals_len = 0;
152
153 for (int i = 0; i < num_keys; i++) {
154 Offset &off = offsets[i];
155
156 /* For each string, we need size and file offset.
157 * Each string is nullptr terminated; the nullptr does not count into the size. */
158 off.key_offset = tot_keys_len;
159 off.key_len = uint32_t(items[i].key.size());
160 tot_keys_len += off.key_len + 1;
161
162 off.val_offset = tot_vals_len;
163 off.val_len = uint32_t(items[i].value.size());
164 tot_vals_len += off.val_len + 1;
165 }
166
167 /* The header is 7 32-bit unsigned integers.
168 * Then comes the keys index table, then the values index table. */
169 const uint32_t idx_keystart = 7 * 4;
170 const uint32_t idx_valstart = idx_keystart + 8 * num_keys;
171 /* We don't use hash tables, so the keys start right after the index tables. */
172 const uint32_t keystart = idx_valstart + 8 * num_keys;
173 /* and the values start after the keys */
174 const uint32_t valstart = keystart + tot_keys_len;
175
176 /* Final buffer representing the binary MO file. */
177 *r_output_size = valstart + tot_vals_len;
178 char *output = MEM_cnew_array<char>(*r_output_size, __func__);
179 char *h = output;
180 char *ik = output + idx_keystart;
181 char *iv = output + idx_valstart;
182 char *k = output + keystart;
183 char *v = output + valstart;
184
185 h += uint32_to_bytes(0x950412de, h); /* Magic */
186 h += uint32_to_bytes(0x0, h); /* Version */
187 h += uint32_to_bytes(num_keys, h); /* Number of entries */
188 h += uint32_to_bytes(idx_keystart, h); /* Start of key index */
189 h += uint32_to_bytes(idx_valstart, h); /* Start of value index */
190 h += uint32_to_bytes(0, h); /* Size of hash table */
191 h += uint32_to_bytes(0, h); /* Offset of hash table */
192
193 BLI_assert(h == ik);
194
195 for (int i = 0; i < num_keys; i++) {
196 const Offset &off = offsets[i];
197
198 /* The index table first has the list of keys, then the list of values.
199 * Each entry has first the size of the string, then the file offset. */
200 ik += uint32_to_bytes(off.key_len, ik);
201 ik += uint32_to_bytes(off.key_offset + keystart, ik);
202 iv += uint32_to_bytes(off.val_len, iv);
203 iv += uint32_to_bytes(off.val_offset + valstart, iv);
204
205 k += msg_to_bytes(items[i].key, k, off.key_len + 1);
206 v += msg_to_bytes(items[i].value, v, off.val_len + 1);
207 }
208
209 BLI_assert(ik == output + idx_valstart);
210 BLI_assert(iv == output + keystart);
211 BLI_assert(k == output + valstart);
212
213 MEM_freeN(offsets);
214
215 return output;
216}
217
218static void clear(Message &msg)
219{
220 msg.ctxt.clear();
221 msg.id.clear();
222 msg.str.clear();
223 msg.is_fuzzy = false;
224}
225
226/* Add a non-fuzzy translation to the dictionary. */
228{
229 if (!msg.is_fuzzy && !msg.str.empty()) {
230 std::string msgkey;
231 if (msg.ctxt.empty()) {
232 msgkey = std::move(msg.id);
233 }
234 else {
235 /* '\x04' is the context/msgid separator. */
236 msgkey = msg.ctxt + "\x04" + msg.id;
237 }
238
239 messages.add(std::move(msgkey), std::move(msg.str));
240 }
241 clear(msg);
242}
243
244static int make(const char *input_file_name, const char *output_file_name)
245{
247
248 const char *msgctxt_kw = "msgctxt";
249 const char *msgid_kw = "msgid";
250 const char *msgid_plural_kw = "msgid_plural";
251 const char *msgstr_kw = "msgstr";
252 const size_t msgctxt_len = strlen(msgctxt_kw);
253 const size_t msgid_len = strlen(msgid_kw);
254 const size_t msgid_plural_len = strlen(msgid_plural_kw);
255 const size_t msgstr_len = strlen(msgstr_kw);
256
257 /* NOTE: For now, we assume file encoding is always utf-8. */
258
259 eSectionType section = SECTION_NONE;
260 bool is_plural = false;
261
262 Message msg{};
263
264 LinkNode *input_file_lines = BLI_file_read_as_lines(input_file_name);
265 LinkNode *ifl = input_file_lines;
266
267 /* Parse the catalog. */
268 for (int lno = 1; ifl; ifl = ifl->next, lno++) {
269 std::string line = static_cast<char *>(ifl->link);
270 blender::StringRef l = line;
271 if (l.is_empty()) {
272 continue;
273 }
274 const bool is_comment = (l[0] == '#');
275 /* If we get a comment line after a msgstr, this is a new entry. */
276 if (is_comment) {
277 if (section == SECTION_STR) {
278 add(messages, msg);
279 section = SECTION_NONE;
280 }
281 /* Record a fuzzy mark. */
282 if (l[1] == ',' && l.find("fuzzy") != blender::StringRef::not_found) {
283 msg.is_fuzzy = true;
284 }
285 /* Skip comments */
286 continue;
287 }
288 if (l.startswith(msgctxt_kw)) {
289 if (section == SECTION_STR) {
290 /* New message, output previous section. */
291 add(messages, msg);
292 }
293 if (!ELEM(section, SECTION_NONE, SECTION_STR)) {
294 printf("msgctxt not at start of new message on %s:%d\n", input_file_name, lno);
295 return EXIT_FAILURE;
296 }
297 section = SECTION_CTX;
298 l = l.substr(msgctxt_len);
299 clear(msg);
300 }
301 else if (l.startswith(msgid_plural_kw)) {
302 /* This is a message with plural forms. */
303 if (section != SECTION_ID) {
304 printf("msgid_plural not preceded by msgid on %s:%d\n", input_file_name, lno);
305 return EXIT_FAILURE;
306 }
307 l = l.substr(msgid_plural_len);
308 msg.id += "\0"; /* separator of singular and plural */
309 is_plural = true;
310 }
311 else if (l.startswith(msgid_kw)) {
312 if (section == SECTION_STR) {
313 add(messages, msg);
314 }
315 if (section != SECTION_CTX) {
316 clear(msg);
317 }
318 section = SECTION_ID;
319 l = l.substr(msgid_len);
320 is_plural = false;
321 }
322 else if (l.startswith(msgstr_kw)) {
323 l = l.substr(msgstr_len);
324 /* Now we are in a `msgstr` section. */
325 section = SECTION_STR;
326 if (l[0] == '[') {
327 if (!is_plural) {
328 printf("plural without msgid_plural on %s:%d\n", input_file_name, lno);
329 return EXIT_FAILURE;
330 }
331 int64_t close_bracket_idx = l.find(']');
332 if (close_bracket_idx == blender::StringRef::not_found) {
333 printf("Syntax error on %s:%d\n", input_file_name, lno);
334 return EXIT_FAILURE;
335 }
336 l = l.substr(close_bracket_idx + 1);
337 if (!msg.str.empty()) {
338 msg.str += "\0"; /* Separator of the various plural forms. */
339 }
340 }
341 else {
342 if (is_plural) {
343 printf("indexed msgstr required for plural on %s:%d\n", input_file_name, lno);
344 return EXIT_FAILURE;
345 }
346 }
347 }
348 /* Skip empty lines. */
349 l = l.trim();
350 if (l.is_empty()) {
351 if (section == SECTION_STR) {
352 add(messages, msg);
353 }
354 section = SECTION_NONE;
355 continue;
356 }
357 line = l;
358 l = unescape(line);
359 if (section == SECTION_CTX) {
360 msg.ctxt += l;
361 }
362 else if (section == SECTION_ID) {
363 msg.id += l;
364 }
365 else if (section == SECTION_STR) {
366 msg.str += l;
367 }
368 else {
369 printf("Syntax error on %s:%d\n", input_file_name, lno);
370 return EXIT_FAILURE;
371 }
372 }
373 /* Add last entry */
374 if (section == SECTION_STR) {
375 add(messages, msg);
376 }
377
378 BLI_file_free_lines(input_file_lines);
379
380 /* Compute output */
381 size_t output_size;
382 char *output = generate(messages, &output_size);
383
384 FILE *fp = BLI_fopen(output_file_name, "wb");
385 fwrite(output, 1, output_size, fp);
386 fclose(fp);
387
388 MEM_freeN(output);
389
390 return EXIT_SUCCESS;
391}
392
393int main(int argc, char **argv)
394{
395 if (argc != 3) {
396 printf("Usage: %s <input.po> <output.mo>\n", argv[0]);
397 return EXIT_FAILURE;
398 }
399 const char *input_file = argv[1];
400 const char *output_file = argv[2];
401
402 return make(input_file, output_file);
403}
#define BLI_assert(a)
Definition BLI_assert.h:50
#define BLI_INLINE
File and directory operations.
FILE * BLI_fopen(const char *filepath, const char *mode) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
struct LinkNode * BLI_file_read_as_lines(const char *filepath) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition storage.cc:554
void BLI_file_free_lines(struct LinkNode *lines)
Definition storage.cc:600
#define ELEM(...)
Read Guarded memory(de)allocation.
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert * v
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
btGeneric6DofConstraint & operator=(btGeneric6DofConstraint &other)
#define output
bool add(const Key &key, const Value &value)
Definition BLI_map.hh:271
int64_t size() const
Definition BLI_map.hh:927
ItemIterator items() const
Definition BLI_map.hh:864
static constexpr int64_t not_found
constexpr StringRef substr(int64_t start, int64_t size) const
int64_t size() const
void append(const T &value)
local_group_size(16, 16) .push_constant(Type b
#define printf
#define str(s)
void MEM_freeN(void *vmemh)
Definition mallocn.cc:105
static ulong * next
#define G(x, y, z)
BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes)
Definition msgfmt.cc:101
static void clear(Message &msg)
Definition msgfmt.cc:218
static int make(const char *input_file_name, const char *output_file_name)
Definition msgfmt.cc:244
eSectionType
Definition msgfmt.cc:44
@ SECTION_CTX
Definition msgfmt.cc:46
@ SECTION_STR
Definition msgfmt.cc:48
@ SECTION_NONE
Definition msgfmt.cc:45
@ SECTION_ID
Definition msgfmt.cc:47
static void add(blender::Map< std::string, std::string > &messages, Message &msg)
Definition msgfmt.cc:227
static char * generate(blender::Map< std::string, std::string > &messages, size_t *r_output_size)
Definition msgfmt.cc:122
static blender::StringRef unescape(std::string &str)
Definition msgfmt.cc:59
BLI_INLINE size_t msg_to_bytes(const std::string &msg, char *bytes, uint32_t size)
Definition msgfmt.cc:110
int main()
unsigned int uint32_t
Definition stdint.h:80
__int64 int64_t
Definition stdint.h:89
void * link
struct LinkNode * next
std::string id
Definition msgfmt.cc:53
std::string str
Definition msgfmt.cc:54
bool is_fuzzy
Definition msgfmt.cc:56
std::string ctxt
Definition msgfmt.cc:52
uint32_t key_len
Definition msgfmt.cc:118
uint32_t val_offset
Definition msgfmt.cc:118
uint32_t val_len
Definition msgfmt.cc:118
uint32_t key_offset
Definition msgfmt.cc:118