Blender V4.3
text_format_py.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
9#include <cstring>
10
11#include "BLI_blenlib.h"
12
13#include "DNA_space_types.h"
14#include "DNA_text_types.h"
15
16#include "BKE_text.h"
17
18#include "text_format.hh"
19
20/* -------------------------------------------------------------------- */
52 /* Force single column, sorted list. */
53 /* clang-format off */
54 "and",
55 "as",
56 "assert",
57 "async",
58 "await",
59 "break",
60 "case",
61 "continue",
62 "del",
63 "elif",
64 "else",
65 "except",
66 "finally",
67 "for",
68 "from",
69 "global",
70 "if",
71 "import",
72 "in",
73 "is",
74 "lambda",
75 "match",
76 "nonlocal",
77 "not",
78 "or",
79 "pass",
80 "raise",
81 "return",
82 "try",
83 "while",
84 "with",
85 "yield",
86 /* clang-format on */
87};
91
94 /* Force single column, sorted list. */
95 /* clang-format off */
96 "class",
97 "def",
98 /* clang-format on */
99};
102
/* Python literal constants matched by the boolean/None highlighting.
 * The entries are kept in sorted order, one per line (see the comment inside the table);
 * NOTE(review): presumably the lookup helper relies on the sorted order - confirm before
 * inserting entries out of order. */
104static const char *text_format_py_literals_bool_data[] = {
105 /* Force single column, sorted list. */
106 /* clang-format off */
107 "False",
108 "None",
109 "True",
110 /* clang-format on */
111};
114
117/* -------------------------------------------------------------------- */
/* Try to match a built-in word at the start of `string`.
 * Returns -1 when `i` is 0, or when the character at `string[i]` is an identifier character
 * (the candidate is only the prefix of a longer word); otherwise returns `i`.
 * NOTE(review): the statement that defines `i` (listing line 123) is absent from this extract;
 * presumably it is the matched length from a lookup in the built-in word table - confirm. */
121static int txtfmt_py_find_builtinfunc(const char *string)
122{
124
125 /* If next source char is an identifier (eg. 'i' in "definite") no match */
126 if (i == 0 || text_check_identifier(string[i])) {
127 return -1;
128 }
129 return i;
130}
131
/* Try to match a "special" word at the start of `string`.
 * Returns -1 when `i` is 0, or when the character at `string[i]` is an identifier character
 * (the candidate is only the prefix of a longer word); otherwise returns `i`.
 * NOTE(review): the statement that defines `i` (listing line 134) is absent from this extract;
 * presumably it is the matched length from a lookup in the special-variable table - confirm. */
132static int txtfmt_py_find_specialvar(const char *string)
133{
135
136 /* If next source char is an identifier (eg. 'i' in "definite") no match */
137 if (i == 0 || text_check_identifier(string[i])) {
138 return -1;
139 }
140 return i;
141}
142
143static int txtfmt_py_find_decorator(const char *string)
144{
145 if (string[0] != '@') {
146 return -1;
147 }
148 if (!text_check_identifier(string[1])) {
149 return -1;
150 }
151 /* Interpret as matrix multiplication when followed by whitespace. */
152 if (text_check_whitespace(string[1])) {
153 return -1;
154 }
155
156 int i = 1;
157 while (text_check_identifier(string[i])) {
158 i++;
159 }
160 return i;
161}
162
/* Try to match a boolean/None literal at the start of `string`.
 * Returns -1 when `i` is 0, or when the character at `string[i]` is an identifier character
 * (the candidate is only the prefix of a longer word); otherwise returns `i`.
 * NOTE(review): the statement that defines `i` (listing line 165) is absent from this extract;
 * presumably it is the matched length from a lookup in the bool literal table - confirm. */
163static int txtfmt_py_find_bool(const char *string)
164{
166
167 /* If next source char is an identifier (eg. 'i' in "Nonetheless") no match */
168 if (i == 0 || text_check_identifier(string[i])) {
169 return -1;
170 }
171 return i;
172}
173
174/* Numeral character matching. */
/* Expands to a block that counts the leading characters of `string` for which the given
 * predicate returns true, then returns that count as a `uint`.
 * The expansion reads and advances a variable named `string`, and contains a `return`,
 * so it only makes sense as the body of a function that has such a parameter.
 * NOTE(review): the functions built from this macro are not part of this extract;
 * the trailing `((void)0)` presumably exists so the use site can end with a semicolon. */
175#define TXTFMT_PY_NUMERAL_STRING_COUNT_IMPL(txtfmt_py_numeral_char_is_fn) \
176 { \
177 uint count = 0; \
178 for (; txtfmt_py_numeral_char_is_fn(*string); string += 1) { \
179 count += 1; \
180 } \
181 return count; \
182 } \
183 ((void)0)
184
185/* Binary. */
/**
 * \return True when \a c may appear in the digits of a binary literal:
 * '0', '1' or the '_' digit separator.
 */
static bool txtfmt_py_numeral_char_is_binary(const char c)
{
  switch (c) {
    case '0':
    case '1':
    case '_':
      return true;
    default:
      return false;
  }
}
194
195/* Octal. */
/**
 * \return True when \a c may appear in the digits of an octal literal:
 * '0'..'7' or the '_' digit separator.
 */
static bool txtfmt_py_numeral_char_is_octal(const char c)
{
  if (c == '_') {
    return true;
  }
  return ('0' <= c) && (c <= '7');
}
204
205/* Decimal. */
/**
 * \return True when \a c may appear in the digits of a decimal literal:
 * '0'..'9' or the '_' digit separator.
 */
static bool txtfmt_py_numeral_char_is_decimal(const char c)
{
  if (c == '_') {
    return true;
  }
  return ('0' <= c) && (c <= '9');
}
214
215/* Hexadecimal. */
217{
 /* True for '0'..'9', 'a'..'f', 'A'..'F' and the '_' digit separator.
  * NOTE(review): the line carrying this function's signature (listing line 216)
  * is absent from this extract. */
218 return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c == '_');
219}
224
225/* Zeros. */
/**
 * \return True when \a c may appear in a run of zeros: '0' or the '_' digit separator.
 */
static bool txtfmt_py_numeral_char_is_zero(const char c)
{
  if (c == '0') {
    return true;
  }
  return c == '_';
}
234
235#undef TXTFMT_PY_NUMERAL_STRING_COUNT_IMPL
236
237static int txtfmt_py_find_numeral_inner(const char *string)
238{
239 if (string == nullptr || *string == '\0') {
240 return -1;
241 }
242
243 const char first = *string, second = *(string + 1);
244
245 /* Decimal dot must be followed by a digit, any decimal digit.
246 * Note that the there can be any number of leading zeros after
247 * the decimal point (leading zeros are not allowed in integers) */
248 if (first == '.') {
249 if (text_check_digit(second)) {
250 return 1 + txtfmt_py_numeral_string_count_decimal(string + 1);
251 }
252 }
253 else if (first == '0') {
254 /* Numerals starting with '0x' or '0X' is followed by hexadecimal digits. */
255 if (ELEM(second, 'x', 'X')) {
256 return 2 + txtfmt_py_numeral_string_count_hexadecimal(string + 2);
257 }
258 /* Numerals starting with '0o' or '0O' is followed by octal digits. */
259 if (ELEM(second, 'o', 'O')) {
260 return 2 + txtfmt_py_numeral_string_count_octal(string + 2);
261 }
262 /* Numerals starting with '0b' or '0B' is followed by binary digits. */
263 if (ELEM(second, 'b', 'B')) {
264 return 2 + txtfmt_py_numeral_string_count_binary(string + 2);
265 }
266 /* Other numerals starting with '0' can be followed by any number of '0' characters. */
267 if (ELEM(second, '0', '_')) {
268 return 2 + txtfmt_py_numeral_string_count_zeros(string + 2);
269 }
270 }
271 /* Any non-zero digit is the start of a decimal number. */
272 else if (first > '0' && first <= '9') {
273 return 1 + txtfmt_py_numeral_string_count_decimal(string + 1);
274 }
275 /* A single zero is also allowed. */
276 return (first == '0') ? 1 : 0;
277}
278
279static int txtfmt_py_literal_numeral(const char *string, char prev_fmt)
280{
281 if (string == nullptr || *string == '\0') {
282 return -1;
283 }
284
285 const char first = *string, second = *(string + 1);
286
287 if (prev_fmt == FMT_TYPE_NUMERAL) {
288 /* Previous was a number; if immediately followed by 'e' or 'E' and a digit,
289 * it's a base 10 exponent (scientific notation). */
290 if (ELEM(first, 'e', 'E') && (text_check_digit(second) || second == '-')) {
291 return 1 + txtfmt_py_find_numeral_inner(string + 1);
292 }
293 /* Previous was a number; if immediately followed by '.' it's a floating point decimal number.
294 * NOTE: keep the decimal point, it's needed to allow leading zeros. */
295 if (first == '.') {
296 return txtfmt_py_find_numeral_inner(string);
297 }
298 /* "Imaginary" part of a complex number ends with 'j' */
299 if (ELEM(first, 'j', 'J') && !text_check_digit(second)) {
300 return 1;
301 }
302 }
303 else if ((prev_fmt != FMT_TYPE_DEFAULT) &&
304 (text_check_digit(first) || (first == '.' && text_check_digit(second))))
305 {
306 /* New numeral, starting with a digit or a decimal point followed by a digit. */
307 return txtfmt_py_find_numeral_inner(string);
308 }
309 /* Not a literal numeral. */
310 return 0;
311}
312
/* Classify the word at the start of `str` and return the format type for it,
 * falling back to FMT_TYPE_DEFAULT when no matcher succeeds.
 * NOTE(review): the first branch of the if/else chain (listing line 320) is absent from
 * this extract, so the chain below starts at an `else if`.
 * NOTE(review): decorators map to FMT_TYPE_RESERVED here, while the equivalent chain in
 * `txtfmt_py_format_line()` uses FMT_TYPE_DIRECTIVE even though it is commented as
 * "keep in sync" with this function - confirm which one is intended. */
313static char txtfmt_py_format_identifier(const char *str)
314{
315 char fmt;
316
317 /* Keep aligned args for readability. */
318 /* clang-format off */
319
321 } else if (txtfmt_py_find_builtinfunc(str) != -1) { fmt = FMT_TYPE_KEYWORD;
322 } else if (txtfmt_py_find_decorator(str) != -1) { fmt = FMT_TYPE_RESERVED;
323 } else { fmt = FMT_TYPE_DEFAULT;
324 }
325
326 /* clang-format on */
327 return fmt;
328}
329
332/* -------------------------------------------------------------------- */
/* Compute the per-character format string of `line` for Python syntax highlighting.
 *
 * The format buffer (`line->format`) holds one format type per character, a null
 * terminator, and then one extra byte with the "continuation" state (whether the line ends
 * inside a single/double/triple quoted string). The continuation of the previous line seeds
 * the state for this line.
 *
 * When `do_next` is true and the continuation written for this line differs from the one it
 * had before, the next line is formatted as well (recursively).
 *
 * NOTE(review): several statements of this function are absent from this extract
 * (listing lines 350, 371, 387, 414, 488, 493, 496, 506, 533 and 553); some branches below
 * therefore look empty or unbalanced. Consult the complete file before changing the logic. */
336static void txtfmt_py_format_line(SpaceText *st, TextLine *line, const bool do_next)
337{
338 FlattenString fs;
339 const char *str;
340 char *fmt;
341 char cont_orig, cont, find, prev = ' ';
342 int len, i;
343
344 /* Get continuation from previous line */
345 if (line->prev && line->prev->format != nullptr) {
346 fmt = line->prev->format;
347 cont = fmt[strlen(fmt) + 1]; /* Just after the null-terminator */
348 BLI_assert((FMT_CONT_ALL & cont) == cont);
349 /* So slashes beginning on continuation display properly, see: #118767. */
351 prev = FMT_TYPE_STRING;
352 }
353 }
354 else {
355 cont = FMT_CONT_NOP;
356 }
357
358 /* Get original continuation from this line */
359 if (line->format != nullptr) {
360 fmt = line->format;
361 cont_orig = fmt[strlen(fmt) + 1]; /* Just after the null-terminator */
362 BLI_assert((FMT_CONT_ALL & cont_orig) == cont_orig);
363 }
364 else {
 /* No previous format for this line.
  * NOTE(review): 0xFF is presumably a sentinel that never equals a real continuation
  * value, so the "continuation changed" test at the end succeeds - confirm. */
365 cont_orig = 0xFF;
366 }
367
368 len = flatten_string(st, &fs, line->line);
369 str = fs.buf;
370 if (!text_check_format_len(line, len)) {
372 return;
373 }
374 fmt = line->format;
375
 /* Walk the flattened text, writing one format type per character through `fmt`.
  * `prev` holds the format type written for the previous character. */
376 while (*str) {
377 /* Handle escape sequences by skipping both \ and next char */
378 if (*str == '\\') {
379 *fmt = prev;
380 fmt++;
381 str++;
382 if (*str == '\0') {
383 break;
384 }
385 *fmt = prev;
386 fmt++;
388 continue;
389 }
390 /* Handle continuations */
391 if (cont) {
392 /* Triple strings ("""...""" or '''...''') */
393 if (cont & FMT_CONT_TRIPLE) {
394 find = (cont & FMT_CONT_QUOTEDOUBLE) ? '"' : '\'';
 /* Closing triple quote: format the first two quote characters here,
  * the third one is formatted by the common code below. */
395 if (*str == find && *(str + 1) == find && *(str + 2) == find) {
396 *fmt = FMT_TYPE_STRING;
397 fmt++;
398 str++;
399 *fmt = FMT_TYPE_STRING;
400 fmt++;
401 str++;
402 cont = FMT_CONT_NOP;
403 }
404 /* Handle other strings */
405 }
406 else {
407 find = (cont & FMT_CONT_QUOTEDOUBLE) ? '"' : '\'';
408 if (*str == find) {
409 cont = FMT_CONT_NOP;
410 }
411 }
412
413 *fmt = FMT_TYPE_STRING;
415 }
416 /* Not in a string... */
417 else {
418 /* Deal with comments first */
419 if (*str == '#') {
420 /* fill the remaining line */
421 text_format_fill(&str, &fmt, FMT_TYPE_COMMENT, len - int(fmt - line->format));
422 }
423 else if (ELEM(*str, '"', '\'')) {
424 /* Strings */
425 find = *str;
426 cont = (*str == '"') ? FMT_CONT_QUOTEDOUBLE : FMT_CONT_QUOTESINGLE;
 /* Opening triple quote: consume two of the three quote characters here. */
427 if (*(str + 1) == find && *(str + 2) == find) {
428 *fmt = FMT_TYPE_STRING;
429 fmt++;
430 str++;
431 *fmt = FMT_TYPE_STRING;
432 fmt++;
433 str++;
434 cont |= FMT_CONT_TRIPLE;
435 }
436 *fmt = FMT_TYPE_STRING;
437 }
438 else if (ELEM(*str, 'f', 'F', 'r', 'R', 'u', 'U') && ELEM(*(str + 1), '"', '\'')) {
439 /* Strings with single letter prefixes (f-strings, raw strings, and unicode strings).
440 * Format the prefix as part of the string. */
441 *fmt = FMT_TYPE_STRING;
442 fmt++;
443 str++;
444 find = *str;
445 cont = (*str == '"') ? FMT_CONT_QUOTEDOUBLE : FMT_CONT_QUOTESINGLE;
446 if (*(str + 1) == find && *(str + 2) == find) {
447 *fmt = FMT_TYPE_STRING;
448 fmt++;
449 str++;
450 *fmt = FMT_TYPE_STRING;
451 fmt++;
452 str++;
453 cont |= FMT_CONT_TRIPLE;
454 }
455 *fmt = FMT_TYPE_STRING;
456 }
457 else if (((ELEM(*str, 'f', 'F') && ELEM(*(str + 1), 'r', 'R')) ||
458 (ELEM(*str, 'r', 'R') && ELEM(*(str + 1), 'f', 'F'))) &&
459 ELEM(*(str + 2), '"', '\''))
460 {
461 /* Strings with two letter prefixes (raw f-strings).
462 * Format the prefix as part of the string. */
463 *fmt = FMT_TYPE_STRING;
464 fmt++;
465 str++;
466 *fmt = FMT_TYPE_STRING;
467 fmt++;
468 str++;
469 find = *str;
470 cont = (*str == '"') ? FMT_CONT_QUOTEDOUBLE : FMT_CONT_QUOTESINGLE;
471 if (*(str + 1) == find && *(str + 2) == find) {
472 *fmt = FMT_TYPE_STRING;
473 fmt++;
474 str++;
475 *fmt = FMT_TYPE_STRING;
476 fmt++;
477 str++;
478 cont |= FMT_CONT_TRIPLE;
479 }
480 *fmt = FMT_TYPE_STRING;
481 }
482 /* White-space (all white-space has been converted to spaces). */
483 else if (*str == ' ') {
484 *fmt = FMT_TYPE_WHITESPACE;
485 }
486 /* Literal numerals, "numbers". */
487 else if ((i = txtfmt_py_literal_numeral(str, prev)) > 0) {
489 }
490 /* Booleans */
491 else if (prev != FMT_TYPE_DEFAULT && (i = txtfmt_py_find_bool(str)) != -1) {
492 if (i > 0) {
494 }
495 else {
497 *fmt = FMT_TYPE_DEFAULT;
498 }
499 }
500 /* Punctuation */
501 else if ((*str != '@') && text_check_delim(*str)) {
502 *fmt = FMT_TYPE_SYMBOL;
503 }
504 /* Identifiers and other text (no previous white-space/delimiters so text continues). */
505 else if (prev == FMT_TYPE_DEFAULT) {
507 *fmt = FMT_TYPE_DEFAULT;
508 }
509 /* Not white-space, a digit, punctuation, or continuing text.
510 * Must be new, check for special words. */
511 else {
512 /* Keep aligned arguments for readability. */
513 /* clang-format off */
514
515 /* Special vars(v) or built-in keywords(b) */
516 /* keep in sync with `txtfmt_py_format_identifier()`. */
517 if ((i = txtfmt_py_find_specialvar(str)) != -1) { prev = FMT_TYPE_SPECIAL;
518 } else if ((i = txtfmt_py_find_builtinfunc(str)) != -1) { prev = FMT_TYPE_KEYWORD;
519 } else if ((i = txtfmt_py_find_decorator(str)) != -1) { prev = FMT_TYPE_DIRECTIVE;
520 }
521
522 /* clang-format on */
523
 /* `i` is the length of the matched word, `prev` was set to its format type above. */
524 if (i > 0) {
525 if (prev == FMT_TYPE_DIRECTIVE) { /* can contain utf8 */
526 text_format_fill(&str, &fmt, prev, i);
527 }
528 else {
529 text_format_fill_ascii(&str, &fmt, prev, i);
530 }
531 }
532 else {
534 *fmt = FMT_TYPE_DEFAULT;
535 }
536 }
537 }
 /* Remember the format just written and step to the next character. */
538 prev = *fmt;
539 fmt++;
540 str++;
541 }
542
543 /* Terminate and add continuation char */
544 *fmt = '\0';
545 fmt++;
546 *fmt = cont;
547
548 /* If continuation has changed and we're allowed, process the next line */
549 if (cont != cont_orig && do_next && line->next) {
550 txtfmt_py_format_line(st, line->next, do_next);
551 }
552
554}
555
558/* -------------------------------------------------------------------- */
578
bool text_check_digit(char ch)
Definition text.cc:2303
bool text_check_identifier(char ch)
Definition text.cc:2314
bool text_check_delim(char ch)
Definition text.cc:2287
bool text_check_whitespace(char ch)
Definition text.cc:2369
#define BLI_assert(a)
Definition BLI_assert.h:50
int BLI_str_utf8_size_safe(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1)
unsigned int uint
#define ARRAY_SIZE(arr)
#define ELEM(...)
int len
#define str(s)
SpaceLink * next
const char * comment_line
char(* format_identifier)(const char *string)
const char ** ext
void(* format_line)(SpaceText *st, TextLine *line, bool do_next)
int flatten_string(const SpaceText *st, FlattenString *fs, const char *in)
void text_format_fill(const char **str_p, char **fmt_p, const char type, const int len)
void flatten_string_free(FlattenString *fs)
const bool text_format_string_literals_check_sorted_array(const Span< const char * > string_literals)
int text_format_string_literal_find(const Span< const char * > string_literals, const char *text)
void text_format_fill_ascii(const char **str_p, char **fmt_p, const char type, const int len)
void ED_text_format_register(TextFormatType *tft)
int text_check_format_len(TextLine *line, uint len)
@ FMT_CONT_QUOTEDOUBLE
@ FMT_CONT_QUOTESINGLE
@ FMT_CONT_TRIPLE
@ FMT_CONT_NOP
@ FMT_TYPE_DIRECTIVE
@ FMT_TYPE_STRING
@ FMT_TYPE_COMMENT
@ FMT_TYPE_SPECIAL
@ FMT_TYPE_DEFAULT
@ FMT_TYPE_KEYWORD
@ FMT_TYPE_WHITESPACE
@ FMT_TYPE_NUMERAL
@ FMT_TYPE_RESERVED
@ FMT_TYPE_SYMBOL
#define FMT_CONT_ALL
static bool txtfmt_py_numeral_char_is_hexadecimal(const char c)
static int txtfmt_py_find_numeral_inner(const char *string)
static uint txtfmt_py_numeral_string_count_hexadecimal(const char *string)
static uint txtfmt_py_numeral_string_count_octal(const char *string)
static bool txtfmt_py_numeral_char_is_decimal(const char c)
#define TXTFMT_PY_NUMERAL_STRING_COUNT_IMPL(txtfmt_py_numeral_char_is_fn)
static char txtfmt_py_format_identifier(const char *str)
static int txtfmt_py_find_specialvar(const char *string)
static int txtfmt_py_find_bool(const char *string)
static int txtfmt_py_find_decorator(const char *string)
static bool txtfmt_py_numeral_char_is_binary(const char c)
static const Span< const char * > text_format_py_literals_builtinfunc(text_format_py_literals_builtinfunc_data, ARRAY_SIZE(text_format_py_literals_builtinfunc_data))
static const Span< const char * > text_format_py_literals_specialvar(text_format_py_literals_specialvar_data, ARRAY_SIZE(text_format_py_literals_specialvar_data))
static int txtfmt_py_literal_numeral(const char *string, char prev_fmt)
static bool txtfmt_py_numeral_char_is_zero(const char c)
static uint txtfmt_py_numeral_string_count_zeros(const char *string)
static uint txtfmt_py_numeral_string_count_binary(const char *string)
static int txtfmt_py_find_builtinfunc(const char *string)
static const char * text_format_py_literals_specialvar_data[]
static void txtfmt_py_format_line(SpaceText *st, TextLine *line, const bool do_next)
static uint txtfmt_py_numeral_string_count_decimal(const char *string)
static const Span< const char * > text_format_py_literals_bool(text_format_py_literals_bool_data, ARRAY_SIZE(text_format_py_literals_bool_data))
static bool txtfmt_py_numeral_char_is_octal(const char c)
static const char * text_format_py_literals_bool_data[]
void ED_text_format_register_py()
static const char * text_format_py_literals_builtinfunc_data[]