Blender V5.0
shader_parser.hh
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
33
34#pragma once
35
36#include <algorithm>
37#include <cassert>
38#include <chrono>
39#include <cstdint>
40#include <functional>
41#include <iostream>
42#include <stack>
43#include <string>
44#include <vector>
45
47
48enum TokenType : char {
50 /* Use ascii chars to store them in string, and for easy debugging / testing. */
51 Word = 'w',
52 NewLine = '\n',
53 Space = ' ',
54 Dot = '.',
55 Hash = '#',
56 Ampersand = '&',
57 Number = '0',
58 String = '_',
59 ParOpen = '(',
60 ParClose = ')',
65 AngleOpen = '<',
67 Assign = '=',
68 SemiColon = ';',
69 Question = '?',
70 Not = '!',
71 Colon = ':',
72 Comma = ',',
73 Star = '*',
74 Plus = '+',
75 Minus = '-',
76 Divide = '/',
77 Tilde = '~',
78 Backslash = '\\',
79 /* Keywords */
80 Break = 'b',
81 Const = 'c',
82 Constexpr = 'C',
83 Decrement = 'D',
84 Deref = 'D',
85 Do = 'd',
86 Equal = 'E',
87 NotEqual = 'e',
88 For = 'f',
89 While = 'F',
90 GEqual = 'G',
91 Case = 'H',
92 Switch = 'h',
93 Else = 'I',
94 If = 'i',
95 LEqual = 'L',
96 Enum = 'M',
97 Static = 'm',
98 Namespace = 'n',
100 Continue = 'O',
102 Return = 'r',
103 Class = 'S',
104 Struct = 's',
105 Template = 't',
106 This = 'T',
107 Using = 'u',
108 Private = 'v',
109 Public = 'V',
110};
111
112enum class ScopeType : char {
113 /* Use ascii chars to store them in string, and for easy debugging / testing. */
114 Global = 'G',
116 Struct = 'S',
117 Function = 'F',
118 LoopArgs = 'l',
119 LoopBody = 'p',
124 Template = 'T',
129 /* Added scope inside function body. */
130 Local = 'L',
131 /* Added scope inside FunctionArgs. */
133 /* Added scope inside LoopArgs. */
134 LoopArg = 'r',
135
136};
137
138/* Poor man's IndexRange. */
142
143 IndexRange(size_t start, size_t size) : start(start), size(size) {}
144
145 bool overlaps(IndexRange other) const
146 {
147 return ((start < other.start) && (other.start < (start + size))) ||
148 ((other.start < start) && (start < (other.start + other.size)));
149 }
150
152 {
153 return start + size - 1;
154 }
155};
156
157/* Poor man's OffsetIndices. */
159 std::vector<size_t> offsets;
160
161 IndexRange operator[](const int64_t index) const
162 {
163 return {offsets[index], offsets[index + 1] - offsets[index]};
164 }
165
166 void clear()
167 {
168 offsets.clear();
169 };
170};
171
172struct Scope;
173
175 std::string str;
176
177 std::string token_types;
178 std::string scope_types;
179 /* Ranges of characters per token. */
181 /* Index of bottom most scope per token. */
182 std::vector<int> token_scope;
183 /* Range of token per scope. */
184 std::vector<IndexRange> scope_ranges;
185
186 /* If keep_whitespace is false, whitespaces are merged with the previous token. */
187 void tokenize(const bool keep_whitespace)
188 {
189 if (str.empty()) {
190 *this = {};
191 return;
192 }
193
194 {
195 /* Tokenization. */
196 token_types.clear();
197 token_offsets.clear();
198
199 token_types += char(to_type(str[0]));
200 token_offsets.offsets.emplace_back(0);
201
202 /* When doing whitespace merging, keep knowledge about whether previous char was whitespace.
203 * This allows to still split words on spaces. */
204 bool prev_was_whitespace = (token_types[0] == NewLine || token_types[0] == Space);
205 bool inside_preprocessor_directive = token_types[0] == Hash;
206 bool next_character_is_escape = false;
207 bool inside_string = false;
208
209 int offset = 0;
210 for (const char &c : str.substr(1)) {
211 offset++;
212 TokenType type = to_type(c);
213 TokenType prev = TokenType(token_types.back());
214
215 /* Merge string literal. */
216 if (inside_string) {
217 if (!next_character_is_escape && c == '\"') {
218 inside_string = false;
219 }
220 next_character_is_escape = c == '\\';
221 continue;
222 }
223 if (c == '\"') {
224 inside_string = true;
225 }
226 /* Detect preprocessor directive newlines `\\\n`. */
227 if (prev == Backslash && type == NewLine) {
229 continue;
230 }
231 /* Make sure to keep the ending newline for a preprocessor directive. */
232 if (inside_preprocessor_directive && type == NewLine) {
233 inside_preprocessor_directive = false;
234 token_types += char(type);
235 token_offsets.offsets.emplace_back(offset);
236 continue;
237 }
238 if (type == Hash) {
239 inside_preprocessor_directive = true;
240 }
241 /* Merge newlines and spaces with previous token. */
242 if (!keep_whitespace && (type == NewLine || type == Space)) {
243 prev_was_whitespace = true;
244 continue;
245 }
246 /* Merge '=='. */
247 if (prev == Assign && type == Assign) {
248 token_types.back() = Equal;
249 continue;
250 }
251 /* Merge '!='. */
252 if (prev == '!' && type == Assign) {
253 token_types.back() = NotEqual;
254 continue;
255 }
256 /* Merge '>='. */
257 if (prev == '>' && type == Assign) {
258 token_types.back() = GEqual;
259 continue;
260 }
261 /* Merge '<='. */
262 if (prev == '<' && type == Assign) {
263 token_types.back() = LEqual;
264 continue;
265 }
266 /* Merge '->'. */
267 if (prev == '-' && type == '>') {
268 token_types.back() = Deref;
269 continue;
270 }
271 /* If digit is part of word. */
272 if (type == Number && prev == Word && !prev_was_whitespace) {
273 continue;
274 }
275 /* If 'x' is part of hex literal. */
276 if (c == 'x' && prev == Number) {
277 continue;
278 }
279 /* If 'A-F' is part of hex literal. */
280 if (c >= 'A' && c <= 'F' && prev == Number) {
281 continue;
282 }
283 /* If 'a-f' is part of hex literal. */
284 if (c >= 'a' && c <= 'f' && prev == Number) {
285 continue;
286 }
287 /* If 'u' is part of unsigned int literal. */
288 if (c == 'u' && prev == Number) {
289 continue;
290 }
291 /* If dot is part of float literal. */
292 if (type == Dot && prev == Number) {
293 continue;
294 }
295 /* If 'f' suffix is part of float literal. */
296 if (c == 'f' && prev == Number) {
297 continue;
298 }
299 /* If 'e' is part of float literal. */
300 if (c == 'e' && prev == Number) {
301 continue;
302 }
303 /* If sign is part of float literal after exponent. */
304 if ((c == '+' || c == '-') && prev == Number) {
305 continue;
306 }
307 /* Detect increment. */
308 if (type == '+' && prev == '+') {
309 token_types.back() = Increment;
310 continue;
311 }
312 /* Detect decrement. */
313 if (type == '+' && prev == '+') {
314 token_types.back() = Decrement;
315 continue;
316 }
317 /* Only merge these token. Otherwise, always emit a token. */
318 if (type != Word && type != NewLine && type != Space && type != Number) {
319 prev = Word;
320 }
321 /* Split words on whitespaces even when merging. */
322 if (!keep_whitespace && type == Word && prev_was_whitespace) {
323 prev = Space;
324 prev_was_whitespace = false;
325 }
326 /* Emit a token if we don't merge. */
327 if (type != prev) {
328 token_types += char(type);
329 token_offsets.offsets.emplace_back(offset);
330 }
331 }
332 offset++;
333 token_offsets.offsets.emplace_back(offset);
334 }
335 {
336 /* Keywords detection. */
337 int tok_id = -1;
338 for (char &c : token_types) {
339 tok_id++;
340 if (TokenType(c) == Word) {
341 IndexRange range = token_offsets[tok_id];
342 std::string word = str.substr(range.start, range.size);
343 if (!keep_whitespace) {
344 size_t last_non_whitespace = word.find_last_not_of(" \n");
345 if (last_non_whitespace != std::string::npos) {
346 word = word.substr(0, last_non_whitespace + 1);
347 }
348 }
349
350 if (word == "namespace") {
351 c = Namespace;
352 }
353 else if (word == "struct") {
354 c = Struct;
355 }
356 else if (word == "class") {
357 c = Class;
358 }
359 else if (word == "const") {
360 c = Const;
361 }
362 else if (word == "constexpr") {
363 c = Constexpr;
364 }
365 else if (word == "return") {
366 c = Return;
367 }
368 else if (word == "break") {
369 c = Break;
370 }
371 else if (word == "continue") {
372 c = Continue;
373 }
374 else if (word == "case") {
375 c = Case;
376 }
377 else if (word == "switch") {
378 c = Switch;
379 }
380 else if (word == "if") {
381 c = If;
382 }
383 else if (word == "else") {
384 c = Else;
385 }
386 else if (word == "while") {
387 c = While;
388 }
389 else if (word == "do") {
390 c = Do;
391 }
392 else if (word == "for") {
393 c = For;
394 }
395 else if (word == "template") {
396 c = Template;
397 }
398 else if (word == "this") {
399 c = This;
400 }
401 else if (word == "static") {
402 c = Static;
403 }
404 else if (word == "private") {
405 c = Private;
406 }
407 else if (word == "public") {
408 c = Public;
409 }
410 else if (word == "enum") {
411 c = Enum;
412 }
413 else if (word == "using") {
414 c = Using;
415 }
416 }
417 }
418 }
419 }
420
421 using report_callback = std::function<void(
422 int error_line, int error_char, std::string error_line_string, const char *error_str)>;
423
424 void parse_scopes(report_callback &report_error);
425
426 private:
427 TokenType to_type(const char c)
428 {
429 switch (c) {
430 case '\n':
431 return TokenType::NewLine;
432 case ' ':
433 return TokenType::Space;
434 case '#':
435 return TokenType::Hash;
436 case '&':
438 case '.':
439 return TokenType::Dot;
440 case '(':
441 return TokenType::ParOpen;
442 case ')':
443 return TokenType::ParClose;
444 case '{':
446 case '}':
448 case '[':
450 case ']':
452 case '<':
454 case '>':
456 case '=':
457 return TokenType::Assign;
458 case '!':
459 return TokenType::Not;
460 case '*':
461 return TokenType::Star;
462 case '-':
463 return TokenType::Minus;
464 case '+':
465 return TokenType::Plus;
466 case '/':
467 return TokenType::Divide;
468 case '~':
469 return TokenType::Tilde;
470 case '\\':
472 case '\"':
473 return TokenType::String;
474 case '?':
475 return TokenType::Question;
476 case ':':
477 return TokenType::Colon;
478 case ',':
479 return TokenType::Comma;
480 case ';':
482 case '0':
483 case '1':
484 case '2':
485 case '3':
486 case '4':
487 case '5':
488 case '6':
489 case '7':
490 case '9':
491 return TokenType::Number;
492 default:
493 return TokenType::Word;
494 }
495 }
496};
497
498struct Token {
499 /* String view for nicer debugging experience. Isn't actually used. */
500 std::string_view str_view;
501
504
505 static Token invalid()
506 {
507 return {"", nullptr, 0};
508 }
509
511 {
512 if (index < 0 || index > (data->token_offsets.offsets.size() - 2)) {
513 return invalid();
514 }
515 IndexRange index_range = data->token_offsets[index];
516 return {std::string_view(data->str).substr(index_range.start, index_range.size), data, index};
517 }
518
519 bool is_valid() const
520 {
521 return data != nullptr && index >= 0;
522 }
523 bool is_invalid() const
524 {
525 return !is_valid();
526 }
527
528 /* String index range. */
530 {
531 if (is_invalid()) {
532 return {0, 0};
533 }
534 return data->token_offsets[index];
535 }
536
537 Token prev() const
538 {
539 return from_position(data, index - 1);
540 }
541 Token next() const
542 {
543 return from_position(data, index + 1);
544 }
545
547 {
548 Token tok = this->next();
549 while (tok.is_valid() && tok != type) {
550 tok = tok.next();
551 }
552 return tok;
553 }
554
555 /* Return the first container scope of this token that has the given type.
556 * Returns invalid scope on failure. */
558
559 /* Return the start of the namespace identifier if the token is part of one. */
561 {
562 if (*this != Word) {
563 return *this;
564 }
565 /* Scan back identifier that could contain namespaces. */
566 Token tok = *this;
567 while (tok.is_valid()) {
568 if (tok.prev() == ':') {
569 tok = tok.prev().prev().prev();
570 }
571 else {
572 return tok;
573 }
574 }
575 return tok;
576 }
577
578 /* For a word, return the name containing the prefix namespaces if present. */
579 std::string full_symbol_name() const
580 {
581 size_t start = this->namespace_start().str_index_start();
582 size_t end = this->str_index_last_no_whitespace();
583 return data->str.substr(start, end - start + 1);
584 }
585
586 /* Only usable when building with whitespace. */
588 {
589 Token next = this->next();
590 while (next == ' ' || next == '\n') {
591 next = next.next();
592 }
593 return next;
594 }
595
596 /* Returns the scope that contains this token. */
597 Scope scope() const;
598
599 size_t str_index_start() const
600 {
601 return index_range().start;
602 }
603
604 size_t str_index_last() const
605 {
606 return index_range().last();
607 }
608
610 {
611 return data->str.find_last_not_of(" \n", str_index_last());
612 }
613
614 /* Index of the first character of the line this token is. */
615 size_t line_start() const
616 {
617 size_t pos = data->str.rfind('\n', str_index_start());
618 return (pos == std::string::npos) ? 0 : (pos + 1);
619 }
620
621 /* Index of the last character of the line this token is, excluding `\n`. */
622 size_t line_end() const
623 {
624 size_t pos = data->str.find('\n', str_index_start());
625 return (pos == std::string::npos) ? (data->str.size() - 1) : (pos - 1);
626 }
627
628 std::string str_with_whitespace() const
629 {
630 return data->str.substr(index_range().start, index_range().size);
631 }
632
633 std::string str() const
634 {
635 std::string str = this->str_with_whitespace();
636 return str.substr(0, str.find_last_not_of(" \n") + 1);
637 }
638
639 /* Return the content without the first and last characters. */
640 std::string str_exclusive() const
641 {
642 std::string str = this->str();
643 if (str.length() < 2) {
644 return "";
645 }
646 return str.substr(1, str.length() - 2);
647 }
648
649 /* Return the line number this token is found at. Take into account the #line directives. */
650 size_t line_number() const
651 {
652 std::string directive = "#line ";
653 /* String to count the number of line. */
654 std::string sub_str = data->str.substr(0, str_index_start());
655 size_t nearest_line_directive = sub_str.rfind(directive);
656 size_t line_count = 1;
657 if (nearest_line_directive != std::string::npos) {
658 sub_str = sub_str.substr(nearest_line_directive + directive.size());
659 line_count = std::stoll(sub_str) - 1;
660 }
661 return line_count + std::count(sub_str.begin(), sub_str.end(), '\n');
662 }
663
664 /* Return the offset to the start of the line. */
665 size_t char_number() const
666 {
667 std::string sub_str = data->str.substr(0, str_index_start());
668 size_t nearest_line_directive = sub_str.rfind('\n');
669 return (nearest_line_directive == std::string::npos) ?
670 (sub_str.size()) :
671 (sub_str.size() - nearest_line_directive - 1);
672 }
673
674 /* Return the line the token is at. */
675 std::string line_str() const
676 {
677 size_t start = data->str.rfind('\n', str_index_start());
678 size_t end = data->str.find('\n', str_index_start());
679 start = (start != std::string::npos) ? start + 1 : 0;
680 return data->str.substr(start, end - start);
681 }
682
684 {
685 if (is_invalid()) {
686 return Invalid;
687 }
688 return TokenType(data->token_types[index]);
689 }
690
692 {
693 return this->type() == type;
694 }
696 {
697 return !(*this == type);
698 }
699 bool operator==(char type) const
700 {
701 return *this == TokenType(type);
702 }
703 bool operator!=(char type) const
704 {
705 return *this != TokenType(type);
706 }
707
708 bool operator==(const Token &other) const
709 {
710 return this->index == other.index && this->data == other.data;
711 }
712 bool operator!=(const Token &other) const
713 {
714 return !(*this == other);
715 }
716};
717
718struct Scope {
719 /* String view for nicer debugging experience. Isn't actually used. */
720 std::string_view token_view;
721 std::string_view str_view;
722
725
727 {
728 IndexRange index_range = data->scope_ranges[index];
729 int str_start = data->token_offsets[index_range.start].start;
730 int str_end = data->token_offsets[index_range.last()].last();
731 return {std::string_view(data->token_types).substr(index_range.start, index_range.size),
732 std::string_view(data->str).substr(str_start, str_end - str_start + 1),
733 data,
734 index};
735 }
736
737 Token start() const
738 {
740 }
741
742 Token end() const
743 {
744 return Token::from_position(data, range().last());
745 }
746
748 {
749 return data->scope_ranges[index];
750 }
751
753 {
755 }
756
757 size_t token_count() const
758 {
759 return range().size;
760 }
761
763 {
764 return ScopeType(data->scope_types[index]);
765 }
766
767 /* Returns the scope that contains this scope. */
768 Scope scope() const
769 {
770 return start().prev().scope();
771 }
772
773 static Scope invalid()
774 {
775 return {"", "", nullptr, -1};
776 }
777
778 bool is_valid() const
779 {
780 return data != nullptr && index >= 0;
781 }
782 bool is_invalid() const
783 {
784 return !is_valid();
785 }
786
787 bool contains(const Scope sub) const
788 {
789 Scope parent = sub.scope();
790 while (parent.type() != ScopeType::Global && parent != *this) {
791 parent = parent.scope();
792 }
793 return parent == *this;
794 }
795
796 std::string str() const
797 {
798 if (this->is_invalid()) {
799 return "";
800 }
801 return data->str.substr(start().str_index_start(),
802 end().str_index_last() - start().str_index_start() + 1);
803 }
804
805 /* Return the content without the first and last characters. */
806 std::string str_exclusive() const
807 {
808 if (this->is_invalid()) {
809 return "";
810 }
811 return data->str.substr(start().str_index_start() + 1,
812 end().str_index_last() - start().str_index_start() - 1);
813 }
814
815 Token find_token(const char token_type) const
816 {
817 size_t pos = data->token_types.substr(range().start, range().size).find(token_type);
818 return (pos != std::string::npos) ? Token::from_position(data, range().start + pos) :
820 }
821
822 bool contains_token(const char token_type) const
823 {
824 return find_token(token_type).is_valid();
825 }
826
827 void foreach_match(const std::string &pattern,
828 std::function<void(const std::vector<Token>)> callback) const
829 {
830 assert(!pattern.empty());
831 const std::string_view scope_tokens =
832 std::string_view(data->token_types).substr(range().start, range().size);
833
834 auto count_match = [](const std::string_view &s, const std::string_view &pattern) {
835 size_t pos = 0, occurrences = 0;
836 while ((pos = s.find(pattern, pos)) != std::string::npos) {
837 occurrences += 1;
838 pos += pattern.length();
839 }
840 return occurrences;
841 };
842 const int control_token_count = count_match(pattern, "?") * 2 + count_match(pattern, "..") * 2;
843
844 if (range().size < pattern.size() - control_token_count) {
845 return;
846 }
847
848 const size_t searchable_range = scope_tokens.size() -
849 (pattern.size() - 1 - control_token_count);
850
851 std::vector<Token> match;
852 match.resize(pattern.size());
853
854 for (size_t pos = 0; pos < searchable_range; pos++) {
855 size_t cursor = range().start + pos;
856
857 for (int i = 0; i < pattern.size(); i++) {
858 bool is_last_token = i == pattern.size() - 1;
859 TokenType token_type = TokenType(data->token_types[cursor]);
860 TokenType curr_search_token = TokenType(pattern[i]);
861 TokenType next_search_token = TokenType(is_last_token ? '\0' : pattern[i + 1]);
862
863 /* Scope skipping. */
864 if (!is_last_token && curr_search_token == '.' && next_search_token == '.') {
865 cursor = match[i - 1].scope().end().index;
866 i++;
867 continue;
868 }
869
870 /* Regular token. */
871 if (curr_search_token == token_type) {
872 match[i] = Token::from_position(data, cursor++);
873
874 if (is_last_token) {
875 callback(match);
876 }
877 }
878 else if (!is_last_token && curr_search_token != '?' && next_search_token == '?') {
879 /* This was and optional token. Continue scanning. */
880 match[i] = Token::invalid();
881 i++;
882 }
883 else {
884 /* Token mismatch. Test next position. */
885 break;
886 }
887 }
888 }
889 }
890
891 /* Will iterate over all the scopes that are direct children. */
892 void foreach_scope(ScopeType type, std::function<void(Scope)> callback) const
893 {
894 size_t pos = this->index;
895 while ((pos = data->scope_types.find(char(type), pos)) != std::string::npos) {
897 if (scope.start().index > this->end().index) {
898 /* Found scope starts after this scope. End iteration. */
899 break;
900 }
901 /* Make sure found scope is direct child of this scope. */
902 if (scope.start().scope().scope().index == this->index) {
903 callback(scope);
904 }
905 pos += 1;
906 }
907 }
908
909 void foreach_token(const TokenType token_type, std::function<void(const Token)> callback) const
910 {
911 const char str[2] = {token_type, '\0'};
912 foreach_match(str, [&](const std::vector<Token> &tokens) { callback(tokens[0]); });
913 }
914
915 /* Run a callback for all existing function scopes. */
917 std::function<void(
918 bool is_static, Token type, Token name, Scope args, bool is_const, Scope body)> callback)
919 const
920 {
921 foreach_match("m?ww(..)c?{..}", [&](const std::vector<Token> matches) {
922 callback(matches[0] == Static,
923 matches[2],
924 matches[3],
925 matches[4].scope(),
926 matches[8] == Const,
927 matches[10].scope());
928 });
929 foreach_match("m?ww::w(..)c?{..}", [&](const std::vector<Token> matches) {
930 callback(matches[0] == Static,
931 matches[2],
932 matches[6],
933 matches[7].scope(),
934 matches[11] == Const,
935 matches[13].scope());
936 });
937 foreach_match("m?ww<..>(..)c?{..}", [&](const std::vector<Token> matches) {
938 callback(matches[0] == Static,
939 matches[2],
940 matches[3],
941 matches[8].scope(),
942 matches[12] == Const,
943 matches[14].scope());
944 });
945 }
946
947 /* Run a callback for all existing struct scopes. */
948 void foreach_struct(std::function<void(Token struct_tok, Token name, Scope body)> callback) const
949 {
950 foreach_match("sw{..}", [&](const std::vector<Token> matches) {
951 callback(matches[0], matches[1], matches[2].scope());
952 });
953 foreach_match("Sw{..}", [&](const std::vector<Token> matches) {
954 callback(matches[0], matches[1], matches[2].scope());
955 });
956 foreach_match("sw<..>{..}", [&](const std::vector<Token> matches) {
957 callback(matches[0], matches[1], matches[6].scope());
958 });
959 foreach_match("Sw<..>{..}", [&](const std::vector<Token> matches) {
960 callback(matches[0], matches[1], matches[6].scope());
961 });
962 }
963
964 bool operator==(const Scope &other) const
965 {
966 return this->index == other.index && this->data == other.data;
967 }
968 bool operator!=(const Scope &other) const
969 {
970 return !(*this == other);
971 }
972};
973
974inline Scope Token::scope() const
975{
976 return Scope::from_position(data, data->token_scope[index]);
977}
978
980{
981 Scope scope = this->scope();
982 while (scope.type() != ScopeType::Global && scope.type() != type) {
983 scope = scope.scope();
984 }
985 return scope.type() == type ? scope : Scope::invalid();
986}
987
989{
990 {
991 /* Scope detection. */
992 scope_ranges.clear();
993 scope_types.clear();
994
995 struct ScopeItem {
996 ScopeType type;
997 size_t start;
998 int index;
999 };
1000
1001 int scope_index = 0;
1002 std::stack<ScopeItem> scopes;
1003
1004 auto enter_scope = [&](ScopeType type, size_t start_tok_id) {
1005 scopes.emplace(ScopeItem{type, start_tok_id, scope_index++});
1006 scope_ranges.emplace_back(start_tok_id, 1);
1007 scope_types += char(type);
1008 };
1009
1010 auto exit_scope = [&](int end_tok_id) {
1011 ScopeItem scope = scopes.top();
1012 scope_ranges[scope.index].size = end_tok_id - scope.start + 1;
1013 scopes.pop();
1014 };
1015
1016 enter_scope(ScopeType::Global, 0);
1017
1018 int in_template = 0;
1019
1020 int tok_id = -1;
1021 for (char &c : token_types) {
1022 tok_id++;
1023
1024 if (scopes.top().type == ScopeType::Preprocessor) {
1025 if (TokenType(c) == NewLine) {
1026 exit_scope(tok_id);
1027 }
1028 else {
1029 /* Do nothing. Enclose all preprocessor lines together. */
1030 continue;
1031 }
1032 }
1033
1034 switch (TokenType(c)) {
1035 case Hash:
1036 enter_scope(ScopeType::Preprocessor, tok_id);
1037 break;
1038 case Assign:
1039 if (scopes.top().type == ScopeType::Assignment) {
1040 /* Chained assignments. */
1041 exit_scope(tok_id - 1);
1042 }
1043 enter_scope(ScopeType::Assignment, tok_id);
1044 break;
1045 case BracketOpen: {
1046 /* Scan back identifier that could contain namespaces. */
1047 TokenType keyword;
1048 int pos = 2;
1049 do {
1050 keyword = (tok_id >= pos) ? TokenType(token_types[tok_id - pos]) : TokenType::Invalid;
1051 pos += 3;
1052 } while (keyword != Invalid && keyword == Colon);
1053
1054 if (keyword == Struct) {
1055 enter_scope(ScopeType::Local, tok_id);
1056 }
1057 else if (keyword == Enum) {
1058 enter_scope(ScopeType::Local, tok_id);
1059 }
1060 else if (keyword == Namespace) {
1061 enter_scope(ScopeType::Namespace, tok_id);
1062 }
1063 else if (ScopeType(scope_types.back()) == ScopeType::LoopArg) {
1064 enter_scope(ScopeType::LoopBody, tok_id);
1065 }
1066 else if (ScopeType(scope_types.back()) == ScopeType::SwitchArg) {
1067 enter_scope(ScopeType::SwitchBody, tok_id);
1068 }
1069 else if (scopes.top().type == ScopeType::Global) {
1070 enter_scope(ScopeType::Function, tok_id);
1071 }
1072 else if (scopes.top().type == ScopeType::Struct) {
1073 enter_scope(ScopeType::Function, tok_id);
1074 }
1075 else if (scopes.top().type == ScopeType::Namespace) {
1076 enter_scope(ScopeType::Function, tok_id);
1077 }
1078 else {
1079 enter_scope(ScopeType::Local, tok_id);
1080 }
1081 break;
1082 }
1083 case ParOpen:
1084 if ((tok_id >= 1 && token_types[tok_id - 1] == For) ||
1085 (tok_id >= 1 && token_types[tok_id - 1] == While))
1086 {
1087 enter_scope(ScopeType::LoopArgs, tok_id);
1088 }
1089 else if (tok_id >= 1 && token_types[tok_id - 1] == Switch) {
1090 enter_scope(ScopeType::SwitchArg, tok_id);
1091 }
1092 else if (scopes.top().type == ScopeType::Global) {
1093 enter_scope(ScopeType::FunctionArgs, tok_id);
1094 }
1095 else if (scopes.top().type == ScopeType::Struct) {
1096 enter_scope(ScopeType::FunctionArgs, tok_id);
1097 }
1098 else if ((scopes.top().type == ScopeType::Function ||
1099 scopes.top().type == ScopeType::Local) &&
1100 (tok_id >= 1 && token_types[tok_id - 1] == Word))
1101 {
1102 enter_scope(ScopeType::FunctionCall, tok_id);
1103 }
1104 else {
1105 enter_scope(ScopeType::Local, tok_id);
1106 }
1107 break;
1108 case SquareOpen:
1109 enter_scope(ScopeType::Subscript, tok_id);
1110 break;
1111 case AngleOpen:
1112 if (tok_id >= 1) {
1113 char prev_char = str[token_offsets[tok_id - 1].last()];
1114 /* Rely on the fact that template are formatted without spaces but comparison isn't. */
1115 if ((prev_char != ' ' && prev_char != '\n' && prev_char != '<') ||
1116 token_types[tok_id - 1] == Template)
1117 {
1118 enter_scope(ScopeType::Template, tok_id);
1119 in_template++;
1120 }
1121 }
1122 break;
1123 case AngleClose:
1124 if (in_template > 0 && scopes.top().type == ScopeType::Assignment) {
1125 exit_scope(tok_id - 1);
1126 }
1127 if (scopes.top().type == ScopeType::TemplateArg) {
1128 exit_scope(tok_id - 1);
1129 }
1130 if (scopes.top().type == ScopeType::Template) {
1131 exit_scope(tok_id);
1132 in_template--;
1133 }
1134 break;
1135 case BracketClose:
1136 case ParClose:
1137 if (scopes.top().type == ScopeType::Assignment) {
1138 exit_scope(tok_id - 1);
1139 }
1140 if (scopes.top().type == ScopeType::FunctionArg) {
1141 exit_scope(tok_id - 1);
1142 }
1143 if (scopes.top().type == ScopeType::LoopArg) {
1144 exit_scope(tok_id - 1);
1145 }
1146 exit_scope(tok_id);
1147 break;
1148 case SquareClose:
1149 exit_scope(tok_id);
1150 break;
1151 case SemiColon:
1152 if (scopes.top().type == ScopeType::Assignment) {
1153 exit_scope(tok_id - 1);
1154 }
1155 if (scopes.top().type == ScopeType::FunctionArg) {
1156 exit_scope(tok_id - 1);
1157 }
1158 if (scopes.top().type == ScopeType::TemplateArg) {
1159 exit_scope(tok_id - 1);
1160 }
1161 if (scopes.top().type == ScopeType::LoopArg) {
1162 exit_scope(tok_id - 1);
1163 }
1164 break;
1165 case Comma:
1166 if (scopes.top().type == ScopeType::Assignment) {
1167 exit_scope(tok_id - 1);
1168 }
1169 if (scopes.top().type == ScopeType::FunctionArg) {
1170 exit_scope(tok_id - 1);
1171 }
1172 if (scopes.top().type == ScopeType::TemplateArg) {
1173 exit_scope(tok_id - 1);
1174 }
1175 break;
1176 default:
1177 if (scopes.top().type == ScopeType::FunctionArgs) {
1178 enter_scope(ScopeType::FunctionArg, tok_id);
1179 }
1180 if (scopes.top().type == ScopeType::LoopArgs) {
1181 enter_scope(ScopeType::LoopArg, tok_id);
1182 }
1183 if (scopes.top().type == ScopeType::Template) {
1184 enter_scope(ScopeType::TemplateArg, tok_id);
1185 }
1186 break;
1187 }
1188 }
1189
1190 if (scopes.top().type == ScopeType::Preprocessor) {
1191 exit_scope(tok_id - 1);
1192 }
1193
1194 if (scopes.top().type != ScopeType::Global) {
1195 ScopeItem scope_item = scopes.top();
1196 Token token = Token::from_position(this, scope_ranges[scope_item.index].start);
1197 report_error(
1198 token.line_number(), token.char_number(), token.line_str(), "unterminated scope");
1199
1200 /* Avoid out of bound access for the rest of the processing. Empty everything. */
1201 *this = {};
1202 return;
1203 }
1204
1205 exit_scope(tok_id);
1206 }
1207 {
1208 token_scope.clear();
1209 token_scope.resize(scope_ranges[0].size);
1210
1211 int scope_id = -1;
1212 for (const IndexRange &range : scope_ranges) {
1213 scope_id++;
1214 for (int i = 0; i < range.size; i++) {
1215 int j = range.start + i;
1216 token_scope[j] = scope_id;
1217 }
1218 }
1219 }
1220}
1221
1222struct Parser {
1223 private:
1224 ParserData data_;
1225
1226 /* If false, the whitespaces are fused with the tokens. Otherwise they are kept as separate space
1227 * and newline tokens. */
1228 bool keep_whitespace_;
1229
1230 struct Mutation {
1231 /* Range of the original string to replace. */
1232 IndexRange src_range;
1233 /* The replacement string. */
1234 std::string replacement;
1235
1236 Mutation(IndexRange src_range, std::string replacement)
1237 : src_range(src_range), replacement(replacement)
1238 {
1239 }
1240
1241 /* Define operator in order to sort the mutation by starting position.
1242 * Otherwise, applying them in one pass will not work. */
1243 friend bool operator<(const Mutation &a, const Mutation &b)
1244 {
1245 return a.src_range.start < b.src_range.start;
1246 }
1247 };
1248 std::vector<Mutation> mutations_;
1249
1250 ParserData::report_callback &report_error;
1251
1252 public:
1253 Parser(const std::string &input,
1254 ParserData::report_callback &report_error,
1255 bool keep_whitespace = false)
1256 : keep_whitespace_(keep_whitespace), report_error(report_error)
1257 {
1258 data_.str = input;
1259 parse(report_error);
1260 }
1261
1262 /* Run a callback for all existing scopes of a given type. */
1263 void foreach_scope(ScopeType type, std::function<void(Scope)> callback)
1264 {
1265 size_t pos = 0;
1266 while ((pos = data_.scope_types.find(char(type), pos)) != std::string::npos) {
1267 callback(Scope::from_position(&data_, pos));
1268 pos += 1;
1269 }
1270 }
1271
1272 void foreach_match(const std::string &pattern,
1273 std::function<void(const std::vector<Token>)> callback)
1274 {
1276 [&](const Scope scope) { scope.foreach_match(pattern, callback); });
1277 }
1278
1279 void foreach_token(const TokenType token_type, std::function<void(const Token)> callback)
1280 {
1281 const char str[2] = {token_type, '\0'};
1282 foreach_match(str, [&](const std::vector<Token> &tokens) { callback(tokens[0]); });
1283 }
1284
1285 /* Run a callback for all existing function scopes. */
1287 std::function<void(
1288 bool is_static, Token type, Token name, Scope args, bool is_const, Scope body)> callback)
1289 {
1290 foreach_match("m?ww(..)c?{..}", [&](const std::vector<Token> matches) {
1291 callback(matches[0] == Static,
1292 matches[2],
1293 matches[3],
1294 matches[4].scope(),
1295 matches[8] == Const,
1296 matches[10].scope());
1297 });
1298 foreach_match("m?ww::w(..)c?{..}", [&](const std::vector<Token> matches) {
1299 callback(matches[0] == Static,
1300 matches[2],
1301 matches[6],
1302 matches[7].scope(),
1303 matches[11] == Const,
1304 matches[13].scope());
1305 });
1306 foreach_match("m?ww<..>(..)c?{..}", [&](const std::vector<Token> matches) {
1307 callback(matches[0] == Static,
1308 matches[2],
1309 matches[3],
1310 matches[8].scope(),
1311 matches[12] == Const,
1312 matches[14].scope());
1313 });
1314 }
1315
1316 std::string substr_range_inclusive(size_t start, size_t end)
1317 {
1318 return data_.str.substr(start, end - start + 1);
1319 }
1320 std::string substr_range_inclusive(Token start, Token end)
1321 {
1323 }
1324
1325 /* Replace everything from `from` to `to` (inclusive).
1326 * Return true on success. */
1327 bool replace_try(size_t from, size_t to, const std::string &replacement)
1328 {
1329 IndexRange range = IndexRange(from, to + 1 - from);
1330 for (const Mutation &mut : mutations_) {
1331 if (mut.src_range.overlaps(range)) {
1332 return false;
1333 }
1334 }
1335 mutations_.emplace_back(range, replacement);
1336 return true;
1337 }
1338 /* Replace everything from `from` to `to` (inclusive).
1339 * Return true on success. */
1341 Token to,
1342 const std::string &replacement,
1343 bool keep_trailing_whitespaces = false)
1344 {
1345 if (keep_trailing_whitespaces) {
1346 return replace_try(from.str_index_start(), to.str_index_last_no_whitespace(), replacement);
1347 }
1348 return replace_try(from.str_index_start(), to.str_index_last(), replacement);
1349 }
1350
1351 /* Replace everything from `from` to `to` (inclusive). */
1352 void replace(size_t from, size_t to, const std::string &replacement)
1353 {
1354 bool success = replace_try(from, to, replacement);
1355 assert(success);
1356 (void)success;
1357 }
1358 /* Replace everything from `from` to `to` (inclusive). */
1359 void replace(Token from, Token to, const std::string &replacement)
1360 {
1361 replace(from.str_index_start(), to.str_index_last(), replacement);
1362 }
1363 /* Replace token by string. */
1364 void replace(Token tok, const std::string &replacement, bool keep_trailing_whitespaces = false)
1365 {
1366 if (keep_trailing_whitespaces) {
1367 replace(tok.str_index_start(), tok.str_index_last_no_whitespace(), replacement);
1368 }
1369 else {
1370 replace(tok.str_index_start(), tok.str_index_last(), replacement);
1371 }
1372 }
1373 /* Replace Scope by string. */
1374 void replace(Scope scope, const std::string &replacement, bool keep_trailing_whitespaces = false)
1375 {
1376 if (keep_trailing_whitespaces) {
1377 replace(scope.start().str_index_start(),
1379 replacement);
1380 }
1381 else {
1382 replace(scope.start(), scope.end(), replacement);
1383 }
1384 }
1385
1386 /* Replace the content from `from` to `to` (inclusive) by whitespaces without changing
1387 * line count and keep the remaining indentation spaces. */
1388 void erase(size_t from, size_t to)
1389 {
1390 IndexRange range = IndexRange(from, to + 1 - from);
1391 std::string content = data_.str.substr(range.start, range.size);
1392 size_t lines = std::count(content.begin(), content.end(), '\n');
1393 size_t spaces = content.find_last_not_of(" ");
1394 if (spaces != std::string::npos) {
1395 spaces = content.length() - (spaces + 1);
1396 }
1397 replace(from, to, std::string(lines, '\n') + std::string(spaces, ' '));
1398 }
1399 /* Replace the content from `from` to `to` (inclusive) by whitespaces without changing
1400 * line count and keep the remaining indentation spaces. */
1401 void erase(Token from, Token to)
1402 {
1403 erase(from.str_index_start(), to.str_index_last());
1404 }
1405 /* Replace the content from `from` to `to` (inclusive) by whitespaces without changing
1406 * line count and keep the remaining indentation spaces. */
1407 void erase(Token tok)
1408 {
1409 erase(tok, tok);
1410 }
1411 /* Replace the content of the scope by whitespaces without changing
1412 * line count and keep the remaining indentation spaces. */
1413 void erase(Scope scope)
1414 {
1415 erase(scope.start(), scope.end());
1416 }
1417
1418 void insert_after(size_t at, const std::string &content)
1419 {
1420 IndexRange range = IndexRange(at + 1, 0);
1421 mutations_.emplace_back(range, content);
1422 }
1423 void insert_after(Token at, const std::string &content)
1424 {
1425 insert_after(at.str_index_last(), content);
1426 }
1427
1428 void insert_line_number(size_t at, int line)
1429 {
1430 insert_after(at, "#line " + std::to_string(line) + "\n");
1431 }
1432 void insert_line_number(Token at, int line)
1433 {
1435 }
1436
1437 void insert_before(size_t at, const std::string &content)
1438 {
1439 IndexRange range = IndexRange(at, 0);
1440 mutations_.emplace_back(range, content);
1441 }
1442 void insert_before(Token at, const std::string &content)
1443 {
1444 insert_after(at.str_index_start(), content);
1445 }
1446
1447 /* Return true if any mutation was applied. */
1449 {
1450 if (mutations_.empty()) {
1451 return false;
1452 }
1453
1454 /* Order mutations so that they can be applied in one pass. */
1455 std::stable_sort(mutations_.begin(), mutations_.end());
1456
1457 /* Make sure to pad the input string in case of insertion after the last char. */
1458 bool added_trailing_new_line = false;
1459 if (data_.str.back() != '\n') {
1460 data_.str += '\n';
1461 added_trailing_new_line = true;
1462 }
1463
1464 int64_t offset = 0;
1465 for (const Mutation &mut : mutations_) {
1466 data_.str.replace(mut.src_range.start + offset, mut.src_range.size, mut.replacement);
1467 offset += mut.replacement.size() - mut.src_range.size;
1468 }
1469 mutations_.clear();
1470
1471 if (added_trailing_new_line) {
1472 data_.str.pop_back();
1473 }
1474 return true;
1475 }
1476
1478 {
1479 bool applied = only_apply_mutations();
1480 if (applied) {
1481 this->parse(report_error);
1482 }
1483 return applied;
1484 }
1485
1486 /* Apply mutations if any and get resulting string. */
1487 const std::string &result_get()
1488 {
1490 return data_.str;
1491 }
1492
1493 /* For testing. */
1495 {
1496 return data_;
1497 }
1498
1499 /* For testing. */
1500 std::string serialize_mutations() const
1501 {
1502 std::string out;
1503 for (const Mutation &mut : mutations_) {
1504 out += "Replace ";
1505 out += std::to_string(mut.src_range.start);
1506 out += " - ";
1507 out += std::to_string(mut.src_range.size);
1508 out += " \"";
1509 out += data_.str.substr(mut.src_range.start, mut.src_range.size);
1510 out += "\" by \"";
1511 out += mut.replacement;
1512 out += "\"\n";
1513 }
1514 return out;
1515 }
1516
1517 private:
1518 using Duration = std::chrono::microseconds;
1519 Duration tokenize_time;
1520 Duration parse_scope_time;
1521
1522 struct TimeIt {
1523 Duration &time;
1524 std::chrono::high_resolution_clock::time_point start;
1525
1526 TimeIt(Duration &time) : time(time)
1527 {
1528 start = std::chrono::high_resolution_clock::now();
1529 }
1530 ~TimeIt()
1531 {
1532 auto end = std::chrono::high_resolution_clock::now();
1533 time = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
1534 }
1535 };
1536
1537 void parse(ParserData::report_callback &report_error)
1538 {
1539 {
1540 TimeIt time_it(parse_scope_time);
1541 data_.tokenize(keep_whitespace_);
1542 }
1543 {
1544 TimeIt time_it(tokenize_time);
1545 data_.parse_scopes(report_error);
1546 }
1547 }
1548
1549 public:
1551 {
1552 std::cout << "Tokenize time: " << tokenize_time.count() << " µs" << std::endl;
1553 std::cout << "Parser time: " << parse_scope_time.count() << " µs" << std::endl;
1554 std::cout << "String len: " << std::to_string(data_.str.size()) << std::endl;
1555 std::cout << "Token len: " << std::to_string(data_.token_types.size()) << std::endl;
1556 std::cout << "Scope len: " << std::to_string(data_.scope_types.size()) << std::endl;
1557 }
1558
1560 {
1561 std::cout << "Input: \n" << data_.str << " \nEnd of Input\n" << std::endl;
1562 std::cout << "Token Types: \"" << data_.token_types << "\"" << std::endl;
1563 std::cout << "Scope Types: \"" << data_.scope_types << "\"" << std::endl;
1564 }
1565};
1566
1567} // namespace blender::gpu::shader::parser
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
#define str(s)
uint pos
#define input
#define assert(assertion)
#define out
constexpr bool operator<(StringRef a, StringRef b)
const char * name
bool overlaps(IndexRange other) const
IndexRange(size_t start, size_t size)
IndexRange operator[](const int64_t index) const
std::function< void( int error_line, int error_char, std::string error_line_string, const char *error_str)> report_callback
void parse_scopes(report_callback &report_error)
void tokenize(const bool keep_whitespace)
void insert_line_number(size_t at, int line)
bool replace_try(Token from, Token to, const std::string &replacement, bool keep_trailing_whitespaces=false)
void foreach_scope(ScopeType type, std::function< void(Scope)> callback)
void insert_line_number(Token at, int line)
void foreach_match(const std::string &pattern, std::function< void(const std::vector< Token >)> callback)
void erase(Token from, Token to)
void foreach_function(std::function< void(bool is_static, Token type, Token name, Scope args, bool is_const, Scope body)> callback)
void insert_after(Token at, const std::string &content)
void replace(Scope scope, const std::string &replacement, bool keep_trailing_whitespaces=false)
void replace(size_t from, size_t to, const std::string &replacement)
std::string substr_range_inclusive(size_t start, size_t end)
void replace(Token tok, const std::string &replacement, bool keep_trailing_whitespaces=false)
void foreach_token(const TokenType token_type, std::function< void(const Token)> callback)
void insert_after(size_t at, const std::string &content)
Parser(const std::string &input, ParserData::report_callback &report_error, bool keep_whitespace=false)
void insert_before(size_t at, const std::string &content)
bool replace_try(size_t from, size_t to, const std::string &replacement)
void erase(size_t from, size_t to)
void replace(Token from, Token to, const std::string &replacement)
std::string substr_range_inclusive(Token start, Token end)
void insert_before(Token at, const std::string &content)
bool contains_token(const char token_type) const
void foreach_struct(std::function< void(Token struct_tok, Token name, Scope body)> callback) const
Token find_token(const char token_type) const
Token operator[](const int64_t index) const
bool operator!=(const Scope &other) const
static Scope from_position(const ParserData *data, int64_t index)
void foreach_token(const TokenType token_type, std::function< void(const Token)> callback) const
void foreach_match(const std::string &pattern, std::function< void(const std::vector< Token >)> callback) const
bool contains(const Scope sub) const
void foreach_scope(ScopeType type, std::function< void(Scope)> callback) const
void foreach_function(std::function< void(bool is_static, Token type, Token name, Scope args, bool is_const, Scope body)> callback) const
bool operator==(const Scope &other) const
bool operator==(const Token &other) const
bool operator==(TokenType type) const
bool operator!=(TokenType type) const
static Token from_position(const ParserData *data, int64_t index)
bool operator!=(const Token &other) const
Scope first_containing_scope_of_type(const ScopeType type) const
Token find_next(TokenType type) const
i
Definition text_draw.cc:230