206 bool next_character_is_escape =
false;
207 bool inside_string =
false;
210 for (
const char &c :
str.substr(1)) {
217 if (!next_character_is_escape && c ==
'\"') {
218 inside_string =
false;
220 next_character_is_escape = c ==
'\\';
224 inside_string =
true;
232 if (inside_preprocessor_directive && type ==
NewLine) {
233 inside_preprocessor_directive =
false;
239 inside_preprocessor_directive =
true;
242 if (!keep_whitespace && (type ==
NewLine || type ==
Space)) {
243 prev_was_whitespace =
true;
252 if (prev ==
'!' && type ==
Assign) {
257 if (prev ==
'>' && type ==
Assign) {
262 if (prev ==
'<' && type ==
Assign) {
267 if (prev ==
'-' && type ==
'>') {
272 if (type ==
Number && prev ==
Word && !prev_was_whitespace) {
276 if (c ==
'x' && prev ==
Number) {
280 if (c >=
'A' && c <=
'F' && prev ==
Number) {
284 if (c >=
'a' && c <=
'f' && prev ==
Number) {
288 if (c ==
'u' && prev ==
Number) {
296 if (c ==
'f' && prev ==
Number) {
300 if (c ==
'e' && prev ==
Number) {
304 if ((c ==
'+' || c ==
'-') && prev ==
Number) {
308 if (type ==
'+' && prev ==
'+') {
313 if (type ==
'+' && prev ==
'+') {
322 if (!keep_whitespace && type ==
Word && prev_was_whitespace) {
324 prev_was_whitespace =
false;
343 if (!keep_whitespace) {
344 size_t last_non_whitespace = word.find_last_not_of(
" \n");
345 if (last_non_whitespace != std::string::npos) {
346 word = word.substr(0, last_non_whitespace + 1);
350 if (word ==
"namespace") {
353 else if (word ==
"struct") {
356 else if (word ==
"class") {
359 else if (word ==
"const") {
362 else if (word ==
"constexpr") {
365 else if (word ==
"return") {
368 else if (word ==
"break") {
371 else if (word ==
"continue") {
374 else if (word ==
"case") {
377 else if (word ==
"switch") {
380 else if (word ==
"if") {
383 else if (word ==
"else") {
386 else if (word ==
"while") {
389 else if (word ==
"do") {
392 else if (word ==
"for") {
395 else if (word ==
"template") {
398 else if (word ==
"this") {
401 else if (word ==
"static") {
404 else if (word ==
"private") {
407 else if (word ==
"public") {
410 else if (word ==
"enum") {
413 else if (word ==
"using") {
422 int error_line,
int error_char, std::string error_line_string,
const char *error_str)>;
507 return {
"",
nullptr, 0};
568 if (tok.
prev() ==
':') {
583 return data->
str.substr(start, end - start + 1);
590 while (next ==
' ' ||
next ==
'\n') {
618 return (
pos == std::string::npos) ? 0 : (
pos + 1);
625 return (
pos == std::string::npos) ? (
data->str.size() - 1) : (
pos - 1);
636 return str.substr(0,
str.find_last_not_of(
" \n") + 1);
642 std::string
str = this->
str();
643 if (str.length() < 2) {
646 return str.substr(1,
str.length() - 2);
652 std::string directive =
"#line ";
655 size_t nearest_line_directive = sub_str.rfind(directive);
656 size_t line_count = 1;
657 if (nearest_line_directive != std::string::npos) {
658 sub_str = sub_str.substr(nearest_line_directive + directive.size());
659 line_count = std::stoll(sub_str) - 1;
661 return line_count + std::count(sub_str.begin(), sub_str.end(),
'\n');
668 size_t nearest_line_directive = sub_str.rfind(
'\n');
669 return (nearest_line_directive == std::string::npos) ?
671 (sub_str.size() - nearest_line_directive - 1);
679 start = (start != std::string::npos) ? start + 1 : 0;
680 return data->str.substr(start, end - start);
693 return this->
type() == type;
697 return !(*
this ==
type);
710 return this->index == other.
index && this->data == other.
data;
714 return !(*
this == other);
729 int str_start =
data->token_offsets[index_range.
start].start;
730 int str_end =
data->token_offsets[index_range.
last()].last();
731 return {std::string_view(
data->token_types).substr(index_range.
start, index_range.
size),
732 std::string_view(
data->str).substr(str_start, str_end - str_start + 1),
775 return {
"",
"",
nullptr, -1};
791 parent = parent.
scope();
793 return parent == *
this;
801 return data->
str.substr(
start().str_index_start(),
802 end().str_index_last() -
start().str_index_start() + 1);
811 return data->
str.substr(
start().str_index_start() + 1,
812 end().str_index_last() -
start().str_index_start() - 1);
828 std::function<
void(
const std::vector<Token>)> callback)
const
831 const std::string_view scope_tokens =
834 auto count_match = [](
const std::string_view &s,
const std::string_view &pattern) {
835 size_t pos = 0, occurrences = 0;
836 while ((
pos = s.find(pattern,
pos)) != std::string::npos) {
838 pos += pattern.length();
842 const int control_token_count = count_match(pattern,
"?") * 2 + count_match(pattern,
"..") * 2;
844 if (
range().
size < pattern.size() - control_token_count) {
848 const size_t searchable_range = scope_tokens.size() -
849 (pattern.size() - 1 - control_token_count);
851 std::vector<Token> match;
852 match.resize(pattern.size());
854 for (
size_t pos = 0;
pos < searchable_range;
pos++) {
857 for (
int i = 0;
i < pattern.size();
i++) {
858 bool is_last_token =
i == pattern.size() - 1;
864 if (!is_last_token && curr_search_token ==
'.' && next_search_token ==
'.') {
865 cursor = match[
i - 1].scope().end().index;
871 if (curr_search_token == token_type) {
878 else if (!is_last_token && curr_search_token !=
'?' && next_search_token ==
'?') {
894 size_t pos = this->index;
895 while ((
pos =
data->scope_types.find(
char(
type),
pos)) != std::string::npos) {
897 if (
scope.start().index > this->end().index) {
902 if (
scope.start().scope().scope().index == this->index) {
911 const char str[2] = {token_type,
'\0'};
912 foreach_match(
str, [&](
const std::vector<Token> &tokens) { callback(tokens[0]); });
921 foreach_match(
"m?ww(..)c?{..}", [&](
const std::vector<Token> matches) {
922 callback(matches[0] ==
Static,
927 matches[10].
scope());
929 foreach_match(
"m?ww::w(..)c?{..}", [&](
const std::vector<Token> matches) {
930 callback(matches[0] ==
Static,
934 matches[11] ==
Const,
935 matches[13].
scope());
937 foreach_match(
"m?ww<..>(..)c?{..}", [&](
const std::vector<Token> matches) {
938 callback(matches[0] ==
Static,
942 matches[12] ==
Const,
943 matches[14].
scope());
950 foreach_match(
"sw{..}", [&](
const std::vector<Token> matches) {
951 callback(matches[0], matches[1], matches[2].
scope());
953 foreach_match(
"Sw{..}", [&](
const std::vector<Token> matches) {
954 callback(matches[0], matches[1], matches[2].
scope());
956 foreach_match(
"sw<..>{..}", [&](
const std::vector<Token> matches) {
957 callback(matches[0], matches[1], matches[6].
scope());
959 foreach_match(
"Sw<..>{..}", [&](
const std::vector<Token> matches) {
960 callback(matches[0], matches[1], matches[6].
scope());
966 return this->index == other.
index && this->data == other.
data;
970 return !(*
this == other);
1001 int scope_index = 0;
1002 std::stack<ScopeItem> scopes;
1004 auto enter_scope = [&](
ScopeType type,
size_t start_tok_id) {
1005 scopes.emplace(ScopeItem{type, start_tok_id, scope_index++});
1010 auto exit_scope = [&](
int end_tok_id) {
1011 ScopeItem scope = scopes.top();
1012 scope_ranges[scope.index].size = end_tok_id - scope.start + 1;
1018 int in_template = 0;
1041 exit_scope(tok_id - 1);
1057 else if (keyword ==
Enum) {
1115 if ((prev_char !=
' ' && prev_char !=
'\n' && prev_char !=
'<') ||
1125 exit_scope(tok_id - 1);
1128 exit_scope(tok_id - 1);
1138 exit_scope(tok_id - 1);
1141 exit_scope(tok_id - 1);
1144 exit_scope(tok_id - 1);
1153 exit_scope(tok_id - 1);
1156 exit_scope(tok_id - 1);
1159 exit_scope(tok_id - 1);
1162 exit_scope(tok_id - 1);
1167 exit_scope(tok_id - 1);
1170 exit_scope(tok_id - 1);
1173 exit_scope(tok_id - 1);
1191 exit_scope(tok_id - 1);
1195 ScopeItem scope_item = scopes.top();
1214 for (
int i = 0;
i < range.size;
i++) {
1215 int j = range.start +
i;
1228 bool keep_whitespace_;
1234 std::string replacement;
1236 Mutation(
IndexRange src_range, std::string replacement)
1237 : src_range(src_range), replacement(replacement)
1243 friend bool operator<(
const Mutation &a,
const Mutation &
b)
1245 return a.src_range.
start <
b.src_range.start;
1248 std::vector<Mutation> mutations_;
1255 bool keep_whitespace =
false)
1256 : keep_whitespace_(keep_whitespace), report_error(report_error)
1259 parse(report_error);
1266 while ((
pos = data_.scope_types.find(
char(type),
pos)) != std::string::npos) {
1273 std::function<
void(
const std::vector<Token>)> callback)
1281 const char str[2] = {token_type,
'\0'};
1282 foreach_match(
str, [&](
const std::vector<Token> &tokens) { callback(tokens[0]); });
1290 foreach_match(
"m?ww(..)c?{..}", [&](
const std::vector<Token> matches) {
1291 callback(matches[0] ==
Static,
1295 matches[8] ==
Const,
1296 matches[10].scope());
1298 foreach_match(
"m?ww::w(..)c?{..}", [&](
const std::vector<Token> matches) {
1299 callback(matches[0] ==
Static,
1303 matches[11] ==
Const,
1304 matches[13].scope());
1306 foreach_match(
"m?ww<..>(..)c?{..}", [&](
const std::vector<Token> matches) {
1307 callback(matches[0] ==
Static,
1311 matches[12] ==
Const,
1312 matches[14].scope());
1318 return data_.str.substr(start, end - start + 1);
1327 bool replace_try(
size_t from,
size_t to,
const std::string &replacement)
1330 for (
const Mutation &mut : mutations_) {
1331 if (mut.src_range.overlaps(range)) {
1335 mutations_.emplace_back(range, replacement);
1342 const std::string &replacement,
1343 bool keep_trailing_whitespaces =
false)
1345 if (keep_trailing_whitespaces) {
1352 void replace(
size_t from,
size_t to,
const std::string &replacement)
1354 bool success =
replace_try(from, to, replacement);
1364 void replace(
Token tok,
const std::string &replacement,
bool keep_trailing_whitespaces =
false)
1366 if (keep_trailing_whitespaces) {
1374 void replace(
Scope scope,
const std::string &replacement,
bool keep_trailing_whitespaces =
false)
1376 if (keep_trailing_whitespaces) {
1391 std::string content = data_.str.substr(range.
start, range.
size);
1392 size_t lines = std::count(content.begin(), content.end(),
'\n');
1393 size_t spaces = content.find_last_not_of(
" ");
1394 if (spaces != std::string::npos) {
1395 spaces = content.length() - (spaces + 1);
1397 replace(from, to, std::string(lines,
'\n') + std::string(spaces,
' '));
1421 mutations_.emplace_back(range, content);
1430 insert_after(at,
"#line " + std::to_string(line) +
"\n");
1440 mutations_.emplace_back(range, content);
1450 if (mutations_.empty()) {
1455 std::stable_sort(mutations_.begin(), mutations_.end());
1458 bool added_trailing_new_line =
false;
1459 if (data_.str.back() !=
'\n') {
1461 added_trailing_new_line =
true;
1465 for (
const Mutation &mut : mutations_) {
1466 data_.str.replace(mut.src_range.start + offset, mut.src_range.size, mut.replacement);
1467 offset += mut.replacement.size() - mut.src_range.size;
1471 if (added_trailing_new_line) {
1472 data_.str.pop_back();
1481 this->parse(report_error);
1503 for (
const Mutation &mut : mutations_) {
1505 out += std::to_string(mut.src_range.start);
1507 out += std::to_string(mut.src_range.size);
1509 out += data_.str.substr(mut.src_range.start, mut.src_range.size);
1511 out += mut.replacement;
1518 using Duration = std::chrono::microseconds;
1519 Duration tokenize_time;
1520 Duration parse_scope_time;
1524 std::chrono::high_resolution_clock::time_point start;
1526 TimeIt(Duration &time) : time(time)
1528 start = std::chrono::high_resolution_clock::now();
1532 auto end = std::chrono::high_resolution_clock::now();
1533 time = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
1540 TimeIt time_it(parse_scope_time);
1544 TimeIt time_it(tokenize_time);
1552 std::cout <<
"Tokenize time: " << tokenize_time.count() <<
" µs" << std::endl;
1553 std::cout <<
"Parser time: " << parse_scope_time.count() <<
" µs" << std::endl;
1554 std::cout <<
"String len: " << std::to_string(data_.str.size()) << std::endl;
1555 std::cout <<
"Token len: " << std::to_string(data_.token_types.size()) << std::endl;
1556 std::cout <<
"Scope len: " << std::to_string(data_.scope_types.size()) << std::endl;
1561 std::cout <<
"Input: \n" << data_.str <<
" \nEnd of Input\n" << std::endl;
1562 std::cout <<
"Token Types: \"" << data_.token_types <<
"\"" << std::endl;
1563 std::cout <<
"Scope Types: \"" << data_.scope_types <<
"\"" << std::endl;
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
#define assert(assertion)
constexpr bool operator<(StringRef a, StringRef b)
bool overlaps(IndexRange other) const
IndexRange(size_t start, size_t size)
IndexRange operator[](const int64_t index) const
std::vector< size_t > offsets
std::function< void( int error_line, int error_char, std::string error_line_string, const char *error_str)> report_callback
OffsetIndices token_offsets
std::vector< int > token_scope
void parse_scopes(report_callback &report_error)
void tokenize(const bool keep_whitespace)
std::vector< IndexRange > scope_ranges
void insert_line_number(size_t at, int line)
bool replace_try(Token from, Token to, const std::string &replacement, bool keep_trailing_whitespaces=false)
void foreach_scope(ScopeType type, std::function< void(Scope)> callback)
void insert_line_number(Token at, int line)
std::string serialize_mutations() const
void foreach_match(const std::string &pattern, std::function< void(const std::vector< Token >)> callback)
void erase(Token from, Token to)
bool only_apply_mutations()
void foreach_function(std::function< void(bool is_static, Token type, Token name, Scope args, bool is_const, Scope body)> callback)
void insert_after(Token at, const std::string &content)
void replace(Scope scope, const std::string &replacement, bool keep_trailing_whitespaces=false)
const ParserData & data_get()
void replace(size_t from, size_t to, const std::string &replacement)
std::string substr_range_inclusive(size_t start, size_t end)
void replace(Token tok, const std::string &replacement, bool keep_trailing_whitespaces=false)
void foreach_token(const TokenType token_type, std::function< void(const Token)> callback)
void insert_after(size_t at, const std::string &content)
Parser(const std::string &input, ParserData::report_callback &report_error, bool keep_whitespace=false)
void insert_before(size_t at, const std::string &content)
const std::string & result_get()
bool replace_try(size_t from, size_t to, const std::string &replacement)
void erase(size_t from, size_t to)
void replace(Token from, Token to, const std::string &replacement)
std::string substr_range_inclusive(Token start, Token end)
void insert_before(Token at, const std::string &content)
size_t token_count() const
bool contains_token(const char token_type) const
void foreach_struct(std::function< void(Token struct_tok, Token name, Scope body)> callback) const
std::string_view token_view
Token find_token(const char token_type) const
Token operator[](const int64_t index) const
std::string str_exclusive() const
bool operator!=(const Scope &other) const
std::string_view str_view
static Scope from_position(const ParserData *data, int64_t index)
void foreach_token(const TokenType token_type, std::function< void(const Token)> callback) const
void foreach_match(const std::string &pattern, std::function< void(const std::vector< Token >)> callback) const
bool contains(const Scope sub) const
void foreach_scope(ScopeType type, std::function< void(Scope)> callback) const
void foreach_function(std::function< void(bool is_static, Token type, Token name, Scope args, bool is_const, Scope body)> callback) const
bool operator==(const Scope &other) const
std::string line_str() const
bool operator==(const Token &other) const
IndexRange index_range() const
size_t str_index_last() const
bool operator==(TokenType type) const
size_t str_index_last_no_whitespace() const
bool operator!=(TokenType type) const
bool operator!=(char type) const
size_t char_number() const
std::string str_with_whitespace() const
static Token from_position(const ParserData *data, int64_t index)
std::string str_exclusive() const
size_t str_index_start() const
bool operator!=(const Token &other) const
Token namespace_start() const
std::string_view str_view
bool operator==(char type) const
size_t line_number() const
std::string full_symbol_name() const
Scope first_containing_scope_of_type(const ScopeType type) const
Token next_not_whitespace() const
size_t line_start() const
Token find_next(TokenType type) const