Blender V5.0
BLI_csv_parse_test.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#include "testing/testing.h"
6
7#include "BLI_csv_parse.hh"
8#include "BLI_string_ref.hh"
9
11
12static std::optional<int64_t> find_end_of_simple_field(const StringRef buffer,
13 const int64_t start,
14 const char delimiter = ',')
15{
16 return detail::find_end_of_simple_field(Span<char>(buffer), start, delimiter);
17}
18
19static std::optional<int64_t> find_end_of_quoted_field(
20 const StringRef buffer,
21 const int64_t start,
22 const char quote = '"',
23 const Span<char> escape_chars = Span<char>(StringRef("\"\\")))
24{
25 return detail::find_end_of_quoted_field(Span<char>(buffer), start, quote, escape_chars);
26}
27
28static std::optional<Vector<std::string>> parse_record_fields(
29 const StringRef buffer,
30 const int64_t start = 0,
31 const char delimiter = ',',
32 const char quote = '"',
33 const Span<char> quote_escape_chars = Span<char>{'"', '\\'})
34{
35 Vector<Span<char>> fields;
36 const std::optional<int64_t> end_of_record = detail::parse_record_fields(
37 Span<char>(buffer), start, delimiter, quote, quote_escape_chars, fields);
38 if (!end_of_record.has_value()) {
39 return std::nullopt;
40 }
42 for (const Span<char> field : fields) {
43 result.append(std::string(field.begin(), field.end()));
44 }
45 return result;
46}
47
53
55{
/* Body of the test helper that runs #parse_csv_in_chunks and gathers the column names and all
 * records into a single `result`, which is returned together with a success flag.
 * NOTE(review): the function signature and the declarations of `result`, `fields`, the members
 * of `Chunk` and the first argument of the call are not visible in this listing; restore them
 * from the original file before building. */
56 struct Chunk {
58 };
59
61 const std::optional<Vector<Chunk>> chunks = parse_csv_in_chunks<Chunk>(
63 options,
/* First callback: store every field of the passed record as a column name. */
64 [&](const CsvRecord &record) {
65 for (const int64_t i : record.index_range()) {
66 result.column_names.append(record.field_str(i));
67 }
68 },
/* Second callback: turn a batch of records into a #Chunk holding the fields of each record. */
69 [&](const CsvRecords &records) {
/* This local `result` shadows the `result` of the enclosing function. */
70 Chunk result;
71 for (const int64_t record_i : records.index_range()) {
72 const CsvRecord record = records.record(record_i);
74 for (const int64_t column_i : record.index_range()) {
75 fields.append(record.field_str(column_i));
76 }
77 result.fields.append(std::move(fields));
78 }
79 return result;
80 });
/* No value is reported to the caller as a failed parse. */
81 if (!chunks.has_value()) {
82 result.success = false;
83 return result;
84 }
85 result.success = true;
/* Concatenate the records of all chunks in the order in which the chunks are stored.
 * NOTE(review): `chunk` is a const reference, so `std::move` yields a const rvalue here and the
 * fields most likely get copied instead of moved. */
86 for (const Chunk &chunk : *chunks) {
87 result.records.extend(std::move(chunk.fields));
88 }
89 return result;
90}
91
92TEST(csv_parse, FindEndOfSimpleField)
93{
100 EXPECT_EQ(find_end_of_simple_field("123,456", 0), 3);
101 EXPECT_EQ(find_end_of_simple_field("123,456,789", 0), 3);
105 EXPECT_EQ(find_end_of_simple_field("12\n", 0), 2);
106 EXPECT_EQ(find_end_of_simple_field("0,12\n", 0), 1);
107 EXPECT_EQ(find_end_of_simple_field("0,12\n", 2), 4);
108 EXPECT_EQ(find_end_of_simple_field("\r\n", 0), 0);
109 EXPECT_EQ(find_end_of_simple_field("12\r\n", 0), 2);
110 EXPECT_EQ(find_end_of_simple_field("0,12\r\n", 0), 1);
111 EXPECT_EQ(find_end_of_simple_field("0,12\r\n", 2), 4);
112 EXPECT_EQ(find_end_of_simple_field("0,\t12\r\n", 2), 5);
113 EXPECT_EQ(find_end_of_simple_field("0,\t12\r\n", 2, '\t'), 2);
114}
115
116TEST(csv_parse, FindEndOfQuotedField)
117{
118 EXPECT_EQ(find_end_of_quoted_field("", 0), std::nullopt);
119 EXPECT_EQ(find_end_of_quoted_field("123", 0), std::nullopt);
120 EXPECT_EQ(find_end_of_quoted_field("123\n", 0), std::nullopt);
121 EXPECT_EQ(find_end_of_quoted_field("123\r\n", 0), std::nullopt);
122 EXPECT_EQ(find_end_of_quoted_field("123\"", 0), 3);
124 EXPECT_EQ(find_end_of_quoted_field("\"\"", 0), std::nullopt);
125 EXPECT_EQ(find_end_of_quoted_field("\"\"\"", 0), 2);
126 EXPECT_EQ(find_end_of_quoted_field("123\"\"", 0), std::nullopt);
127 EXPECT_EQ(find_end_of_quoted_field("123\"\"\"", 0), 5);
128 EXPECT_EQ(find_end_of_quoted_field("123\"\"\"\"", 0), std::nullopt);
129 EXPECT_EQ(find_end_of_quoted_field("123\"\"\"\"\"", 0), 7);
130 EXPECT_EQ(find_end_of_quoted_field("123\"\"0\"\"\"", 0), 8);
131 EXPECT_EQ(find_end_of_quoted_field(",", 0), std::nullopt);
133 EXPECT_EQ(find_end_of_quoted_field("0,1\"", 0), 3);
134 EXPECT_EQ(find_end_of_quoted_field("0,1\n", 0), std::nullopt);
135 EXPECT_EQ(find_end_of_quoted_field("0,1\"\"", 0), std::nullopt);
136 EXPECT_EQ(find_end_of_quoted_field("0,1\"\"\"", 0), 5);
137 EXPECT_EQ(find_end_of_quoted_field("0\n1\n\"", 0), 4);
138 EXPECT_EQ(find_end_of_quoted_field("\n\"", 0), 1);
139 EXPECT_EQ(find_end_of_quoted_field("\\\"", 0), std::nullopt);
140 EXPECT_EQ(find_end_of_quoted_field("\\\"\"", 0), 2);
141 EXPECT_EQ(find_end_of_quoted_field("\\\"\"\"", 0), std::nullopt);
142 EXPECT_EQ(find_end_of_quoted_field("\\\"\"\"\"", 0), 4);
143}
144
145TEST(csv_parse, ParseRecordFields)
146{
147 using StrVec = Vector<std::string>;
148 EXPECT_EQ(parse_record_fields(""), StrVec());
149 EXPECT_EQ(parse_record_fields("1"), StrVec{"1"});
150 EXPECT_EQ(parse_record_fields("1,2"), StrVec({"1", "2"}));
151 EXPECT_EQ(parse_record_fields("1,2,3"), StrVec({"1", "2", "3"}));
152 EXPECT_EQ(parse_record_fields("1\n,2,3"), StrVec({"1"}));
153 EXPECT_EQ(parse_record_fields("1, 2\n,3"), StrVec({"1", " 2"}));
154 EXPECT_EQ(parse_record_fields("1, 2\r\n,3"), StrVec({"1", " 2"}));
155 EXPECT_EQ(parse_record_fields("\"1,2,3\""), StrVec({"1,2,3"}));
156 EXPECT_EQ(parse_record_fields("\"1,2,3"), std::nullopt);
157 EXPECT_EQ(parse_record_fields("\"1,\n2\t\r\n,3\""), StrVec({"1,\n2\t\r\n,3"}));
158 EXPECT_EQ(parse_record_fields("\"1,2,3\",\"4,5\""), StrVec({"1,2,3", "4,5"}));
159 EXPECT_EQ(parse_record_fields(","), StrVec({"", ""}));
160 EXPECT_EQ(parse_record_fields(",,"), StrVec({"", "", ""}));
161 EXPECT_EQ(parse_record_fields(",,\n"), StrVec({"", "", ""}));
162 EXPECT_EQ(parse_record_fields("\r\n,,"), StrVec());
163 EXPECT_EQ(parse_record_fields("\"a\"\"b\""), StrVec({"a\"\"b"}));
164 EXPECT_EQ(parse_record_fields("\"a\\\"b\""), StrVec({"a\\\"b"}));
165 EXPECT_EQ(parse_record_fields("\"a\"\nb"), StrVec({"a"}));
166 EXPECT_EQ(parse_record_fields("\"a\" \nb"), StrVec({"a"}));
167}
168
169TEST(csv_parse, ParseCsvBasic)
170{
172 options.chunk_size_bytes = 1;
173 StrParseResult result = parse_csv_fields("a,b,c\n1,2,3,4\n4\n77,88,99\n", options);
174
175 EXPECT_TRUE(result.success);
176
177 EXPECT_EQ(result.column_names.size(), 3);
178 EXPECT_EQ(result.column_names[0], "a");
179 EXPECT_EQ(result.column_names[1], "b");
180 EXPECT_EQ(result.column_names[2], "c");
181
182 EXPECT_EQ(result.records.size(), 3);
183 EXPECT_EQ(result.records[0].size(), 4);
184 EXPECT_EQ(result.records[1].size(), 1);
185 EXPECT_EQ(result.records[2].size(), 3);
186
187 EXPECT_EQ(result.records[0][0], "1");
188 EXPECT_EQ(result.records[0][1], "2");
189 EXPECT_EQ(result.records[0][2], "3");
190 EXPECT_EQ(result.records[0][3], "4");
191
192 EXPECT_EQ(result.records[1][0], "4");
193
194 EXPECT_EQ(result.records[2][0], "77");
195 EXPECT_EQ(result.records[2][1], "88");
196 EXPECT_EQ(result.records[2][2], "99");
197}
198
/* Expects the parse to be reported as failed.
 * NOTE(review): the declarations of `options` and `result` (and with them the parsed input) are
 * not visible in this listing; presumably the input ends inside an unterminated field -- confirm
 * against the original file. */
199TEST(csv_parse, ParseCsvMissingEnd)
200{
202 options.chunk_size_bytes = 1;
204 EXPECT_FALSE(result.success);
205}
206
207TEST(csv_parse, ParseCsvMultiLine)
208{
210 options.chunk_size_bytes = 1;
211 StrParseResult result = parse_csv_fields("a,b,c\n1,\"2\n\n\",3,4", options);
212 EXPECT_TRUE(result.success);
213 EXPECT_EQ(result.records.size(), 1);
214 EXPECT_EQ(result.records[0].size(), 4);
215 EXPECT_EQ(result.records[0][0], "1");
216 EXPECT_EQ(result.records[0][1], "2\n\n");
217 EXPECT_EQ(result.records[0][2], "3");
218 EXPECT_EQ(result.records[0][3], "4");
219}
220
/* Expects a successful parse that yields neither column names nor records.
 * NOTE(review): the declarations of `options` and `result` (and with them the parsed input) are
 * not visible in this listing; presumably the input is an empty string -- confirm against the
 * original file. */
221TEST(csv_parse, ParseCsvEmpty)
222{
224 options.chunk_size_bytes = 1;
226 EXPECT_TRUE(result.success);
227 EXPECT_EQ(result.column_names.size(), 0);
228 EXPECT_EQ(result.records.size(), 0);
229}
230
/* Expects a successful parse with the three column names "a", "b", "c" and no records.
 * NOTE(review): the declarations of `options` and `result` (and with them the parsed input) are
 * not visible in this listing; presumably the input consists of a header line only -- confirm
 * against the original file. */
231TEST(csv_parse, ParseCsvTitlesOnly)
232{
234 options.chunk_size_bytes = 1;
236 EXPECT_TRUE(result.success);
237 EXPECT_EQ(result.column_names.size(), 3);
238 EXPECT_EQ(result.column_names[0], "a");
239 EXPECT_EQ(result.column_names[1], "b");
240 EXPECT_EQ(result.column_names[2], "c");
241 EXPECT_TRUE(result.records.is_empty());
242}
243
/* Expects a successful parse with one column named "a" and exactly two single-field records,
 * "1" and "2".
 * NOTE(review): the declarations of `options` and `result` (and with them the parsed input) are
 * not visible in this listing; judging by the test name the input ends with a newline that must
 * not produce an extra record -- confirm against the original file. */
244TEST(csv_parse, ParseCsvTrailingNewline)
245{
247 options.chunk_size_bytes = 1;
249 EXPECT_TRUE(result.success);
250 EXPECT_EQ(result.column_names.size(), 1);
251 EXPECT_EQ(result.column_names[0], "a");
252 EXPECT_EQ(result.records.size(), 2);
253 EXPECT_EQ(result.records[0].size(), 1);
254 EXPECT_EQ(result.records[0][0], "1");
255 EXPECT_EQ(result.records[1].size(), 1);
256 EXPECT_EQ(result.records[1][0], "2");
257}
258
259TEST(csv_parse, UnescapeField)
260{
261 LinearAllocator<> allocator;
263 EXPECT_EQ(unescape_field("", options, allocator), "");
264 EXPECT_EQ(unescape_field("a", options, allocator), "a");
265 EXPECT_EQ(unescape_field("abcd", options, allocator), "abcd");
266 EXPECT_EQ(unescape_field("ab\\cd", options, allocator), "ab\\cd");
267 EXPECT_EQ(unescape_field("ab\\\"cd", options, allocator), "ab\"cd");
268 EXPECT_EQ(unescape_field("ab\"\"cd", options, allocator), "ab\"cd");
269 EXPECT_EQ(unescape_field("ab\"\"\"\"cd", options, allocator), "ab\"\"cd");
270 EXPECT_EQ(unescape_field("ab\"\"\\\"cd", options, allocator), "ab\"\"cd");
271}
272
273} // namespace blender::csv_parse::tests
EXPECT_EQ(BLI_expr_pylike_eval(expr, nullptr, 0, &result), EXPR_PYLIKE_INVALID)
long long int int64_t
void append(const T &value)
StringRef field_str(const int64_t index) const
CCL_NAMESPACE_BEGIN struct Options options
#define str(s)
int64_t find_end_of_simple_field(Span< char > buffer, int64_t start, char delimiter)
Definition csv_parse.cc:284
std::optional< int64_t > parse_record_fields(const Span< char > buffer, const int64_t start, const char delimiter, const char quote, const Span< char > quote_escape_chars, Vector< Span< char > > &r_fields)
Definition csv_parse.cc:199
std::optional< int64_t > find_end_of_quoted_field(Span< char > buffer, int64_t start, char quote, Span< char > escape_chars)
Definition csv_parse.cc:299
static std::optional< Vector< std::string > > parse_record_fields(const StringRef buffer, const int64_t start=0, const char delimiter=',', const char quote='"', const Span<char> quote_escape_chars = Span<char>{'"', '\\'})
static std::optional< int64_t > find_end_of_quoted_field(const StringRef buffer, const int64_t start, const char quote='"', const Span<char> escape_chars = Span<char>(StringRef("\"\\")))
static StrParseResult parse_csv_fields(const StringRef str, const CsvParseOptions &options)
TEST(csv_parse, FindEndOfSimpleField)
static std::optional< int64_t > find_end_of_simple_field(const StringRef buffer, const int64_t start, const char delimiter=',')
std::optional< Vector< Any<> > > parse_csv_in_chunks(const Span< char > buffer, const CsvParseOptions &options, FunctionRef< void(const CsvRecord &record)> process_header, FunctionRef< Any<>(const CsvRecords &records)> process_records)
Definition csv_parse.cc:91
StringRef unescape_field(const StringRef str, const CsvParseOptions &options, LinearAllocator<> &allocator)
Definition csv_parse.cc:169
Vector< Vector< std::string > > records
i
Definition text_draw.cc:230