Blender V5.0
csv_reader.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2024 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
8
9#include <atomic>
10#include <charconv>
11#include <optional>
12#include <variant>
13
14#include "BLI_array_utils.hh"
15#include "fast_float.h"
16
18#include "BKE_attribute.hh"
19#include "BKE_pointcloud.hh"
20#include "BKE_report.hh"
21
22#include "BLI_csv_parse.hh"
23#include "BLI_fileops.hh"
25#include "BLI_vector.hh"
26
27#include "IO_csv.hh"
28
30
/**
 * Per-column state that is filled in while the header and the data chunks are parsed.
 * The `found_*` flags are atomics; #parse_records_chunk reads and writes them with relaxed
 * memory order.
 */
31struct ColumnInfo {
/* Set while parsing the header. Such columns are skipped when parsing and when flattening. */
33 bool has_invalid_name = false;
/* Set once a chunk contained a value that could not be parsed; the column is ignored afterwards. */
34 std::atomic<bool> found_invalid = false;
/* Set once a chunk of this column was parsed entirely as integers. */
35 std::atomic<bool> found_int = false;
/* Set once a float was detected in this column; later chunks are parsed as floats directly. */
36 std::atomic<bool> found_float = false;
37};
38
39using ColumnData = std::variant<std::monostate, Vector<float>, Vector<int>>;
40
45
50
53 bool found_invalid = false;
54 bool found_float = false;
55};
56
58 const int column_i)
59{
61 result.data.reserve(records.size());
62 for (const int row_i : records.index_range()) {
63 const Span<char> value_span = records.record(row_i).field(column_i);
64 const char *value_begin = value_span.begin();
65 const char *value_end = value_span.end();
66 /* Skip leading white-space and plus sign. */
67 while (value_begin < value_end && ELEM(*value_begin, ' ', '+')) {
68 value_begin++;
69 }
70 float value;
71 fast_float::from_chars_result res = fast_float::from_chars(value_begin, value_end, value);
72 if (res.ec != std::errc()) {
73 result.found_invalid = true;
74 return result;
75 }
76 if (res.ptr < value_end) {
77 /* Allow trailing white-space in the value. */
78 while (res.ptr < value_end && res.ptr[0] == ' ') {
79 res.ptr++;
80 }
81 if (res.ptr < value_end) {
82 result.found_invalid = true;
83 return result;
84 }
85 }
86 result.data.append(value);
87 }
88 return result;
89}
90
92 const int column_i)
93{
95 result.data.reserve(records.size());
96 for (const int row_i : records.index_range()) {
97 const Span<char> value_span = records.record(row_i).field(column_i);
98 const char *value_begin = value_span.begin();
99 const char *value_end = value_span.end();
100 /* Skip leading white-space and plus sign. */
101 while (value_begin < value_end && ELEM(*value_begin, ' ', '+')) {
102 value_begin++;
103 }
104 int value;
105 std::from_chars_result res = std::from_chars(value_begin, value_end, value);
106 if (res.ec != std::errc()) {
107 result.found_invalid = true;
108 return result;
109 }
110 if (res.ptr < value_end) {
111 /* If the next character after the value is a dot, it should be parsed again as float. */
112 if (res.ptr[0] == '.') {
113 result.found_float = true;
114 return result;
115 }
116 /* Allow trailing white-space in the value. */
117 while (res.ptr < value_end && res.ptr[0] == ' ') {
118 res.ptr++;
119 }
120 if (res.ptr < value_end) {
121 result.found_invalid = true;
122 return result;
123 }
124 }
125 result.data.append(value);
126 }
127 return result;
128}
129
131 MutableSpan<ColumnInfo> columns_info)
132{
133 const int columns_num = columns_info.size();
134 ChunkResult chunk_result;
135 chunk_result.rows_num = records.size();
136 chunk_result.columns.resize(columns_num);
137 for (const int column_i : IndexRange(columns_num)) {
138 ColumnInfo &column_info = columns_info[column_i];
139 if (column_info.has_invalid_name) {
140 /* Column can be ignored. */
141 continue;
142 }
143 if (column_info.found_invalid.load(std::memory_order_relaxed)) {
144 /* Invalid values have been found in this column already, skip it. */
145 continue;
146 }
147 /* A float was found in this column already, so parse everything as floats. */
148 const bool found_float = column_info.found_float.load(std::memory_order_relaxed);
149 if (found_float) {
150 ParseFloatColumnResult float_column_result = parse_column_as_floats(records, column_i);
151 if (float_column_result.found_invalid) {
152 column_info.found_invalid.store(true, std::memory_order_relaxed);
153 continue;
154 }
155 chunk_result.columns[column_i] = std::move(float_column_result.data);
156 continue;
157 }
158 /* No float was found so far in this column, so attempt to parse it as integers. */
159 ParseIntColumnResult int_column_result = parse_column_as_ints(records, column_i);
160 if (int_column_result.found_invalid) {
161 column_info.found_invalid.store(true, std::memory_order_relaxed);
162 continue;
163 }
164 if (!int_column_result.found_float) {
165 chunk_result.columns[column_i] = std::move(int_column_result.data);
166 column_info.found_int.store(true, std::memory_order_relaxed);
167 continue;
168 }
169 /* While parsing it as integers, floats were detected. So parse it as floats again. */
170 column_info.found_float.store(true, std::memory_order_relaxed);
171 ParseFloatColumnResult float_column_result = parse_column_as_floats(records, column_i);
172 if (float_column_result.found_invalid) {
173 column_info.found_invalid.store(true, std::memory_order_relaxed);
174 continue;
175 }
176 chunk_result.columns[column_i] = std::move(float_column_result.data);
177 }
178 return chunk_result;
179}
180
186 const Span<ColumnInfo> columns_info,
187 OffsetIndices<int> chunk_offsets,
189{
190 const int points_num = chunk_offsets.total_size();
191 Array<std::optional<GArray<>>> flattened_attributes(columns_info.size());
192
193 threading::parallel_for(columns_info.index_range(), 1, [&](const IndexRange columns_range) {
194 for (const int column_i : columns_range) {
195 const ColumnInfo &column_info = columns_info[column_i];
196 if (column_info.has_invalid_name || column_info.found_invalid) {
197 /* Column can be ignored. */
198 continue;
199 }
200 if (column_info.found_float) {
201 /* Should read column as floats. */
202 GArray<> attribute(CPPType::get<float>(), points_num);
203 float *attribute_buffer = static_cast<float *>(attribute.data());
204 threading::parallel_for(chunks.index_range(), 1, [&](const IndexRange chunks_range) {
205 for (const int chunk_i : chunks_range) {
206 const IndexRange dst_range = chunk_offsets[chunk_i];
207 ChunkResult &chunk = chunks[chunk_i];
208 ColumnData &column_data = chunk.columns[column_i];
209 if (const auto *float_vec = std::get_if<Vector<float>>(&column_data)) {
210 BLI_assert(float_vec->size() == dst_range.size());
211 uninitialized_copy_n(
212 float_vec->data(), dst_range.size(), attribute_buffer + dst_range.first());
213 }
214 else if (const auto *int_vec = std::get_if<Vector<int>>(&column_data)) {
215 /* This chunk was read entirely as integers, so it still has to be converted to
216 * floats. */
217 BLI_assert(int_vec->size() == dst_range.size());
218 uninitialized_convert_n(int_vec->data(), dst_range.size(), attribute_buffer);
219 }
220 else {
221 /* Expected data to be available, because the `found_invalid` flag was not
222 * set. */
223 BLI_assert_unreachable();
224 }
225 /* Free data for chunk. */
226 column_data = std::monostate{};
227 }
228 });
229 flattened_attributes[column_i] = std::move(attribute);
230 continue;
231 }
232 if (column_info.found_int) {
233 /* Should read column as ints. */
234 GArray<> attribute(CPPType::get<int>(), points_num);
235 int *attribute_buffer = static_cast<int *>(attribute.data());
236 threading::parallel_for(chunks.index_range(), 1, [&](const IndexRange chunks_range) {
237 for (const int chunk_i : chunks_range) {
238 const IndexRange dst_range = chunk_offsets[chunk_i];
239 ChunkResult &chunk = chunks[chunk_i];
240 ColumnData &column_data = chunk.columns[column_i];
241 if (const auto *int_vec = std::get_if<Vector<int>>(&column_data)) {
242 BLI_assert(int_vec->size() == dst_range.size());
243 uninitialized_copy_n(
244 int_vec->data(), dst_range.size(), attribute_buffer + dst_range.first());
245 }
246 else {
247 /* Expected data to be available, because the `found_invalid` and
248 * `found_float` flags were not set. */
249 BLI_assert_unreachable();
250 }
251 /* Free data for chunk. */
252 column_data = std::monostate{};
253 }
254 });
255 flattened_attributes[column_i] = std::move(attribute);
256 continue;
257 }
258 }
259 });
260 return flattened_attributes;
261}
262
264{
/* Read the whole file into memory. A file that cannot be read or that is empty is reported as
 * an error and nullptr is returned. */
265 size_t buffer_len;
266 void *buffer = BLI_file_read_text_as_mem(import_params.filepath, 0, &buffer_len);
267 if (buffer == nullptr) {
268 BKE_reportf(import_params.reports,
269 RPT_ERROR,
270 "CSV Import: Cannot open file '%s'",
271 import_params.filepath);
272 return nullptr;
273 }
/* The deferred callback frees the file buffer. */
274 BLI_SCOPED_DEFER([&]() { MEM_freeN(buffer); });
275 if (buffer_len == 0) {
277 import_params.reports, RPT_ERROR, "CSV Import: empty file '%s'", import_params.filepath);
278 return nullptr;
279 }
280
281 LinearAllocator<> allocator;
282 Array<ColumnInfo> columns_info;
283 csv_parse::CsvParseOptions parse_options;
284 parse_options.delimiter = import_params.delimiter;
285
/* Header callback: creates one #ColumnInfo per header field, stores its name and flags columns
 * whose name cannot be used. */
286 const auto parse_header = [&](const csv_parse::CsvRecord &record) {
287 columns_info.reinitialize(record.size());
288 for (const int i : record.index_range()) {
289 ColumnInfo &column_info = columns_info[i];
291 record.field_str(i), parse_options, allocator);
292 column_info.name = name;
295 {
296 column_info.has_invalid_name = true;
297 continue;
298 }
299 }
300 };
/* Data callback: parses one chunk of records and updates the shared per-column flags. */
301 const auto parse_data_chunk = [&](const csv_parse::CsvRecords &records) {
302 return parse_records_chunk(records, columns_info);
303 };
304
305 const Span<char> buffer_span{static_cast<char *>(buffer), int64_t(buffer_len)};
306 std::optional<Vector<ChunkResult>> parsed_chunks = csv_parse::parse_csv_in_chunks<ChunkResult>(
307 buffer_span, parse_options, parse_header, parse_data_chunk);
308
309 if (!parsed_chunks.has_value()) {
310 BKE_reportf(import_params.reports,
311 RPT_ERROR,
312 "CSV import: failed to parse file '%s'",
313 import_params.filepath);
314 return nullptr;
315 }
316
317 /* Count the total number of records and compute the offset of each chunk which is used when
318 * flattening the parsed data. */
319 Vector<int> chunk_offsets_vec;
320 chunk_offsets_vec.append(0);
321 for (const ChunkResult &chunk : *parsed_chunks) {
322 chunk_offsets_vec.append(chunk_offsets_vec.last() + chunk.rows_num);
323 }
324 const OffsetIndices<int> chunk_offsets(chunk_offsets_vec);
325 const int points_num = chunk_offsets_vec.last();
326
/* One point is created per parsed record. */
327 PointCloud *pointcloud = BKE_pointcloud_new_nomain(points_num);
328
/* Two tasks are passed below: the first one writes into the point positions, the second one
 * flattens the parsed chunks into one array per column. */
329 Array<std::optional<GArray<>>> flattened_attributes;
330 threading::memory_bandwidth_bound_task(points_num * 16, [&]() {
332 [&]() {
334 pointcloud->positions_for_write());
335 },
336 [&]() {
337 flattened_attributes = flatten_valid_attribute_chunks(
338 columns_info, chunk_offsets, *parsed_chunks);
339 });
340 });
341
342 /* Add all valid attributes to the pointcloud. */
343 bke::MutableAttributeAccessor attributes = pointcloud->attributes_for_write();
344 for (const int column_i : columns_info.index_range()) {
345 std::optional<GArray<>> &attribute = flattened_attributes[column_i];
346 if (!attribute.has_value()) {
/* No array was created for this column (invalid name, invalid values or no data). */
347 continue;
348 }
/* The flattened array is moved into a shared value whose buffer is handed to the attribute
 * through #AttributeInitShared.
 * NOTE(review): `attribute->type()` below is read after `*attribute` was moved from. This
 * presumably relies on a moved-from #GArray keeping its type; confirm, or read the type from
 * `data->data` instead. */
349 const auto *data = new ImplicitSharedValue<GArray<>>(std::move(*attribute));
350 const bke::AttrType type = bke::cpp_type_to_attribute_type(attribute->type());
351 const ColumnInfo &column_info = columns_info[column_i];
352 attributes.add(column_info.name,
354 type,
355 bke::AttributeInitShared{data->data.data(), *data});
/* Release the local user of the shared data again. Presumably the attribute holds its own user
 * at this point; verify against #AttributeInitShared. */
356 data->remove_user_and_delete_if_last();
357 }
358
359 /* Since all positions are set to zero, the bounding box can be updated eagerly to avoid
360 * computing it later. */
361 pointcloud->runtime->bounds_cache.ensure([](Bounds<float3> &r_bounds) {
362 r_bounds.min = float3(0);
363 r_bounds.max = float3(0);
364 });
365
366 return pointcloud;
367}
368
369} // namespace blender::io::csv
General operations for point clouds.
PointCloud * BKE_pointcloud_new_nomain(int totpoint)
void BKE_reportf(ReportList *reports, eReportType type, const char *format,...) ATTR_PRINTF_FORMAT(3
@ RPT_ERROR
Definition BKE_report.hh:39
void * BLI_file_read_text_as_mem(const char *filepath, size_t pad_bytes, size_t *r_size)
Definition storage.cc:511
File and directory operations.
#define BLI_SCOPED_DEFER(function_to_defer)
#define ELEM(...)
BMesh const char void * data
long long int int64_t
void resize(const int64_t new_size)
IndexRange index_range() const
Definition BLI_array.hh:360
void reinitialize(const int64_t new_size)
Definition BLI_array.hh:419
constexpr int64_t size() const
Definition BLI_span.hh:493
constexpr int64_t size() const
Definition BLI_span.hh:252
constexpr const T * end() const
Definition BLI_span.hh:224
constexpr IndexRange index_range() const
Definition BLI_span.hh:401
constexpr const T * begin() const
Definition BLI_span.hh:220
static VArray from_single(T value, const int64_t size)
void append(const T &value)
const T & last(const int64_t n=0) const
Span< char > field(const int64_t index) const
CsvRecord record(const int64_t index) const
VecBase< float, 3 > float3
void MEM_freeN(void *vmemh)
Definition mallocn.cc:113
void copy(const GVArray &src, GMutableSpan dst, int64_t grain_size=4096)
bool attribute_name_is_anonymous(const StringRef name)
bool allow_procedural_attribute_access(StringRef attribute_name)
AttrType cpp_type_to_attribute_type(const CPPType &type)
std::optional< Vector< Any<> > > parse_csv_in_chunks(const Span< char > buffer, const CsvParseOptions &options, FunctionRef< void(const CsvRecord &record)> process_header, FunctionRef< Any<>(const CsvRecords &records)> process_records)
Definition csv_parse.cc:91
StringRef unescape_field(const StringRef str, const CsvParseOptions &options, LinearAllocator<> &allocator)
Definition csv_parse.cc:169
static ParseIntColumnResult parse_column_as_ints(const csv_parse::CsvRecords &records, const int column_i)
Definition csv_reader.cc:91
std::variant< std::monostate, Vector< float >, Vector< int > > ColumnData
Definition csv_reader.cc:39
PointCloud * import_csv_as_pointcloud(const CSVImportParams &import_params)
static ParseFloatColumnResult parse_column_as_floats(const csv_parse::CsvRecords &records, const int column_i)
Definition csv_reader.cc:57
static ChunkResult parse_records_chunk(const csv_parse::CsvRecords &records, MutableSpan< ColumnInfo > columns_info)
static Array< std::optional< GArray<> > > flatten_valid_attribute_chunks(const Span< ColumnInfo > columns_info, OffsetIndices< int > chunk_offsets, MutableSpan< ChunkResult > chunks)
void parallel_invoke(Functions &&...functions)
Definition BLI_task.hh:221
void parallel_for(const IndexRange range, const int64_t grain_size, const Function &function, const TaskSizeHints &size_hints=detail::TaskSizeHints_Static(1))
Definition BLI_task.hh:93
void memory_bandwidth_bound_task(const int64_t approximate_bytes_touched, const Function &function)
Definition BLI_task.hh:265
const char * name
PointCloudRuntimeHandle * runtime
Vector< ColumnData > columns
Definition csv_reader.cc:43
std::atomic< bool > found_float
Definition csv_reader.cc:36
std::atomic< bool > found_int
Definition csv_reader.cc:35
std::atomic< bool > found_invalid
Definition csv_reader.cc:34
i
Definition text_draw.cc:230