Blender V5.0
id_hash.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include <fcntl.h>
6#ifndef WIN32
7# include <unistd.h>
8#else
9# include <io.h>
10#endif
11#include <fmt/format.h>
12#include <mutex>
13#include <xxhash.h>
14
15#include "BKE_id_hash.hh"
16#include "BKE_lib_id.hh"
17#include "BKE_lib_query.hh"
18#include "BKE_library.hh"
19#include "BKE_main.hh"
20
21#include "BLI_fileops.hh"
22#include "BLI_mmap.h"
23#include "BLI_mutex.hh"
24#include "BLI_set.hh"
25
26namespace blender::bke::id_hash {
27
28static std::optional<Vector<char>> read_file(const StringRefNull path)
29{
30 blender::fstream stream{path.c_str(), std::ios_base::in | std::ios_base::binary};
31 stream.seekg(0, std::ios_base::end);
32 const int64_t size = stream.tellg();
33 stream.seekg(0, std::ios_base::beg);
34
36 stream.read(buffer.data(), size);
37 if (stream.bad()) {
38 return std::nullopt;
39 }
40
41 return buffer;
42}
43
44static std::optional<XXH128_hash_t> compute_file_hash_with_file_read(const StringRefNull path)
45{
46 const std::optional<Vector<char>> buffer = read_file(path);
47 if (!buffer) {
48 return std::nullopt;
49 }
50 return XXH3_128bits(buffer->data(), buffer->size());
51}
52
53static std::optional<XXH128_hash_t> compute_file_hash_with_memory_map(const StringRefNull path)
54{
55 const int file = BLI_open(path.c_str(), O_BINARY | O_RDONLY, 0);
56 if (file == -1) {
57 return std::nullopt;
58 }
59 BLI_SCOPED_DEFER([&]() { close(file); });
60
61 BLI_mmap_file *mmap_file = BLI_mmap_open(file);
62 if (!mmap_file) {
63 return std::nullopt;
64 }
65 BLI_SCOPED_DEFER([&]() { BLI_mmap_free(mmap_file); });
66 const size_t size = BLI_mmap_get_length(mmap_file);
67 const void *data = BLI_mmap_get_pointer(mmap_file);
68 const XXH128_hash_t hash = XXH3_128bits(data, size);
69 if (BLI_mmap_any_io_error(mmap_file)) {
70 return std::nullopt;
71 }
72 return hash;
73}
74
75static std::optional<XXH128_hash_t> compute_file_hash(const StringRefNull path)
76{
77 /* First try the memory map the file, because it avoids an extra copy. */
78 if (const std::optional<XXH128_hash_t> hash = compute_file_hash_with_memory_map(path)) {
79 /* Make sure both code paths are tested even if memory mapping should almost always work. */
80 BLI_assert(hash->low64 == compute_file_hash_with_file_read(path)->low64);
81 return hash;
82 }
83 if (const std::optional<XXH128_hash_t> hash = compute_file_hash_with_file_read(path)) {
84 return hash;
85 }
86 return std::nullopt;
87}
88
91 XXH128_hash_t hash;
92};
93
94static std::optional<XXH128_hash_t> get_source_file_hash(const ID &id, DeepHashErrors &r_errors)
95{
97 static Mutex mutex;
98
99 const StringRefNull path = id.lib->runtime->filepath_abs;
100
101 BLI_stat_t stat;
102 if (BLI_stat(path.c_str(), &stat) == -1) {
103 r_errors.missing_files.add_as(path);
104 return std::nullopt;
105 }
106
107 std::lock_guard lock(mutex);
108 if (const CachedFileHash *cached_hash = cache.lookup_ptr_as(path)) {
109 if (cached_hash->last_modified == stat.st_mtime) {
110 return cached_hash->hash;
111 }
112 }
113
114 /* The modification time may not be set if the data-block is added as linked data as part of
115 * versioning (e.g. in #do_versions_after_setup). */
116 if (id.runtime->src_blend_modifification_time != 0) {
117 if (stat.st_mtime != id.runtime->src_blend_modifification_time) {
118 r_errors.updated_files.add_as(path);
119 return std::nullopt;
120 }
121 }
122
123 if (const std::optional<XXH128_hash_t> hash = compute_file_hash(path)) {
124 cache.add_overwrite(path, CachedFileHash{stat.st_mtime, *hash});
125 return hash;
126 }
127 r_errors.missing_files.add_as(path);
128 return std::nullopt;
129}
130
131static std::optional<XXH128_hash_t> get_id_shallow_hash(const ID &id, DeepHashErrors &r_errors)
132{
134 const StringRefNull id_name = id.name;
135 const std::optional<XXH128_hash_t> file_hash = get_source_file_hash(id, r_errors);
136 if (!file_hash) {
137 return std::nullopt;
138 }
139
140 XXH3_state_t *hash_state = XXH3_createState();
141 XXH3_128bits_reset(hash_state);
142 XXH3_128bits_update(hash_state, id_name.data(), id_name.size());
143 XXH3_128bits_update(hash_state, &*file_hash, sizeof(XXH128_hash_t));
144 XXH128_hash_t shallow_hash = XXH3_128bits_digest(hash_state);
145 XXH3_freeState(hash_state);
146 return shallow_hash;
147}
148
149static void compute_deep_hash_recursive(const Main &bmain,
150 const ID &id,
151 Set<const ID *> &current_stack,
152 Map<const ID *, IDHash> &r_hashes,
153 DeepHashErrors &r_errors)
154{
155 if (r_hashes.contains(&id)) {
156 return;
157 }
158 if (!id.deep_hash.is_null()) {
159 r_hashes.add(&id, id.deep_hash);
160 return;
161 }
162 current_stack.add(&id);
163 BLI_SCOPED_DEFER([&]() -> void { current_stack.remove(&id); });
164 const std::optional<XXH128_hash_t> id_shallow_hash = get_id_shallow_hash(id, r_errors);
165 if (!id_shallow_hash) {
166 return;
167 }
168
169 XXH3_state_t *hash_state = XXH3_createState();
170 BLI_SCOPED_DEFER([&hash_state]() -> void { XXH3_freeState(hash_state); })
171 XXH3_128bits_reset(hash_state);
172 XXH3_128bits_update(hash_state, &*id_shallow_hash, sizeof(XXH128_hash_t));
173
174 bool success = true;
176 const_cast<Main *>(&bmain),
177 const_cast<ID *>(&id),
178 [&](LibraryIDLinkCallbackData *cb_data) {
179 if (cb_data->cb_flag & IDWALK_CB_LOOPBACK) {
180 /* Loopback pointer (e.g. from a shapekey to its owner geometry ID, or from a collection
181 * to its parents) should always be ignored, as they do not represent an actual
182 * dependency. The dependency relationship should already have been processed from the
183 * owner to its dependency anyway (if applicable). */
184 return IDWALK_RET_NOP;
185 }
187 /* Embedded data are part of their owner's internal data, and as such already computed as
188 * part of the owner's shallow hash. */
189 return IDWALK_RET_NOP;
190 }
191 if (cb_data->cb_flag & IDWALK_CB_HASH_IGNORE) {
192 /* This pointer is explicitly ignored for the hash computation. */
193 return IDWALK_RET_NOP;
194 }
195 ID *referenced_id = *cb_data->id_pointer;
196 if (!referenced_id) {
197 /* Need to update the hash even if there is no id. There is a difference between the case
198 * where there is no id and the case where this callback is not called at all.*/
199 const int random_data = 452942579;
200 XXH3_128bits_update(hash_state, &random_data, sizeof(int));
201 return IDWALK_RET_NOP;
202 }
203 /* All embedded ID usages should already have been excluded above. */
204 BLI_assert((referenced_id->flag & ID_FLAG_EMBEDDED_DATA) == 0);
205 if (current_stack.contains(referenced_id)) {
206 /* Somehow encode that we had a circular reference here. */
207 const int random_data = 234632342;
208 XXH3_128bits_update(hash_state, &random_data, sizeof(int));
209 return IDWALK_RET_NOP;
210 }
211 compute_deep_hash_recursive(bmain, *referenced_id, current_stack, r_hashes, r_errors);
212 const IDHash *referenced_id_hash = r_hashes.lookup_ptr(referenced_id);
213 if (!referenced_id_hash) {
214 success = false;
216 }
217 XXH3_128bits_update(hash_state, referenced_id_hash->data, sizeof(IDHash));
218 return IDWALK_RET_NOP;
219 },
220 nullptr,
222
223 if (!success) {
224 return;
225 }
226 IDHash new_deep_hash;
227 const XXH128_hash_t new_deep_hash_xxh128 = XXH3_128bits_digest(hash_state);
228 static_assert(sizeof(IDHash) == sizeof(XXH128_hash_t));
229 memcpy(new_deep_hash.data, &new_deep_hash_xxh128, sizeof(IDHash));
230 r_hashes.add(&id, new_deep_hash);
231}
232
234{
235#ifndef NDEBUG
236 for (const ID *id : ids) {
238 }
239#endif
240
241 if (ids.is_empty()) {
242 return ValidDeepHashes{};
243 }
244
246 Set<const ID *> current_stack;
247 DeepHashErrors errors;
248 for (const ID *id : ids) {
249 compute_deep_hash_recursive(bmain, *id, current_stack, hashes, errors);
250 }
251 if (!errors.missing_files.is_empty() || !errors.updated_files.is_empty()) {
252 return errors;
253 }
254 return ValidDeepHashes{hashes};
255}
256
257std::string id_hash_to_hex(const IDHash &hash)
258{
259 std::string hex_str;
260 for (const uint8_t byte : hash.data) {
261 hex_str += fmt::format("{:02x}", byte);
262 }
263 return hex_str;
264}
265
266} // namespace blender::bke::id_hash
@ IDWALK_RET_STOP_ITER
@ IDWALK_RET_NOP
@ IDWALK_CB_LOOPBACK
@ IDWALK_CB_EMBEDDED_NOT_OWNING
@ IDWALK_CB_EMBEDDED
@ IDWALK_CB_HASH_IGNORE
void BKE_library_foreach_ID_link(Main *bmain, ID *id, blender::FunctionRef< LibraryIDLinkCallback > callback, void *user_data, LibraryForeachIDFlag flag)
Definition lib_query.cc:431
@ IDWALK_READONLY
#define BLI_assert(a)
Definition BLI_assert.h:46
#define O_BINARY
int BLI_stat(const char *path, BLI_stat_t *buffer) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
struct stat BLI_stat_t
int BLI_open(const char *filepath, int oflag, int pmode) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
File and directory operations.
#define BLI_SCOPED_DEFER(function_to_defer)
void * BLI_mmap_get_pointer(BLI_mmap_file *file) ATTR_WARN_UNUSED_RESULT
Definition BLI_mmap.cc:472
void BLI_mmap_free(BLI_mmap_file *file) ATTR_NONNULL(1)
Definition BLI_mmap.cc:487
bool BLI_mmap_any_io_error(const BLI_mmap_file *file) ATTR_WARN_UNUSED_RESULT
Definition BLI_mmap.cc:482
BLI_mmap_file * BLI_mmap_open(int fd) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT
Definition BLI_mmap.cc:367
size_t BLI_mmap_get_length(const BLI_mmap_file *file) ATTR_WARN_UNUSED_RESULT
Definition BLI_mmap.cc:477
#define ID_IS_LINKED(_id)
Definition DNA_ID.h:694
@ ID_FLAG_EMBEDDED_DATA
Definition DNA_ID.h:774
volatile int lock
BMesh const char void * data
long long int int64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
bool add_as(ForwardKey &&key)
bool is_empty() const
const Value * lookup_ptr(const Key &key) const
Definition BLI_map.hh:508
bool add_overwrite(const Key &key, const Value &value)
Definition BLI_map.hh:325
bool add(const Key &key, const Value &value)
Definition BLI_map.hh:295
const Value * lookup_ptr_as(const ForwardKey &key) const
Definition BLI_map.hh:516
bool contains(const Key &key) const
Definition BLI_map.hh:353
bool contains(const Key &key) const
Definition BLI_set.hh:310
bool add(const Key &key)
Definition BLI_set.hh:248
bool remove(const Key &key)
Definition BLI_set.hh:385
constexpr bool is_empty() const
Definition BLI_span.hh:260
constexpr int64_t size() const
constexpr const char * data() const
constexpr const char * c_str() const
ThreadMutex mutex
static std::optional< XXH128_hash_t > get_source_file_hash(const ID &id, DeepHashErrors &r_errors)
Definition id_hash.cc:94
static std::optional< XXH128_hash_t > compute_file_hash_with_file_read(const StringRefNull path)
Definition id_hash.cc:44
static void compute_deep_hash_recursive(const Main &bmain, const ID &id, Set< const ID * > &current_stack, Map< const ID *, IDHash > &r_hashes, DeepHashErrors &r_errors)
Definition id_hash.cc:149
static std::optional< XXH128_hash_t > compute_file_hash(const StringRefNull path)
Definition id_hash.cc:75
static std::optional< XXH128_hash_t > get_id_shallow_hash(const ID &id, DeepHashErrors &r_errors)
Definition id_hash.cc:131
static std::optional< XXH128_hash_t > compute_file_hash_with_memory_map(const StringRefNull path)
Definition id_hash.cc:53
std::variant< ValidDeepHashes, DeepHashErrors > IDHashResult
static std::optional< Vector< char > > read_file(const StringRefNull path)
Definition id_hash.cc:28
IDHashResult compute_linked_id_deep_hashes(const Main &bmain, Span< const ID * > root_ids)
Definition id_hash.cc:233
std::string id_hash_to_hex(const IDHash &hash)
Definition id_hash.cc:257
std::mutex Mutex
Definition BLI_mutex.hh:47
#define hash
Definition noise_c.cc:154
char data[16]
Definition DNA_ID.h:384
Definition DNA_ID.h:414
short flag
Definition DNA_ID.h:438
LibraryForeachIDCallbackFlag cb_flag
VectorSet< std::string > updated_files
VectorSet< std::string > missing_files