keyframe_selection.cc
// Copyright (c) 2012 libmv authors.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.

#include "libmv/simple_pipeline/keyframe_selection.h"

#include "ceres/ceres.h"
#include "libmv/logging/logging.h"
#include "libmv/multiview/fundamental.h"
#include "libmv/multiview/homography.h"
#include "libmv/simple_pipeline/bundle.h"
#include "libmv/simple_pipeline/camera_intrinsics.h"
#include "libmv/simple_pipeline/intersect.h"

#include <Eigen/Eigenvalues>

namespace libmv {
namespace {

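// Builds the 3x3 matrix that maps image (pixel) coordinates into normalized
// camera coordinates: translate by minus the principal point, then divide by
// the focal length on each axis.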
Mat3 IntrinsicsNormalizationMatrix(const CameraIntrinsics& intrinsics) {
  Mat3 T = Mat3::Identity(), S = Mat3::Identity();

  T(0, 2) = -intrinsics.principal_point_x();
  T(1, 2) = -intrinsics.principal_point_y();

  S(0, 0) /= intrinsics.focal_length_x();
  S(1, 1) /= intrinsics.focal_length_y();

  return S * T;
}

// P.H.S. Torr
// Geometric Motion Segmentation and Model Selection
//
// http://reference.kfupm.edu.sa/content/g/e/geometric_motion_segmentation_and_model__126445.pdf
//
// d is the number of dimensions modeled
// (d = 3 for a fundamental matrix or 2 for a homography)
// k is the number of degrees of freedom in the model
// (k = 7 for a fundamental matrix or 8 for a homography)
// r is the dimension of the data
// (r = 4 for 2D correspondences between two frames)
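//
// The score computed below is
//
//   GRIC = sum_i rho(e_i^2) + lambda1 * d * n + lambda2 * k,
//
// with the robust term rho(e^2) = min(e^2 / sigma^2, lambda3 * (r - d)).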
double GRIC(const Vec& e, int d, int k, int r) {
  int n = e.rows();
  double lambda1 = log(static_cast<double>(r));
  double lambda2 = log(static_cast<double>(r * n));

  // lambda3 limits the residual error; this paper
  // http://elvera.nue.tu-berlin.de/files/0990Knorr2006.pdf
  // suggests using lambda3 = 2. The same value is used in Torr's
  // "The Problem of Degeneracy in Structure and Motion Recovery from
  // Uncalibrated Image Sequences",
  // http://www.robots.ox.ac.uk/~vgg/publications/papers/torr99.ps.gz
  double lambda3 = 2.0;

  // Variance of tracker position. Physically, this is typically about 0.1px,
  // and when squared becomes 0.01 px^2.
  double sigma2 = 0.01;

  // Finally, calculate the GRIC score.
  double gric = 0.0;
  for (int i = 0; i < n; i++) {
    gric += std::min(e(i) * e(i) / sigma2, lambda3 * (r - d));
  }
  gric += lambda1 * d * n;
  gric += lambda2 * k;
  return gric;
}

// Compute a generalized inverse using eigenvalue decomposition, clamping the
// smallest eigenvalues if requested. This is needed to compute the variance of
// reconstructed 3D points.
//
// TODO(keir): Consider moving this into the numeric code, since this is not
// related to keyframe selection.
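//
// The inverse is recomposed as V * D^+ * V^{-1}, where D^+ holds the inverted
// eigenvalues with the last num_eigenvalues_to_clamp diagonal entries forced
// to zero (used below to drop the seven gauge freedoms of the two-view
// reconstruction).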
Mat PseudoInverseWithClampedEigenvalues(const Mat& matrix,
                                        int num_eigenvalues_to_clamp) {
  Eigen::EigenSolver<Mat> eigen_solver(matrix);
  Mat D = eigen_solver.pseudoEigenvalueMatrix();
  Mat V = eigen_solver.pseudoEigenvectors();

  // Invert the eigenvalues, clamping too-small values to zero to prevent
  // numeric blowup.
  double epsilon = std::numeric_limits<double>::epsilon();
  for (int i = 0; i < D.cols(); ++i) {
    if (D(i, i) > epsilon) {
      D(i, i) = 1.0 / D(i, i);
    } else {
      D(i, i) = 0.0;
    }
  }

  // Apply the clamp.
  for (int i = D.cols() - num_eigenvalues_to_clamp; i < D.cols(); ++i) {
    D(i, i) = 0.0;
  }
  return V * D * V.inverse();
}

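// Copies into filtered_tracks only the markers whose weight is non-zero;
// zero-weight markers are excluded from keyframe selection.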
void FilterZeroWeightMarkersFromTracks(const Tracks& tracks,
                                       Tracks* filtered_tracks) {
  vector<Marker> all_markers = tracks.AllMarkers();

  for (int i = 0; i < all_markers.size(); ++i) {
    Marker& marker = all_markers[i];
    if (marker.weight != 0.0) {
      filtered_tracks->Insert(
          marker.image, marker.track, marker.x, marker.y, marker.weight);
    }
  }
}

} // namespace

void SelectKeyframesBasedOnGRICAndVariance(const Tracks& _tracks,
                                           const CameraIntrinsics& intrinsics,
                                           vector<int>& keyframes) {
  // Mirza Tahir Ahmed, Matthew N. Dailey
  // Robust key frame extraction for 3D reconstruction from video streams
  //
  // http://www.cs.ait.ac.th/~mdailey/papers/Tahir-KeyFrame.pdf

  Tracks filtered_tracks;
  FilterZeroWeightMarkersFromTracks(_tracks, &filtered_tracks);

  int max_image = filtered_tracks.MaxImage();
  int next_keyframe = 1;

  // Limit the correspondence ratio from both sides.
  // On the one hand, if the number of corresponding features is too low,
  // triangulation will suffer.
  // On the other hand, a high correspondence ratio likely means a short
  // baseline, which also hurts accuracy.
  const double Tmin = 0.8;
  const double Tmax = 1.0;

  Mat3 N = IntrinsicsNormalizationMatrix(intrinsics);
  Mat3 N_inverse = N.inverse();

  double Sc_best = std::numeric_limits<double>::max();
  double success_intersects_factor_best = 0.0f;

  while (next_keyframe != -1) {
    int current_keyframe = next_keyframe;
    double Sc_best_candidate = std::numeric_limits<double>::max();

    LG << "Found keyframe " << next_keyframe;

    next_keyframe = -1;

    for (int candidate_image = current_keyframe + 1;
         candidate_image <= max_image;
         candidate_image++) {
      // All markers from both keyframes.
      vector<Marker> all_markers = filtered_tracks.MarkersInBothImages(
          current_keyframe, candidate_image);

      // Match keypoints between frames current_keyframe and candidate_image.
      vector<Marker> tracked_markers =
          filtered_tracks.MarkersForTracksInBothImages(current_keyframe,
                                                       candidate_image);

      // Correspondences in normalized space.
      Mat x1, x2;
      CoordinatesForMarkersInImage(tracked_markers, current_keyframe, &x1);
      CoordinatesForMarkersInImage(tracked_markers, candidate_image, &x2);

      LG << "Found " << x1.cols() << " correspondences between "
         << current_keyframe << " and " << candidate_image;

      // Not enough points to construct the fundamental matrix.
      if (x1.cols() < 8 || x2.cols() < 8) {
        continue;
      }

      // STEP 1: Correspondence ratio constraint.
      int Tc = tracked_markers.size();
      int Tf = all_markers.size();
      double Rc = static_cast<double>(Tc) / Tf;

      LG << "Correspondence between " << current_keyframe << " and "
         << candidate_image << ": " << Rc;

      if (Rc < Tmin || Rc > Tmax) {
        continue;
      }

      Mat3 H, F;

      // Estimate homography using default options.
      EstimateHomographyOptions estimate_homography_options;
      EstimateHomography2DFromCorrespondences(
          x1, x2, estimate_homography_options, &H);

      // Convert homography to original pixel space.
      H = N_inverse * H * N;

      EstimateFundamentalOptions estimate_fundamental_options;
      EstimateFundamentalFromCorrespondences(
          x1, x2, estimate_fundamental_options, &F);

      // Convert fundamental to original pixel space.
      F = N_inverse * F * N;

      // TODO(sergey): STEP 2: Discard outlier matches

      // STEP 3: Geometric Robust Information Criteria

      // Compute error values for homography and fundamental matrices.
      Vec H_e, F_e;
      H_e.resize(x1.cols());
      F_e.resize(x1.cols());
      for (int i = 0; i < x1.cols(); i++) {
        Vec2 current_x1, current_x2;

        intrinsics.NormalizedToImageSpace(
            x1(0, i), x1(1, i), &current_x1(0), &current_x1(1));

        intrinsics.NormalizedToImageSpace(
            x2(0, i), x2(1, i), &current_x2(0), &current_x2(1));

        H_e(i) = SymmetricGeometricDistance(H, current_x1, current_x2);
        F_e(i) = SymmetricEpipolarDistance(F, current_x1, current_x2);
      }

      LG << "H_e: " << H_e.transpose();
      LG << "F_e: " << F_e.transpose();

      // Degeneracy constraint.
      double GRIC_H = GRIC(H_e, 2, 8, 4);
      double GRIC_F = GRIC(F_e, 3, 7, 4);

      LG << "GRIC values for frames " << current_keyframe << " and "
         << candidate_image << ", H-GRIC: " << GRIC_H << ", F-GRIC: " << GRIC_F;

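      // Only pairs where the epipolar model explains the matches better than
      // a homography are useful; GRIC_H <= GRIC_F indicates a (near-)
      // degenerate pair, e.g. insufficient parallax or a dominant plane.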
      if (GRIC_H <= GRIC_F) {
        continue;
      }

      // TODO(sergey): STEP 4: PELC criterion

      // STEP 5: Estimation of reconstruction error
      //
      // Uses the paper "Keyframe Selection for Camera Motion and Structure
      // Estimation from Multiple Views",
      // ftp://ftp.tnt.uni-hannover.de/pub/papers/2004/ECCV2004-TTHBAW.pdf
      // Basically, equation (15).
      //
      // TODO(sergey): separate all the constraints into functions,
      // this one is getting too cluttered already.

      // Definitions in equation (15):
      // - I is the number of 3D feature points
      // - A is the number of essential parameters of one camera

      EuclideanReconstruction reconstruction;

      // The F matrix should be an E matrix, but squash it just to be sure.

      // Reconstruction should happen using the normalized fundamental matrix.
      Mat3 F_normal = N * F * N_inverse;

      Mat3 E;
      FundamentalToEssential(F_normal, &E);

      // Recover motion between the two images. Since this function assumes a
      // calibrated camera, use the identity for K.
      Mat3 R;
      Vec3 t;
      Mat3 K = Mat3::Identity();

      if (!MotionFromEssentialAndCorrespondence(
              E, K, x1.col(0), K, x2.col(0), &R, &t)) {
        LG << "Failed to compute R and t from E and K";
        continue;
      }

      LG << "Camera transform between frames " << current_keyframe << " and "
         << candidate_image << ":\nR:\n"
         << R << "\nt:" << t.transpose();

      // First camera is identity, second one is relative to it.
      reconstruction.InsertCamera(
          current_keyframe, Mat3::Identity(), Vec3::Zero());
      reconstruction.InsertCamera(candidate_image, R, t);

      // Reconstruct 3D points.
      int intersects_total = 0, intersects_success = 0;
      for (int i = 0; i < tracked_markers.size(); i++) {
        if (!reconstruction.PointForTrack(tracked_markers[i].track)) {
          vector<Marker> reconstructed_markers;

          int track = tracked_markers[i].track;

          reconstructed_markers.push_back(tracked_markers[i]);

          // There are always exactly two markers for a track here. A
          // brute-force search is used for the second one because the layout
          // of the markers in the list is unknown; the inner loop runs only
          // once, so this is not a big deal.

          for (int j = i + 1; j < tracked_markers.size(); j++) {
            if (tracked_markers[j].track == track) {
              reconstructed_markers.push_back(tracked_markers[j]);
              break;
            }
          }

          intersects_total++;

          if (EuclideanIntersect(reconstructed_markers, &reconstruction)) {
            LG << "Ran Intersect() for track " << track;
            intersects_success++;
          } else {
            LG << "Failed to intersect track " << track;
          }
        }
      }

      double success_intersects_factor =
          double(intersects_success) / intersects_total;

      if (success_intersects_factor < success_intersects_factor_best) {
        LG << "Skip keyframe candidate because of "
              "lower successful intersections ratio";

        continue;
      }

      success_intersects_factor_best = success_intersects_factor;

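      // Refine the two-view reconstruction with a bundle adjustment, keeping
      // the intrinsics fixed, and request the Jacobian of the solution; the
      // Jacobian is used below to estimate the variance of the reconstructed
      // points.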
      Tracks two_frames_tracks(tracked_markers);
      PolynomialCameraIntrinsics empty_intrinsics;
      BundleEvaluation evaluation;
      evaluation.evaluate_jacobian = true;

      EuclideanBundleCommonIntrinsics(two_frames_tracks,
                                      BUNDLE_NO_INTRINSICS,
                                      BUNDLE_NO_CONSTRAINTS,
                                      &reconstruction,
                                      &empty_intrinsics,
                                      &evaluation);

      Mat& jacobian = evaluation.jacobian;

      Mat JT_J = jacobian.transpose() * jacobian;
      // There are 7 degrees of freedom, so clamp them out.
      Mat JT_J_inv = PseudoInverseWithClampedEigenvalues(JT_J, 7);

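      // Sanity check on the pseudo-inverse: for a valid generalized inverse,
      // J^T J * (J^T J)^+ * J^T J should reproduce J^T J (up to noise).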
      Mat temp_derived = JT_J * JT_J_inv * JT_J;
      bool is_inversed = (temp_derived - JT_J).cwiseAbs2().sum() <
                         1e-4 * std::min(temp_derived.cwiseAbs2().sum(),
                                         JT_J.cwiseAbs2().sum());

      LG << "Check on inversed: " << (is_inversed ? "true" : "false")
         << ", det(JT_J): " << JT_J.determinant();

      if (!is_inversed) {
        LG << "Ignoring candidate due to poor jacobian stability";
        continue;
      }

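      // Sigma_P is the covariance of the reconstructed 3D points: the
      // bottom-right block of (J^T J)^+ covering the point parameters
      // (three per point), assuming the point parameters are packed last in
      // the Jacobian, as the bottomRightCorner() extraction implies.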
      Mat Sigma_P;
      Sigma_P = JT_J_inv.bottomRightCorner(evaluation.num_points * 3,
                                           evaluation.num_points * 3);

      int I = evaluation.num_points;
      int A = 12;

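      // Equation (15): the expected estimation error,
      //
      //   Sc = (I + A) / (3 I)^2 * trace(Sigma_P),
      //
      // where I is the number of 3D points and A the number of essential
      // parameters of one camera (A = 12 here).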
      double Sc = static_cast<double>(I + A) / Square(3 * I) * Sigma_P.trace();

      LG << "Expected estimation error between " << current_keyframe << " and "
         << candidate_image << ": " << Sc;

      // Pairing with a lower Sc indicates a better choice.
      if (Sc > Sc_best_candidate) {
        continue;
      }

      Sc_best_candidate = Sc;

      next_keyframe = candidate_image;
    }

    // This is a bit arbitrary; the main reason for it is to deal better with
    // situations when no keyframe pair was found for the current keyframe.
    // This can happen when there is not much parallax at the beginning of the
    // image sequence and most of the features get occluded later; in that
    // case a good keyframe pair could still exist in the middle of the
    // sequence.
    //
    // However, this is just a quick hack and a smarter way to do it would be
    // nice.
    if (next_keyframe == -1) {
      next_keyframe = current_keyframe + 10;

      if (next_keyframe >= max_image) {
        break;
      }

      LG << "Starting search for keyframes from " << next_keyframe;
    } else {
      // The new pair's expected reconstruction error is lower than the
      // existing pair's one.
      //
      // For now, store just one candidate pair. It would be easy to store
      // more candidates, but that needs some thought on how to automatically
      // choose the best one (or how to let the user choose a pair manually).
      if (Sc_best > Sc_best_candidate) {
        keyframes.clear();
        keyframes.push_back(current_keyframe);
        keyframes.push_back(next_keyframe);
        Sc_best = Sc_best_candidate;
      }
    }
  }
}

} // namespace libmv