// Blender V4.3 — libmv/tracking/track_region.cc
1// Copyright (c) 2012 libmv authors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to
5// deal in the Software without restriction, including without limitation the
6// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7// sell copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19// IN THE SOFTWARE.
20//
21// Author: mierle@gmail.com (Keir Mierle)
22//
23// TODO(keir): While this tracking code works rather well, it has some
24// outragous inefficiencies. There is probably a 5-10x speedup to be had if a
25// smart coder went through the TODO's and made the suggested performance
26// enhancements.
27
29
#include "libmv/tracking/track_region.h"

#include <Eigen/QR>
#include <Eigen/SVD>
#include <iostream>

#include "ceres/ceres.h"
#include "ceres/jet.h"

#include "libmv/image/image.h"
#include "libmv/image/sample.h"
#include "libmv/logging/logging.h"
#include "libmv/multiview/homography.h"
#include "libmv/numeric/numeric.h"
40
41// Expand the Jet functionality of Ceres to allow mixed numeric/autodiff.
42//
43// TODO(keir): Push this (or something similar) into upstream Ceres.
44namespace ceres {
45
// A jet traits class to make it easier to work with mixed auto / numeric diff.
//
// Primary template: a plain scalar carries no derivative information, so the
// derivative-related operations are no-ops and the scalar is the value itself.
template <typename T>
struct JetOps {
  static bool IsScalar() { return true; }
  static T GetScalar(const T& value) { return value; }
  static void SetScalar(const T& scalar, T* value) { *value = scalar; }
  static void ScaleDerivative(double scale_by, T* value) {
    // A plain scalar has no derivative part to scale.
    (void)scale_by;
    (void)value;
  }
};
58
59template <typename T, int N>
60struct JetOps<Jet<T, N>> {
61 static bool IsScalar() { return false; }
62 static T GetScalar(const Jet<T, N>& t) { return t.a; }
63 static void SetScalar(const T& scalar, Jet<T, N>* t) { t->a = scalar; }
64 static void ScaleDerivative(double scale_by, Jet<T, N>* value) {
65 value->v *= scale_by;
66 }
67};
68
// Chain-rule helper used to mix numerically computed gradients with Ceres
// autodiff.
//
// Primary template: when the arguments are plain scalars there are no
// derivatives to propagate, so the function value is returned unchanged.
template <typename FunctionType, int kNumArgs, typename ArgumentType>
struct Chain {
  static ArgumentType Rule(const FunctionType& f,
                           const FunctionType dfdx[kNumArgs],
                           const ArgumentType x[kNumArgs]) {
    (void)dfdx;  // Unused: scalars carry no derivative information.
    (void)x;     // Unused.
    return f;
  }
};
81
// Specialization for Jet arguments. Implements the chain rule for a scalar
// function f whose gradient dfdx was computed numerically (e.g. from image
// gradients), where each argument x[i] is a jet carrying dx/dz with respect
// to some outer variable z. The result is a jet holding f with derivative
// df/dz = dfdx * dx/dz attached.
template <typename FunctionType, int kNumArgs, typename T, int N>
struct Chain<FunctionType, kNumArgs, Jet<T, N>> {
  static Jet<T, N> Rule(const FunctionType& f,
                        const FunctionType dfdx[kNumArgs],
                        const Jet<T, N> x[kNumArgs]) {
    // x is itself a function of another variable ("z"); what this function
    // needs to return is "f", but with the derivative with respect to z
    // attached to the jet. So combine the derivative part of x's jets to form
    // a Jacobian matrix between x and z (i.e. dx/dz).
    Eigen::Matrix<T, kNumArgs, N> dxdz;
    for (int i = 0; i < kNumArgs; ++i) {
      dxdz.row(i) = x[i].v.transpose();
    }

    // Map the input gradient dfdx into an Eigen row vector.
    Eigen::Map<const Eigen::Matrix<FunctionType, 1, kNumArgs>> vector_dfdx(
        dfdx, 1, kNumArgs);

    // Now apply the chain rule to obtain df/dz. Combine the derivative with
    // the scalar part to obtain f with full derivative information.
    Jet<T, N> jet_f;
    jet_f.a = f;
    jet_f.v = vector_dfdx.template cast<T>() * dxdz;  // Also known as dfdz.
    return jet_f;
  }
};
109
110} // namespace ceres
111
112namespace libmv {
113
114using ceres::Chain;
115using ceres::Jet;
116using ceres::JetOps;
117
119 : direction(FORWARD),
120 mode(TRANSLATION),
121 minimum_correlation(0),
122 max_iterations(20),
123 use_esm(true),
124 use_brute_initialization(true),
125 use_normalized_intensities(false),
126 sigma(0.9),
127 num_extra_points(0),
128 regularization_coefficient(0.0),
129 minimum_corner_shift_tolerance_pixels(0.005),
130 image1_mask(NULL) {
131}
132
133namespace {
134
135// TODO(keir): Consider adding padding.
136template <typename T>
137bool InBounds(const FloatImage& image, const T& x, const T& y) {
138 return 0.0 <= x && x < image.Width() && 0.0 <= y && y < image.Height();
139}
140
141bool AllInBounds(const FloatImage& image, const double* x, const double* y) {
142 for (int i = 0; i < 4; ++i) {
143 if (!InBounds(image, x[i], y[i])) {
144 return false;
145 }
146 }
147 return true;
148}
149
150// Sample the image at position (x, y) but use the gradient, if present, to
151// propagate derivatives from x and y. This is needed to integrate the numeric
152// image gradients with Ceres's autodiff framework.
template <typename T>
static T SampleWithDerivative(const FloatImage& image_and_gradient,
                              const T& x,
                              const T& y) {
  // Strip any derivative information to obtain the scalar lookup position.
  float scalar_x = JetOps<T>::GetScalar(x);
  float scalar_y = JetOps<T>::GetScalar(y);

  // Note that sample[1] and sample[2] will be uninitialized in the scalar
  // case, but that is not an issue because the Chain::Rule below will not read
  // the uninitialized values.
  float sample[3];
  if (JetOps<T>::IsScalar()) {
    // For the scalar case, only sample the image.
    sample[0] = SampleLinear(image_and_gradient, scalar_y, scalar_x, 0);
  } else {
    // For the derivative case, sample the gradient as well.
    SampleLinear(image_and_gradient, scalar_y, scalar_x, sample);
  }
  // Chain the numeric image gradient (sample[1], sample[2]) through the
  // derivative parts of x and y, if any (a no-op for plain scalars).
  T xy[2] = {x, y};
  return Chain<float, 2, T>::Rule(sample[0], sample + 1, xy);
}
174
// Ceres iteration callback enforcing two extra stopping rules for the region
// tracker, evaluated after each successful solver step:
//
//   1. Abort if any warped patch corner falls outside of image2.
//   2. Report success once no corner moves by more than
//      options.minimum_corner_shift_tolerance_pixels between successful
//      steps. Parameter-space tolerances do not map simply to pixel motion
//      of the corners, hence this stateful pixel-space check.
template <typename Warp>
class TerminationCheckingCallback : public ceres::IterationCallback {
 public:
  TerminationCheckingCallback(const TrackRegionOptions& options,
                              const FloatImage& image2,
                              const Warp& warp,
                              const double* x1,
                              const double* y1)
      : options_(options),
        image2_(image2),
        warp_(warp),
        x1_(x1),
        y1_(y1),
        have_last_successful_step_(false) {}

  virtual ceres::CallbackReturnType operator()(
      const ceres::IterationSummary& summary) {
    // Unsuccessful steps leave the parameters unchanged; nothing to check.
    if (!summary.step_is_successful) {
      return ceres::SOLVER_CONTINUE;
    }

    // Push the four original pattern corners through the current warp.
    double x2[4];
    double y2[4];
    for (int corner = 0; corner < 4; ++corner) {
      warp_.Forward(
          warp_.parameters, x1_[corner], y1_[corner], x2 + corner, y2 + corner);
    }

    // Rule 1: all corners must stay inside the destination image.
    if (!AllInBounds(image2_, x2, y2)) {
      LG << "Successful step fell outside of the pattern bounds; aborting.";
      return ceres::SOLVER_ABORT;
    }

    // Rule 2: stop as soon as the corners have (nearly) stopped moving.
    if (have_last_successful_step_) {
      // Largest squared corner displacement since the last successful step.
      double worst_shift_squared = 0.0;
      for (int corner = 0; corner < 4; ++corner) {
        const double dx = x2[corner] - x2_last_successful_[corner];
        const double dy = y2[corner] - y2_last_successful_[corner];
        const double shift_squared = dx * dx + dy * dy;
        if (shift_squared > worst_shift_squared) {
          worst_shift_squared = shift_squared;
        }
      }
      const double max_change_pixels = sqrt(worst_shift_squared);
      LG << "Max patch corner shift is " << max_change_pixels;

      if (max_change_pixels < options_.minimum_corner_shift_tolerance_pixels) {
        LG << "Max patch corner shift is " << max_change_pixels
           << " from the last iteration; returning success.";
        return ceres::SOLVER_TERMINATE_SUCCESSFULLY;
      }
    }

    // Remember these corners for comparison on the next successful step.
    for (int corner = 0; corner < 4; ++corner) {
      x2_last_successful_[corner] = x2[corner];
      y2_last_successful_[corner] = y2[corner];
    }
    have_last_successful_step_ = true;
    return ceres::SOLVER_CONTINUE;
  }

 private:
  const TrackRegionOptions& options_;
  const FloatImage& image2_;
  const Warp& warp_;
  const double* x1_;
  const double* y1_;

  bool have_last_successful_step_;
  double x2_last_successful_[4];
  double y2_last_successful_[4];
};
257
// Per-pixel intensity-difference cost for region tracking. The pattern is
// sampled on a regular canonical grid, mapped into image1 via
// canonical_to_image1, and each residual is the (optionally masked and
// intensity-normalized) difference between the cached image1 sample and the
// warped sample from image2. Works with Ceres autodiff: T may be a plain
// double or a ceres::Jet.
template <typename Warp>
class PixelDifferenceCostFunctor {
 public:
  // Both image arguments are 3-channel images holding intensity in channel 0
  // and the x/y gradients in channels 1 and 2 (established by the SampleLinear
  // / Chain usage below). The canonical patch is num_samples_x by
  // num_samples_y pixels.
  PixelDifferenceCostFunctor(const TrackRegionOptions& options,
                             const FloatImage& image_and_gradient1,
                             const FloatImage& image_and_gradient2,
                             const Mat3& canonical_to_image1,
                             int num_samples_x,
                             int num_samples_y,
                             const Warp& warp)
      : options_(options),
        image_and_gradient1_(image_and_gradient1),
        image_and_gradient2_(image_and_gradient2),
        canonical_to_image1_(canonical_to_image1),
        num_samples_x_(num_samples_x),
        num_samples_y_(num_samples_y),
        warp_(warp),
        pattern_and_gradient_(num_samples_y_, num_samples_x_, 3),
        pattern_positions_(num_samples_y_, num_samples_x_, 2),
        pattern_mask_(num_samples_y_, num_samples_x_, 1) {
    ComputeCanonicalPatchAndNormalizer();
  }

  // Pre-samples the pattern (and its gradients), the image1 positions of each
  // canonical grid point, and the mask, and computes the mask-weighted mean
  // intensity of the source pattern (src_mean_) for normalized matching.
  void ComputeCanonicalPatchAndNormalizer() {
    src_mean_ = 0.0;
    double num_samples = 0.0;
    for (int r = 0; r < num_samples_y_; ++r) {
      for (int c = 0; c < num_samples_x_; ++c) {
        // Compute the position; cache it.
        Vec3 image_position = canonical_to_image1_ * Vec3(c, r, 1);
        image_position /= image_position(2);
        pattern_positions_(r, c, 0) = image_position(0);
        pattern_positions_(r, c, 1) = image_position(1);

        // Sample the pattern and gradients.
        SampleLinear(image_and_gradient1_,
                     image_position(1),  // SampleLinear is r, c.
                     image_position(0),
                     &pattern_and_gradient_(r, c, 0));

        // Sample the mask.
        double mask_value = 1.0;
        if (options_.image1_mask != NULL) {
          SampleLinear(*options_.image1_mask,
                       image_position(1),  // SampleLinear is r, c.
                       image_position(0),
                       &pattern_mask_(r, c, 0));
          mask_value = pattern_mask_(r, c);
        }
        src_mean_ += pattern_and_gradient_(r, c, 0) * mask_value;
        num_samples += mask_value;
      }
    }
    src_mean_ /= num_samples;
  }

  // Ceres cost: fills one residual per canonical grid sample (row-major over
  // the pattern) with mask * (src - dst), optionally after dividing each
  // signal by its mean intensity. Returns true unconditionally.
  template <typename T>
  bool operator()(const T* warp_parameters, T* residuals) const {
    if (options_.image1_mask != NULL) {
      VLOG(2) << "Using a mask.";
    }
    for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) {
      VLOG(2) << "warp_parameters[" << i << "]: " << warp_parameters[i];
    }

    // Mean intensity of the warped destination patch; carries derivative
    // information when T is a jet.
    T dst_mean = T(1.0);
    if (options_.use_normalized_intensities) {
      ComputeNormalizingCoefficient(warp_parameters, &dst_mean);
    }

    int cursor = 0;
    for (int r = 0; r < num_samples_y_; ++r) {
      for (int c = 0; c < num_samples_x_; ++c) {
        // Use the pre-computed image1 position.
        Vec2 image1_position(pattern_positions_(r, c, 0),
                             pattern_positions_(r, c, 1));

        // Sample the mask early; if it's zero, this pixel has no effect. This
        // allows early bailout from the expensive sampling that happens below.
        //
        // Note that partial masks are not short circuited. To see why short
        // circuiting produces bitwise-exact same results, consider that the
        // residual for each pixel is
        //
        //   residual = mask * (src - dst)  ,
        //
        // and for jets, multiplying by a scalar multiplies the derivative
        // components by the scalar as well. Therefore, if the mask is exactly
        // zero, then so too will the final residual and derivatives.
        double mask_value = 1.0;
        if (options_.image1_mask != NULL) {
          mask_value = pattern_mask_(r, c);
          if (mask_value == 0.0) {
            residuals[cursor++] = T(0.0);
            continue;
          }
        }

        // Compute the location of the destination pixel.
        T image2_position[2];
        warp_.Forward(warp_parameters,
                      T(image1_position[0]),
                      T(image1_position[1]),
                      &image2_position[0],
                      &image2_position[1]);

        // Sample the destination, propagating derivatives.
        T dst_sample = SampleWithDerivative(
            image_and_gradient2_, image2_position[0], image2_position[1]);

        // Sample the source. This is made complicated by ESM mode.
        T src_sample;
        if (options_.use_esm && !JetOps<T>::IsScalar()) {
          // In ESM mode, the derivative of the source is also taken into
          // account. This changes the linearization in a way that causes
          // better convergence. Copy the derivative of the warp parameters
          // onto the jets for the image1 position. This is the ESM hack.
          T image1_position_jet[2] = {
              image2_position[0],  // Order is x, y. This matches the
              image2_position[1]   // derivative order in the patch.
          };
          JetOps<T>::SetScalar(image1_position[0], image1_position_jet + 0);
          JetOps<T>::SetScalar(image1_position[1], image1_position_jet + 1);

          // Now that the image1 positions have the jets applied from the
          // image2 position (the ESM hack), chain the image gradients to
          // obtain a sample with the derivative with respect to the warp
          // parameters attached.
          src_sample = Chain<float, 2, T>::Rule(pattern_and_gradient_(r, c),
                                                &pattern_and_gradient_(r, c, 1),
                                                image1_position_jet);

          // The jacobians for these should be averaged. Due to the subtraction
          // below, flip the sign of the src derivative so that the effect
          // after subtraction of the jets is that they are averaged.
          JetOps<T>::ScaleDerivative(-0.5, &src_sample);
          JetOps<T>::ScaleDerivative(0.5, &dst_sample);
        } else {
          // This is the traditional, forward-mode KLT solution.
          src_sample = T(pattern_and_gradient_(r, c));
        }

        // Normalize the samples by the mean values of each signal. The typical
        // light model assumes multiplicative intensity changes with changing
        // light, so this is a reasonable choice. Note that dst_mean has
        // derivative information attached thanks to autodiff.
        if (options_.use_normalized_intensities) {
          src_sample /= T(src_mean_);
          dst_sample /= dst_mean;
        }

        // The difference is the error.
        T error = src_sample - dst_sample;

        // Weight the error by the mask, if one is present.
        if (options_.image1_mask != NULL) {
          error *= T(mask_value);
        }
        residuals[cursor++] = error;
      }
    }
    return true;
  }

  // For normalized matching, compute the mask-weighted mean intensity of the
  // warped destination patch. The mean carries derivative information with
  // respect to the warp parameters when T is a jet, so the normalization is
  // itself differentiated by Ceres.
  template <typename T>
  void ComputeNormalizingCoefficient(const T* warp_parameters,
                                     T* dst_mean) const {
    *dst_mean = T(0.0);
    double num_samples = 0.0;
    for (int r = 0; r < num_samples_y_; ++r) {
      for (int c = 0; c < num_samples_x_; ++c) {
        // Use the pre-computed image1 position.
        Vec2 image1_position(pattern_positions_(r, c, 0),
                             pattern_positions_(r, c, 1));

        // Sample the mask early; if it's zero, this pixel has no effect. This
        // allows early bailout from the expensive sampling that happens below.
        double mask_value = 1.0;
        if (options_.image1_mask != NULL) {
          mask_value = pattern_mask_(r, c);
          if (mask_value == 0.0) {
            continue;
          }
        }

        // Compute the location of the destination pixel.
        T image2_position[2];
        warp_.Forward(warp_parameters,
                      T(image1_position[0]),
                      T(image1_position[1]),
                      &image2_position[0],
                      &image2_position[1]);

        // Sample the destination, propagating derivatives.
        // TODO(keir): This accumulation can, surprisingly, be done as a
        // pre-pass by using integral images. This is complicated by the need
        // to store the jets in the integral image, but it is possible.
        T dst_sample = SampleWithDerivative(
            image_and_gradient2_, image2_position[0], image2_position[1]);

        // Weight the sample by the mask, if one is present.
        if (options_.image1_mask != NULL) {
          dst_sample *= T(mask_value);
        }

        *dst_mean += dst_sample;
        num_samples += mask_value;
      }
    }
    *dst_mean /= T(num_samples);
    LG << "Normalization for dst:" << *dst_mean;
  }

  // Single-pass Pearson product-moment correlation between the cached source
  // pattern and the warped destination patch, used to judge track quality.
  // Masked pixels contribute fractionally via their mask weight.
  // TODO(keir): Consider also computing the cost here.
  double PearsonProductMomentCorrelationCoefficient(
      const double* warp_parameters) const {
    for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) {
      VLOG(2) << "Correlation warp_parameters[" << i
              << "]: " << warp_parameters[i];
    }

    // The single-pass PMCC computation is somewhat numerically unstable, but
    // it's sufficient for the tracker.
    double sX = 0, sY = 0, sXX = 0, sYY = 0, sXY = 0;

    // Due to masking, it's important to account for fractional samples.
    // For example, samples with a 50% mask are counted as a half sample.
    double num_samples = 0;

    for (int r = 0; r < num_samples_y_; ++r) {
      for (int c = 0; c < num_samples_x_; ++c) {
        // Use the pre-computed image1 position.
        Vec2 image1_position(pattern_positions_(r, c, 0),
                             pattern_positions_(r, c, 1));

        double mask_value = 1.0;
        if (options_.image1_mask != NULL) {
          mask_value = pattern_mask_(r, c);
          if (mask_value == 0.0) {
            continue;
          }
        }

        // Compute the location of the destination pixel.
        double image2_position[2];
        warp_.Forward(warp_parameters,
                      image1_position[0],
                      image1_position[1],
                      &image2_position[0],
                      &image2_position[1]);

        double x = pattern_and_gradient_(r, c);
        double y = SampleLinear(image_and_gradient2_,
                                image2_position[1],  // SampleLinear is r, c.
                                image2_position[0]);

        // Weight the signals by the mask, if one is present.
        if (options_.image1_mask != NULL) {
          x *= mask_value;
          y *= mask_value;
          num_samples += mask_value;
        } else {
          num_samples++;
        }
        sX += x;
        sY += y;
        sXX += x * x;
        sYY += y * y;
        sXY += x * y;
      }
    }
    // Normalize.
    sX /= num_samples;
    sY /= num_samples;
    sXX /= num_samples;
    sYY /= num_samples;
    sXY /= num_samples;

    double var_x = sXX - sX * sX;
    double var_y = sYY - sY * sY;
    double covariance_xy = sXY - sX * sY;

    // NOTE(review): if either variance is zero (flat patch) this divides by
    // zero and yields NaN/inf — presumably rejected downstream; confirm.
    double correlation = covariance_xy / sqrt(var_x * var_y);
    LG << "Covariance xy: " << covariance_xy << ", var 1: " << var_x
       << ", var 2: " << var_y << ", correlation: " << correlation;
    return correlation;
  }

 private:
  const TrackRegionOptions& options_;
  const FloatImage& image_and_gradient1_;
  const FloatImage& image_and_gradient2_;
  const Mat3& canonical_to_image1_;
  int num_samples_x_;
  int num_samples_y_;
  const Warp& warp_;
  // Mask-weighted mean intensity of the source pattern (for normalization).
  double src_mean_;
  // Cached pattern intensity and gradients, sampled from image1.
  FloatImage pattern_and_gradient_;

  // This contains the position from where the cached pattern samples were
  // taken from. This is also used to warp from src to dest without going from
  // canonical pixels to src first.
  FloatImage pattern_positions_;

  // Cached per-sample mask weights (only valid when image1_mask is set).
  FloatImage pattern_mask_;
};
565
566template <typename Warp>
567class WarpRegularizingCostFunctor {
568 public:
569 WarpRegularizingCostFunctor(const TrackRegionOptions& options,
570 const double* x1,
571 const double* y1,
572 const double* x2_original,
573 const double* y2_original,
574 const Warp& warp)
575 : options_(options),
576 x1_(x1),
577 y1_(y1),
578 x2_original_(x2_original),
579 y2_original_(y2_original),
580 warp_(warp) {
581 // Compute the centroid of the first guess quad.
582 // TODO(keir): Use Quad class here.
583 original_centroid_[0] = 0.0;
584 original_centroid_[1] = 0.0;
585 for (int i = 0; i < 4; ++i) {
586 original_centroid_[0] += x2_original[i];
587 original_centroid_[1] += y2_original[i];
588 }
589 original_centroid_[0] /= 4;
590 original_centroid_[1] /= 4;
591 }
592
593 template <typename T>
594 bool operator()(const T* warp_parameters, T* residuals) const {
595 T dst_centroid[2] = {T(0.0), T(0.0)};
596 for (int i = 0; i < 4; ++i) {
597 T image1_position[2] = {T(x1_[i]), T(y1_[i])};
598 T image2_position[2];
599 warp_.Forward(warp_parameters,
600 T(x1_[i]),
601 T(y1_[i]),
602 &image2_position[0],
603 &image2_position[1]);
604
605 // Subtract the positions. Note that this ignores the centroids.
606 residuals[2 * i + 0] = image2_position[0] - image1_position[0];
607 residuals[2 * i + 1] = image2_position[1] - image1_position[1];
608
609 // Accumulate the dst centroid.
610 dst_centroid[0] += image2_position[0];
611 dst_centroid[1] += image2_position[1];
612 }
613 dst_centroid[0] /= T(4.0);
614 dst_centroid[1] /= T(4.0);
615
616 // Adjust for the centroids.
617 for (int i = 0; i < 4; ++i) {
618 residuals[2 * i + 0] += T(original_centroid_[0]) - dst_centroid[0];
619 residuals[2 * i + 1] += T(original_centroid_[1]) - dst_centroid[1];
620 }
621
622 // Reweight the residuals.
623 for (int i = 0; i < 8; ++i) {
624 residuals[i] *= T(options_.regularization_coefficient);
625 }
626
627 return true;
628 }
629
630 const TrackRegionOptions& options_;
631 const double* x1_;
632 const double* y1_;
633 const double* x2_original_;
634 const double* y2_original_;
636 const Warp& warp_;
637};
638
// Compute the warp from rectangular coordinates, where one corner is the
// origin, and the opposite corner is at (num_samples_x, num_samples_y).
Mat3 ComputeCanonicalHomography(const double* x1,
                                const double* y1,
                                int num_samples_x,
                                int num_samples_y) {
  // Canonical patch corners: (0,0), (w,0), (w,h), (0,h), matching the corner
  // order of x1/y1.
  Mat canonical(2, 4);
  canonical << 0, num_samples_x, num_samples_x, 0, 0, 0, num_samples_y,
      num_samples_y;

  Mat xy1(2, 4);
  // clang-format off
  xy1 << x1[0], x1[1], x1[2], x1[3],
         y1[0], y1[1], y1[2], y1[3];
  // clang-format on

  Mat3 H;
  if (!Homography2DFromCorrespondencesLinear(canonical, xy1, &H, 1e-12)) {
    // NOTE(review): on failure H is returned as-is (uninitialized) and only a
    // log message is emitted — callers receive no error indication.
    LG << "Couldn't construct homography.";
  }
  return H;
}
661
662class Quad {
663 public:
664 Quad(const double* x, const double* y) : x_(x), y_(y) {
665 // Compute the centroid and store it.
666 centroid_ = Vec2(0.0, 0.0);
667 for (int i = 0; i < 4; ++i) {
668 centroid_ += Vec2(x_[i], y_[i]);
669 }
670 centroid_ /= 4.0;
671 }
672
673 // The centroid of the four points representing the quad.
674 const Vec2& Centroid() const { return centroid_; }
675
676 // The average magnitude of the four points relative to the centroid.
677 double Scale() const {
678 double scale = 0.0;
679 for (int i = 0; i < 4; ++i) {
680 scale += (Vec2(x_[i], y_[i]) - Centroid()).norm();
681 }
682 return scale / 4.0;
683 }
684
685 Vec2 CornerRelativeToCentroid(int i) const {
686 return Vec2(x_[i], y_[i]) - centroid_;
687 }
688
689 private:
690 const double* x_;
691 const double* y_;
692 Vec2 centroid_;
693};
694
695struct TranslationWarp {
696 TranslationWarp(const double* x1,
697 const double* y1,
698 const double* x2,
699 const double* y2) {
700 Vec2 t = Quad(x2, y2).Centroid() - Quad(x1, y1).Centroid();
701 parameters[0] = t[0];
702 parameters[1] = t[1];
703 }
704
705 template <typename T>
706 void Forward(
707 const T* warp_parameters, const T& x1, const T& y1, T* x2, T* y2) const {
708 *x2 = x1 + warp_parameters[0];
709 *y2 = y1 + warp_parameters[1];
710 }
711
712 // Translation x, translation y.
713 enum { NUM_PARAMETERS = 2 };
714 double parameters[NUM_PARAMETERS];
715};
716
717struct TranslationScaleWarp {
718 TranslationScaleWarp(const double* x1,
719 const double* y1,
720 const double* x2,
721 const double* y2)
722 : q1(x1, y1) {
723 Quad q2(x2, y2);
724
725 // The difference in centroids is the best guess for translation.
726 Vec2 t = q2.Centroid() - q1.Centroid();
727 parameters[0] = t[0];
728 parameters[1] = t[1];
729
730 // The difference in scales is the estimate for the scale.
731 parameters[2] = 1.0 - q2.Scale() / q1.Scale();
732 }
733
734 // The strange way of parameterizing the translation and scaling is to make
735 // the knobs that the optimizer sees easy to adjust. This is less important
736 // for the scaling case than the rotation case.
737 template <typename T>
738 void Forward(
739 const T* warp_parameters, const T& x1, const T& y1, T* x2, T* y2) const {
740 // Make the centroid of Q1 the origin.
741 const T x1_origin = x1 - q1.Centroid()(0);
742 const T y1_origin = y1 - q1.Centroid()(1);
743
744 // Scale uniformly about the origin.
745 const T scale = 1.0 + warp_parameters[2];
746 const T x1_origin_scaled = scale * x1_origin;
747 const T y1_origin_scaled = scale * y1_origin;
748
749 // Translate back into the space of Q1 (but scaled).
750 const T x1_scaled = x1_origin_scaled + q1.Centroid()(0);
751 const T y1_scaled = y1_origin_scaled + q1.Centroid()(1);
752
753 // Translate into the space of Q2.
754 *x2 = x1_scaled + warp_parameters[0];
755 *y2 = y1_scaled + warp_parameters[1];
756 }
757
758 // Translation x, translation y, scale.
759 enum { NUM_PARAMETERS = 3 };
760 double parameters[NUM_PARAMETERS];
761
762 Quad q1;
763};
764
765// Assumes the given points are already zero-centroid and the same size.
766Mat2 OrthogonalProcrustes(const Mat2& correlation_matrix) {
767 Eigen::JacobiSVD<Mat2> svd(correlation_matrix,
768 Eigen::ComputeFullU | Eigen::ComputeFullV);
769 return svd.matrixV() * svd.matrixU().transpose();
770}
771
// Translation plus rotation about the centroid of the source quad Q1.
struct TranslationRotationWarp {
  TranslationRotationWarp(const double* x1,
                          const double* y1,
                          const double* x2,
                          const double* y2)
      : q1(x1, y1) {
    Quad q2(x2, y2);

    // The difference in centroids is the best guess for translation.
    Vec2 t = q2.Centroid() - q1.Centroid();
    parameters[0] = t[0];
    parameters[1] = t[1];

    // Obtain the rotation via orthogonal procrustes.
    Mat2 correlation_matrix = Mat2::Zero();
    for (int i = 0; i < 4; ++i) {
      correlation_matrix += q1.CornerRelativeToCentroid(i) *
                            q2.CornerRelativeToCentroid(i).transpose();
    }
    Mat2 R = OrthogonalProcrustes(correlation_matrix);
    parameters[2] = atan2(R(1, 0), R(0, 0));

    LG << "Correlation_matrix:\n" << correlation_matrix;
    LG << "R:\n" << R;
    LG << "Theta:" << parameters[2];
  }

  // The strange way of parameterizing the translation and rotation is to make
  // the knobs that the optimizer sees easy to adjust. The reason is that while
  // it is always the case that it is possible to express composed rotations
  // and translations as a single translation and rotation, the numerical
  // values needed for the composition are often large in magnitude. This is
  // enough to throw off any minimizer, since it must do the equivalent of
  // compose rotations and translations.
  //
  // Instead, use the parameterization below that offers a parameterization
  // that exposes the degrees of freedom in a way amenable to optimization.
  template <typename T>
  void Forward(
      const T* warp_parameters, const T& x1, const T& y1, T* x2, T* y2) const {
    // Make the centroid of Q1 the origin.
    const T x1_origin = x1 - q1.Centroid()(0);
    const T y1_origin = y1 - q1.Centroid()(1);

    // Rotate about the origin (i.e. centroid of Q1).
    const T theta = warp_parameters[2];
    const T costheta = cos(theta);
    const T sintheta = sin(theta);
    const T x1_origin_rotated = costheta * x1_origin - sintheta * y1_origin;
    const T y1_origin_rotated = sintheta * x1_origin + costheta * y1_origin;

    // Translate back into the space of Q1 (but rotated).
    const T x1_rotated = x1_origin_rotated + q1.Centroid()(0);
    const T y1_rotated = y1_origin_rotated + q1.Centroid()(1);

    // Translate into the space of Q2.
    *x2 = x1_rotated + warp_parameters[0];
    *y2 = y1_rotated + warp_parameters[1];
  }

  // Translation x, translation y, rotation about the center of Q1 in radians
  // (atan2 / cos / sin operate in radians).
  enum { NUM_PARAMETERS = 3 };
  double parameters[NUM_PARAMETERS];

  Quad q1;
};
838
// Translation, rotation, and uniform scale about the centroid of the source
// quad Q1 (a 2D similarity transform).
struct TranslationRotationScaleWarp {
  TranslationRotationScaleWarp(const double* x1,
                               const double* y1,
                               const double* x2,
                               const double* y2)
      : q1(x1, y1) {
    Quad q2(x2, y2);

    // The difference in centroids is the best guess for translation.
    Vec2 t = q2.Centroid() - q1.Centroid();
    parameters[0] = t[0];
    parameters[1] = t[1];

    // Scale guess, stored as an offset from 1.0 (zero means no scaling).
    // NOTE(review): Forward() applies scale = 1.0 + parameters[2], so this
    // initial guess appears sign-flipped for non-unit scale ratios; the
    // optimizer recovers, but confirm intent.
    parameters[2] = 1.0 - q2.Scale() / q1.Scale();

    // Obtain the rotation via orthogonal procrustes.
    Mat2 correlation_matrix = Mat2::Zero();
    for (int i = 0; i < 4; ++i) {
      correlation_matrix += q1.CornerRelativeToCentroid(i) *
                            q2.CornerRelativeToCentroid(i).transpose();
    }
    Mat2 R = OrthogonalProcrustes(correlation_matrix);
    parameters[3] = atan2(R(1, 0), R(0, 0));

    LG << "Correlation_matrix:\n" << correlation_matrix;
    LG << "R:\n" << R;
    LG << "Theta:" << parameters[3];
  }

  // The strange way of parameterizing the translation and rotation is to make
  // the knobs that the optimizer sees easy to adjust. The reason is that while
  // it is always the case that it is possible to express composed rotations
  // and translations as a single translation and rotation, the numerical
  // values needed for the composition are often large in magnitude. This is
  // enough to throw off any minimizer, since it must do the equivalent of
  // compose rotations and translations.
  //
  // Instead, use the parameterization below that offers a parameterization
  // that exposes the degrees of freedom in a way amenable to optimization.
  template <typename T>
  void Forward(
      const T* warp_parameters, const T& x1, const T& y1, T* x2, T* y2) const {
    // Make the centroid of Q1 the origin.
    const T x1_origin = x1 - q1.Centroid()(0);
    const T y1_origin = y1 - q1.Centroid()(1);

    // Rotate about the origin (i.e. centroid of Q1).
    const T theta = warp_parameters[3];
    const T costheta = cos(theta);
    const T sintheta = sin(theta);
    const T x1_origin_rotated = costheta * x1_origin - sintheta * y1_origin;
    const T y1_origin_rotated = sintheta * x1_origin + costheta * y1_origin;

    // Scale uniformly about the origin.
    const T scale = 1.0 + warp_parameters[2];
    const T x1_origin_rotated_scaled = scale * x1_origin_rotated;
    const T y1_origin_rotated_scaled = scale * y1_origin_rotated;

    // Translate back into the space of Q1 (but scaled and rotated).
    const T x1_rotated_scaled = x1_origin_rotated_scaled + q1.Centroid()(0);
    const T y1_rotated_scaled = y1_origin_rotated_scaled + q1.Centroid()(1);

    // Translate into the space of Q2.
    *x2 = x1_rotated_scaled + warp_parameters[0];
    *y2 = y1_rotated_scaled + warp_parameters[1];
  }

  // Translation x, translation y, scale offset, rotation about the center of
  // Q1 in radians (atan2 / cos / sin operate in radians).
  enum { NUM_PARAMETERS = 4 };
  double parameters[NUM_PARAMETERS];

  Quad q1;
};
914
// Full 2D affine warp: translation plus an arbitrary 2x2 linear map applied
// about the centroid of the source quad Q1.
struct AffineWarp {
  AffineWarp(const double* x1,
             const double* y1,
             const double* x2,
             const double* y2)
      : q1(x1, y1) {
    Quad q2(x2, y2);

    // The difference in centroids is the best guess for translation.
    Vec2 t = q2.Centroid() - q1.Centroid();
    parameters[0] = t[0];
    parameters[1] = t[1];

    // Estimate the four affine parameters with the usual least squares.
    Mat Q1(8, 4);
    Vec Q2(8);
    for (int i = 0; i < 4; ++i) {
      Vec2 v1 = q1.CornerRelativeToCentroid(i);
      Vec2 v2 = q2.CornerRelativeToCentroid(i);

      Q1.row(2 * i + 0) << v1[0], v1[1], 0, 0;
      Q1.row(2 * i + 1) << 0, 0, v1[0], v1[1];

      Q2(2 * i + 0) = v2[0];
      Q2(2 * i + 1) = v2[1];
    }

    // TODO(keir): Check solution quality.
    Vec4 a = Q1.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(Q2);
    parameters[2] = a[0];
    parameters[3] = a[1];
    parameters[4] = a[2];
    parameters[5] = a[3];

    LG << "a:" << a.transpose();
    LG << "t:" << t.transpose();
  }

  // See comments in other parameterizations about why the centroid is used.
  template <typename T>
  void Forward(const T* p, const T& x1, const T& y1, T* x2, T* y2) const {
    // Make the centroid of Q1 the origin.
    const T x1_origin = x1 - q1.Centroid()(0);
    const T y1_origin = y1 - q1.Centroid()(1);

    // Apply the affine transformation.
    const T x1_origin_affine = p[2] * x1_origin + p[3] * y1_origin;
    const T y1_origin_affine = p[4] * x1_origin + p[5] * y1_origin;

    // Translate back into the space of Q1 (but affine transformed).
    const T x1_affine = x1_origin_affine + q1.Centroid()(0);
    const T y1_affine = y1_origin_affine + q1.Centroid()(1);

    // Translate into the space of Q2.
    *x2 = x1_affine + p[0];
    *y2 = y1_affine + p[1];
  }

  // Translation x, translation y, followed by the row-major entries of the
  // 2x2 affine matrix: [p[2] p[3]; p[4] p[5]], applied about Q1's centroid.
  enum { NUM_PARAMETERS = 6 };
  double parameters[NUM_PARAMETERS];

  Quad q1;
};
980
981struct HomographyWarp {
982 HomographyWarp(const double* x1,
983 const double* y1,
984 const double* x2,
985 const double* y2) {
986 Mat quad1(2, 4);
987 // clang-format off
988 quad1 << x1[0], x1[1], x1[2], x1[3],
989 y1[0], y1[1], y1[2], y1[3];
990 // clang-format on
991
992 Mat quad2(2, 4);
993 // clang-format off
994 quad2 << x2[0], x2[1], x2[2], x2[3],
995 y2[0], y2[1], y2[2], y2[3];
996 // clang-format on
997
998 Mat3 H;
999 if (!Homography2DFromCorrespondencesLinear(quad1, quad2, &H, 1e-12)) {
1000 LG << "Couldn't construct homography.";
1001 }
1002
1003 // Assume H(2, 2) != 0, and fix scale at H(2, 2) == 1.0.
1004 H /= H(2, 2);
1005
1006 // Assume H is close to identity, so subtract out the diagonal.
1007 H(0, 0) -= 1.0;
1008 H(1, 1) -= 1.0;
1009
1010 CHECK_NE(H(2, 2), 0.0) << H;
1011 for (int i = 0; i < 8; ++i) {
1012 parameters[i] = H(i / 3, i % 3);
1013 LG << "Parameters[" << i << "]: " << parameters[i];
1014 }
1015 }
1016
1017 template <typename T>
1018 static void Forward(const T* p, const T& x1, const T& y1, T* x2, T* y2) {
1019 // Homography warp with manual 3x3 matrix multiply.
1020 const T xx2 = (1.0 + p[0]) * x1 + p[1] * y1 + p[2];
1021 const T yy2 = p[3] * x1 + (1.0 + p[4]) * y1 + p[5];
1022 const T zz2 = p[6] * x1 + p[7] * y1 + 1.0;
1023 *x2 = xx2 / zz2;
1024 *y2 = yy2 / zz2;
1025 }
1026
1027 enum { NUM_PARAMETERS = 8 };
1028 double parameters[NUM_PARAMETERS];
1029};
1030
1031// Determine the number of samples to use for x and y. Quad winding goes:
1032//
1033// 0 1
1034// 3 2
1035//
1036// The idea is to take the maximum x or y distance. This may be oversampling.
1037// TODO(keir): Investigate the various choices; perhaps average is better?
1038void PickSampling(const double* x1,
1039 const double* y1,
1040 const double* x2,
1041 const double* y2,
1042 int* num_samples_x,
1043 int* num_samples_y) {
1044 (void)x2; // Ignored.
1045 (void)y2; // Ignored.
1046
1047 Vec2 a0(x1[0], y1[0]);
1048 Vec2 a1(x1[1], y1[1]);
1049 Vec2 a2(x1[2], y1[2]);
1050 Vec2 a3(x1[3], y1[3]);
1051
1052 Vec2 b0(x1[0], y1[0]);
1053 Vec2 b1(x1[1], y1[1]);
1054 Vec2 b2(x1[2], y1[2]);
1055 Vec2 b3(x1[3], y1[3]);
1056
1057 double x_dimensions[4] = {
1058 (a1 - a0).norm(), (a3 - a2).norm(), (b1 - b0).norm(), (b3 - b2).norm()};
1059
1060 double y_dimensions[4] = {
1061 (a3 - a0).norm(), (a1 - a2).norm(), (b3 - b0).norm(), (b1 - b2).norm()};
1062 const double kScaleFactor = 1.0;
1063 *num_samples_x = static_cast<int>(
1064 kScaleFactor * *std::max_element(x_dimensions, x_dimensions + 4));
1065 *num_samples_y = static_cast<int>(
1066 kScaleFactor * *std::max_element(y_dimensions, y_dimensions + 4));
1067 LG << "Automatic num_samples_x: " << *num_samples_x
1068 << ", num_samples_y: " << *num_samples_y;
1069}
1070
1071bool SearchAreaTooBigForDescent(const FloatImage& image2,
1072 const double* x2,
1073 const double* y2) {
1074 // TODO(keir): Check the bounds and enable only when it makes sense.
1075 (void)image2; // Ignored.
1076 (void)x2; // Ignored.
1077 (void)y2; // Ignored.
1078
1079 return true;
1080}
1081
1082bool PointOnRightHalfPlane(const Vec2& a, const Vec2& b, double x, double y) {
1083 Vec2 ba = b - a;
1084 return ((Vec2(x, y) - b).transpose() * Vec2(-ba.y(), ba.x())) > 0;
1085}
1086
1087// Determine if a point is in a quad. The quad is arranged as:
1088//
1089// +-------> x
1090// |
1091// | a0------a1
1092// | | |
1093// | | |
1094// | | |
1095// | a3------a2
1096// v
1097// y
1098//
1099// The implementation does up to four half-plane comparisons.
1100bool PointInQuad(const double* xs, const double* ys, double x, double y) {
1101 Vec2 a0(xs[0], ys[0]);
1102 Vec2 a1(xs[1], ys[1]);
1103 Vec2 a2(xs[2], ys[2]);
1104 Vec2 a3(xs[3], ys[3]);
1105
1106 return PointOnRightHalfPlane(a0, a1, x, y) &&
1107 PointOnRightHalfPlane(a1, a2, x, y) &&
1108 PointOnRightHalfPlane(a2, a3, x, y) &&
1109 PointOnRightHalfPlane(a3, a0, x, y);
1110}
1111
// A row-major dynamic float array with the same element type and memory
// layout as FloatImage data, so image buffers can be wrapped via Eigen::Map
// and processed with Eigen array expressions without per-pixel conversion.
typedef Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
    FloatArray;
1116
1117// This creates a pattern in the frame of image2, from the pixel is image1,
1118// based on the initial guess represented by the two quads x1, y1, and x2, y2.
1119template <typename Warp>
1120void CreateBrutePattern(const double* x1,
1121 const double* y1,
1122 const double* x2,
1123 const double* y2,
1124 const FloatImage& image1,
1125 const FloatImage* image1_mask,
1126 FloatArray* pattern,
1127 FloatArray* mask,
1128 int* origin_x,
1129 int* origin_y) {
1130 // Get integer bounding box of quad2 in image2.
1131 int min_x = static_cast<int>(floor(*std::min_element(x2, x2 + 4)));
1132 int min_y = static_cast<int>(floor(*std::min_element(y2, y2 + 4)));
1133 int max_x = static_cast<int>(ceil(*std::max_element(x2, x2 + 4)));
1134 int max_y = static_cast<int>(ceil(*std::max_element(y2, y2 + 4)));
1135
1136 int w = max_x - min_x;
1137 int h = max_y - min_y;
1138
1139 pattern->resize(h, w);
1140 mask->resize(h, w);
1141
1142 Warp inverse_warp(x2, y2, x1, y1);
1143
1144 // r,c are in the coordinate frame of image2.
1145 for (int r = min_y; r < max_y; ++r) {
1146 for (int c = min_x; c < max_x; ++c) {
1147 // i and j are in the coordinate frame of the pattern in image2.
1148 int i = r - min_y;
1149 int j = c - min_x;
1150
1151 double dst_x = c;
1152 double dst_y = r;
1153 double src_x;
1154 double src_y;
1155 inverse_warp.Forward(
1156 inverse_warp.parameters, dst_x, dst_y, &src_x, &src_y);
1157
1158 if (PointInQuad(x1, y1, src_x, src_y)) {
1159 (*pattern)(i, j) = SampleLinear(image1, src_y, src_x);
1160 (*mask)(i, j) = 1.0;
1161 if (image1_mask) {
1162 (*mask)(i, j) = SampleLinear(*image1_mask, src_y, src_x);
1163 }
1164 } else {
1165 (*pattern)(i, j) = 0.0;
1166 (*mask)(i, j) = 0.0;
1167 }
1168 }
1169 }
1170 *origin_x = min_x;
1171 *origin_y = min_y;
1172}
1173
// Compute a translation-only estimate of the warp, using brute force search. A
// smarter implementation would use the FFT to compute the normalized cross
// correlation. Instead, this is a dumb implementation. Surprisingly, it is
// fast enough in practice.
//
// Returns true if any alignment was found, and false if the projected pattern
// is zero sized.
//
// TODO(keir): The normalization is less effective for the brute force search
// than it is with the Ceres solver. It's unclear if this is a bug or due to
// the original frame being too different from the reprojected reference in the
// destination frame.
//
// The likely solution is to use the previous frame, instead of the original
// pattern, when doing brute initialization. Unfortunately that implies a
// totally different warping interface, since access to more than the source
// and current destination frame is necessary.
template <typename Warp>
bool BruteTranslationOnlyInitialize(const FloatImage& image1,
                                    const FloatImage* image1_mask,
                                    const FloatImage& image2,
                                    const int num_extra_points,
                                    const bool use_normalized_intensities,
                                    const double* x1,
                                    const double* y1,
                                    double* x2,
                                    double* y2) {
  // Create the pattern to match in the space of image2, assuming our initial
  // guess isn't too far from the template in image1. If there is no image1
  // mask, then the resulting mask is binary.
  FloatArray pattern;
  FloatArray mask;
  int origin_x = -1, origin_y = -1;
  CreateBrutePattern<Warp>(x1,
                           y1,
                           x2,
                           y2,
                           image1,
                           image1_mask,
                           &pattern,
                           &mask,
                           &origin_x,
                           &origin_y);

  // For normalization, premultiply the pattern by the inverse pattern mean.
  // mask_sum stays at its 1.0 placeholder (and is unused) when normalization
  // is off.
  double mask_sum = 1.0;
  if (use_normalized_intensities) {
    mask_sum = mask.sum();
    double inverse_pattern_mean = mask_sum / ((mask * pattern).sum());
    pattern *= inverse_pattern_mean;
  }

  // Use Eigen on the images via maps for strong vectorization.
  Map<const FloatArray> search(image2.Data(), image2.Height(), image2.Width());

  // Try all possible locations inside the search area. Yes, everywhere.
  //
  // TODO(keir): There are a number of possible optimizations here. One choice
  // is to make a grid and only try one out of every N possible samples.
  //
  // Another, slightly more clever idea, is to compute some sort of spatial
  // frequency distribution of the pattern patch. If the spatial resolution is
  // high (e.g. a grating pattern or fine lines) then checking every possible
  // translation is necessary, since a 1-pixel shift may induce a massive
  // change in the cost function. If the image is a blob or splotch with blurry
  // edges, then fewer samples are necessary since a few pixels offset won't
  // change the cost function much.
  double best_sad = std::numeric_limits<double>::max();
  int best_r = -1;
  int best_c = -1;
  int w = pattern.cols();
  int h = pattern.rows();

  // NOTE(review): the strict '<' bounds skip the last valid shift at
  // r == Height() - h (likewise for columns); presumably benign, but confirm
  // before tightening.
  for (int r = 0; r < (image2.Height() - h); ++r) {
    for (int c = 0; c < (image2.Width() - w); ++c) {
      // Compute the weighted sum of absolute differences, Eigen style. Note
      // that the block from the search image is never stored in a variable, to
      // avoid copying overhead and permit inlining.
      double sad;
      if (use_normalized_intensities) {
        // TODO(keir): It's really dumb to recompute the search mean for every
        // shift. A smarter implementation would use summed area tables
        // instead, reducing the mean calculation to an O(1) operation.
        double inverse_search_mean =
            mask_sum / ((mask * search.block(r, c, h, w)).sum());
        sad = (mask *
               (pattern - (search.block(r, c, h, w) * inverse_search_mean)))
                  .abs()
                  .sum();
      } else {
        sad = (mask * (pattern - search.block(r, c, h, w))).abs().sum();
      }
      if (sad < best_sad) {
        best_r = r;
        best_c = c;
        best_sad = sad;
      }
    }
  }

  // This means the effective pattern area is zero. This check could go
  // earlier, but this is less code.
  if (best_r == -1 || best_c == -1) {
    return false;
  }

  LG << "Brute force translation found a shift. "
     << "best_c: " << best_c << ", best_r: " << best_r << ", "
     << "origin_x: " << origin_x << ", origin_y: " << origin_y << ", "
     << "dc: " << (best_c - origin_x) << ", "
     << "dr: " << (best_r - origin_y) << ", tried "
     << ((image2.Height() - h) * (image2.Width() - w)) << " shifts.";

  // Apply the shift: the pattern was built at (origin_x, origin_y) and the
  // best match was at (best_c, best_r), so the quad (and any trailing extra
  // points) moves by the difference.
  for (int i = 0; i < 4 + num_extra_points; ++i) {
    x2[i] += best_c - origin_x;
    y2[i] += best_r - origin_y;
  }
  return true;
}
1294
// Copies the four quad corners plus num_extra_points trailing points (e.g.
// the tracked center) from (src_x, src_y) into (dst_x, dst_y).
//
// The source arrays are never written, so they are taken as const; this is
// backward compatible with existing non-const callers.
void CopyQuad(const double* src_x,
              const double* src_y,
              double* dst_x,
              double* dst_y,
              int num_extra_points) {
  for (int i = 0; i < 4 + num_extra_points; ++i) {
    dst_x[i] = src_x[i];
    dst_y[i] = src_y[i];
  }
}
1305
1306} // namespace
1307
1308template <typename Warp>
1310 const FloatImage& image2,
1311 const double* x1,
1312 const double* y1,
1314 double* x2,
1315 double* y2,
1316 TrackRegionResult* result) {
1317 for (int i = 0; i < 4 + options.num_extra_points; ++i) {
1318 LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); guess (" << x2[i]
1319 << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", "
1320 << (y2[i] - y1[i]) << ").";
1321 }
1322
1323 // Since (x2, y2) contains a prediction for where the tracked point should
1324 // go, try a refinement immediately in the hope that the prediction is close
1325 // enough.
1326 if (options.attempt_refine_before_brute) {
1327 TrackRegionOptions modified_options = options;
1328 modified_options.use_brute_initialization = false;
1329 modified_options.attempt_refine_before_brute = false;
1330
1331 double x2_first_try[5];
1332 double y2_first_try[5];
1333 CopyQuad(x2, y2, x2_first_try, y2_first_try, options.num_extra_points);
1334
1336 image2,
1337 x1,
1338 y1,
1339 modified_options,
1340 x2_first_try,
1341 y2_first_try,
1342 result);
1343
1344 // Of the things that can happen in the first pass, don't try the brute
1345 // pass (and second attempt) if the error is one of the terminations below.
1346 if (result->termination == TrackRegionResult::CONVERGENCE ||
1347 result->termination == TrackRegionResult::SOURCE_OUT_OF_BOUNDS ||
1348 result->termination == TrackRegionResult::DESTINATION_OUT_OF_BOUNDS ||
1349 result->termination == TrackRegionResult::INSUFFICIENT_PATTERN_AREA) {
1350 LG << "Terminated with first try at refinement; no brute needed.";
1351 // TODO(keir): Also check correlation?
1352 CopyQuad(x2_first_try, y2_first_try, x2, y2, options.num_extra_points);
1353 LG << "Early termination correlation: " << result->correlation;
1354 return;
1355 } else {
1356 LG << "Initial eager-refinement failed; retrying normally.";
1357 }
1358 }
1359
1360 if (options.use_normalized_intensities) {
1361 LG << "Using normalized intensities.";
1362 }
1363
1364 // Bail early if the points are already outside.
1365 if (!AllInBounds(image1, x1, y1)) {
1366 result->termination = TrackRegionResult::SOURCE_OUT_OF_BOUNDS;
1367 return;
1368 }
1369 if (!AllInBounds(image2, x2, y2)) {
1371 return;
1372 }
1373 // TODO(keir): Check quads to ensure there is some area.
1374
1375 // Keep a copy of the "original" guess for regularization.
1376 double x2_original[4];
1377 double y2_original[4];
1378 for (int i = 0; i < 4; ++i) {
1379 x2_original[i] = x2[i];
1380 y2_original[i] = y2[i];
1381 }
1382
1383 // Prepare the image and gradient.
1384 Array3Df image_and_gradient1;
1385 Array3Df image_and_gradient2;
1387 image1, options.sigma, &image_and_gradient1);
1389 image2, options.sigma, &image_and_gradient2);
1390
1391 // Possibly do a brute-force translation-only initialization.
1392 if (SearchAreaTooBigForDescent(image2, x2, y2) &&
1393 options.use_brute_initialization) {
1394 LG << "Running brute initialization...";
1395 bool found_any_alignment =
1396 BruteTranslationOnlyInitialize<Warp>(image_and_gradient1,
1397 options.image1_mask,
1398 image2,
1399 options.num_extra_points,
1400 options.use_normalized_intensities,
1401 x1,
1402 y1,
1403 x2,
1404 y2);
1405 if (!found_any_alignment) {
1406 LG << "Brute failed to find an alignment; pattern too small. "
1407 << "Failing entire track operation.";
1409 return;
1410 }
1411 for (int i = 0; i < 4; ++i) {
1412 LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); brute (" << x2[i]
1413 << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", "
1414 << (y2[i] - y1[i]) << ").";
1415 }
1416 }
1417
1418 // Prepare the initial warp parameters from the four correspondences.
1419 // Note: This must happen after the brute initialization runs, since the
1420 // brute initialization mutates x2 and y2 in place.
1421 Warp warp(x1, y1, x2, y2);
1422
1423 // Decide how many samples to use in the x and y dimensions.
1424 int num_samples_x;
1425 int num_samples_y;
1426 PickSampling(x1, y1, x2, y2, &num_samples_x, &num_samples_y);
1427
1428 // Compute the warp from rectangular coordinates.
1429 Mat3 canonical_homography =
1430 ComputeCanonicalHomography(x1, y1, num_samples_x, num_samples_y);
1431
1432 ceres::Problem problem;
1433
1434 // Construct the warp cost function. AutoDiffCostFunction takes ownership.
1435 PixelDifferenceCostFunctor<Warp>* pixel_difference_cost_function =
1436 new PixelDifferenceCostFunctor<Warp>(options,
1437 image_and_gradient1,
1438 image_and_gradient2,
1439 canonical_homography,
1440 num_samples_x,
1441 num_samples_y,
1442 warp);
1443 problem.AddResidualBlock(
1444 new ceres::AutoDiffCostFunction<PixelDifferenceCostFunctor<Warp>,
1445 ceres::DYNAMIC,
1446 Warp::NUM_PARAMETERS>(
1447 pixel_difference_cost_function, num_samples_x * num_samples_y),
1448 NULL,
1449 warp.parameters);
1450
1451 // Construct the regularizing cost function
1452 if (options.regularization_coefficient != 0.0) {
1453 WarpRegularizingCostFunctor<Warp>* regularizing_warp_cost_function =
1454 new WarpRegularizingCostFunctor<Warp>(
1455 options, x1, y2, x2_original, y2_original, warp);
1456
1457 problem.AddResidualBlock(
1458 new ceres::AutoDiffCostFunction<WarpRegularizingCostFunctor<Warp>,
1459 8 /* num_residuals */,
1460 Warp::NUM_PARAMETERS>(
1461 regularizing_warp_cost_function),
1462 NULL,
1463 warp.parameters);
1464 }
1465
1466 // Configure the solve.
1467 ceres::Solver::Options solver_options;
1468 solver_options.linear_solver_type = ceres::DENSE_QR;
1469 solver_options.max_num_iterations = options.max_iterations;
1470 solver_options.update_state_every_iteration = true;
1471 solver_options.parameter_tolerance = 1e-16;
1472 solver_options.function_tolerance = 1e-16;
1473
1474 // Prevent the corners from going outside the destination image and
1475 // terminate if the optimizer is making tiny moves (converged).
1476 TerminationCheckingCallback<Warp> callback(options, image2, warp, x1, y1);
1477 solver_options.callbacks.push_back(&callback);
1478
1479 // Run the solve.
1480 ceres::Solver::Summary summary;
1481 ceres::Solve(solver_options, &problem, &summary);
1482
1483 LG << "Summary:\n" << summary.FullReport();
1484
1485 // Update the four points with the found solution; if the solver failed, then
1486 // the warp parameters are the identity (so ignore failure).
1487 //
1488 // Also warp any extra points on the end of the array.
1489 for (int i = 0; i < 4 + options.num_extra_points; ++i) {
1490 warp.Forward(warp.parameters, x1[i], y1[i], x2 + i, y2 + i);
1491 LG << "Warped point " << i << ": (" << x1[i] << ", " << y1[i] << ") -> ("
1492 << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", "
1493 << (y2[i] - y1[i]) << ").";
1494 }
1495
1496 // TODO(keir): Update the result statistics.
1497 // TODO(keir): Add a normalize-cross-correlation variant.
1498
1499 if (summary.termination_type == ceres::USER_FAILURE) {
1500 result->termination = TrackRegionResult::FELL_OUT_OF_BOUNDS;
1501 return;
1502 }
1503
1504#define HANDLE_TERMINATION(termination_enum) \
1505 if (summary.termination_type == ceres::termination_enum) { \
1506 result->termination = TrackRegionResult::termination_enum; \
1507 return; \
1508 }
1509
1510 // Avoid computing correlation for tracking failures.
1511 HANDLE_TERMINATION(FAILURE);
1512
1513 // Otherwise, run a final correlation check.
1514 if (options.minimum_correlation > 0.0) {
1515 result->correlation =
1516 pixel_difference_cost_function
1517 ->PearsonProductMomentCorrelationCoefficient(warp.parameters);
1518 if (result->correlation < options.minimum_correlation) {
1519 LG << "Failing with insufficient correlation.";
1520 result->termination = TrackRegionResult::INSUFFICIENT_CORRELATION;
1521 return;
1522 }
1523 }
1524
1525 // This happens when the minimum corner shift tolerance is reached. Due to
1526 // how the tolerance is computed this can't be done by Ceres. So return the
1527 // same termination enum as Ceres, even though this is slightly different
1528 // than Ceres's parameter tolerance, which operates on the raw parameter
1529 // values rather than the pixel shifts of the patch corners.
1530 if (summary.termination_type == ceres::USER_SUCCESS) {
1531 result->termination = TrackRegionResult::CONVERGENCE;
1532 return;
1533 }
1534
1535 HANDLE_TERMINATION(CONVERGENCE);
1536 HANDLE_TERMINATION(NO_CONVERGENCE);
1537#undef HANDLE_TERMINATION
1538};
1539
1540void TrackRegion(const FloatImage& image1,
1541 const FloatImage& image2,
1542 const double* x1,
1543 const double* y1,
1545 double* x2,
1546 double* y2,
1547 TrackRegionResult* result) {
1548 // Enum is necessary due to templated nature of autodiff.
1549#define HANDLE_MODE(mode_enum, mode_type) \
1550 if (options.mode == TrackRegionOptions::mode_enum) { \
1551 TemplatedTrackRegion<mode_type>( \
1552 image1, image2, x1, y1, options, x2, y2, result); \
1553 return; \
1554 }
1555 HANDLE_MODE(TRANSLATION, TranslationWarp);
1556 HANDLE_MODE(TRANSLATION_SCALE, TranslationScaleWarp);
1557 HANDLE_MODE(TRANSLATION_ROTATION, TranslationRotationWarp);
1558 HANDLE_MODE(TRANSLATION_ROTATION_SCALE, TranslationRotationScaleWarp);
1559 HANDLE_MODE(AFFINE, AffineWarp);
1560 HANDLE_MODE(HOMOGRAPHY, HomographyWarp);
1561#undef HANDLE_MODE
1562}
1563
1565 const double* xs,
1566 const double* ys,
1567 int num_samples_x,
1568 int num_samples_y,
1569 FloatImage* mask,
1570 FloatImage* patch,
1571 double* warped_position_x,
1572 double* warped_position_y) {
1573 // Bail early if the points are outside the image.
1574 if (!AllInBounds(image, xs, ys)) {
1575 LG << "Can't sample patch: out of bounds.";
1576 return false;
1577 }
1578
1579 // Make the patch have the appropriate size, and match the depth of image.
1580 patch->Resize(num_samples_y, num_samples_x, image.Depth());
1581
1582 // Compute the warp from rectangular coordinates.
1583 Mat3 canonical_homography =
1584 ComputeCanonicalHomography(xs, ys, num_samples_x, num_samples_y);
1585
1586 // Walk over the coordinates in the canonical space, sampling from the image
1587 // in the original space and copying the result into the patch.
1588 for (int r = 0; r < num_samples_y; ++r) {
1589 for (int c = 0; c < num_samples_x; ++c) {
1590 Vec3 image_position = canonical_homography * Vec3(c, r, 1);
1591 image_position /= image_position(2);
1593 image, image_position(1), image_position(0), &(*patch)(r, c, 0));
1594 if (mask) {
1595 float mask_value =
1596 SampleLinear(*mask, image_position(1), image_position(0), 0);
1597
1598 for (int d = 0; d < image.Depth(); d++) {
1599 (*patch)(r, c, d) *= mask_value;
1600 }
1601 }
1602 }
1603 }
1604
1605 Vec3 warped_position = canonical_homography.inverse() * Vec3(xs[4], ys[4], 1);
1606 warped_position /= warped_position(2);
1607
1608 *warped_position_x = warped_position(0);
1609 *warped_position_y = warped_position(1);
1610
1611 return true;
1612}
1613
1614} // namespace libmv
sqrt(x)+1/max(0
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
void warp(const btVector3 &origin)
btMatrix3x3 transpose() const
Return the transpose of the matrix.
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
static T sum(const btAlignedObjectArray< T > &items)
SIMD_FORCE_INLINE btVector3 operator()(const btVector3 &x) const
Return the transform of the vector.
Definition btTransform.h:90
SIMD_FORCE_INLINE btScalar norm() const
Return the norm (length) of the vector.
Definition btVector3.h:263
3D array (row, column, channel).
Definition array_nd.h:332
void Resize(int height, int width, int depth=1)
Definition array_nd.h:341
local_group_size(16, 16) .push_constant(Type b
CCL_NAMESPACE_BEGIN struct Options options
DEGForeachIDComponentCallback callback
#define NULL
const Mat y_
const Mat x_
float[3][3] Mat3
Definition gpu_matrix.cc:28
#define LG
#define HANDLE_MODE(mode_enum, mode_type)
double parameters[NUM_PARAMETERS]
double original_centroid_[2]
const double * y2_original_
const double * x2_original_
#define HANDLE_TERMINATION(termination_enum)
#define VLOG(severity)
Definition log.h:34
#define CHECK_NE(a, b)
Definition log.h:45
ccl_device_inline float2 floor(const float2 a)
ccl_device_inline float3 ceil(const float3 a)
ccl_device_inline float3 cos(float3 v)
ccl_device_inline float4 mask(const int4 mask, const float4 a)
ccl_device_inline int4 cast(const float4 a)
Definition math_float4.h:29
#define N
#define T
#define R
#define H(x, y, z)
static void error(const char *str)
T atan2(const T &y, const T &x)
T sin(const AngleRadianBase< T > &a)
Eigen::VectorXd Vec
Definition numeric.h:61
Eigen::Vector4d Vec4
Definition numeric.h:107
void TemplatedTrackRegion(const FloatImage &image1, const FloatImage &image2, const double *x1, const double *y1, const TrackRegionOptions &options, double *x2, double *y2, TrackRegionResult *result)
void TrackRegion(const FloatImage &image1, const FloatImage &image2, const double *x1, const double *y1, const TrackRegionOptions &options, double *x2, double *y2, TrackRegionResult *result)
Eigen::Matrix< double, 3, 3 > Mat3
Definition numeric.h:72
Eigen::Vector2d Vec2
Definition numeric.h:105
Eigen::MatrixXd Mat
Definition numeric.h:60
Array3Df FloatImage
T SampleLinear(const Array3D< T > &image, float y, float x, int v=0)
Linear interpolation.
void BlurredImageAndDerivativesChannels(const Array3Df &in, double sigma, Array3Df *blurred_and_gradxy)
Definition convolve.cc:233
bool Homography2DFromCorrespondencesLinear(const Mat &x1, const Mat &x2, Mat3 *H, double expected_precision)
Eigen::Vector3d Vec3
Definition numeric.h:106
bool SamplePlanarPatch(const FloatImage &image, const double *xs, const double *ys, int num_samples_x, int num_samples_y, FloatImage *mask, FloatImage *patch, double *warped_position_x, double *warped_position_y)
Eigen::Matrix< double, 2, 2 > Mat2
Definition numeric.h:70
static Jet< T, N > Rule(const FunctionType &f, const FunctionType dfdx[kNumArgs], const Jet< T, N > x[kNumArgs])
static ArgumentType Rule(const FunctionType &f, const FunctionType dfdx[kNumArgs], const ArgumentType x[kNumArgs])
static T GetScalar(const Jet< T, N > &t)
static void ScaleDerivative(double scale_by, Jet< T, N > *value)
static void SetScalar(const T &scalar, Jet< T, N > *t)
static T GetScalar(const T &t)
static void ScaleDerivative(double scale_by, T *value)
static void SetScalar(const T &scalar, T *t)
ccl_device_inline int abs(int x)
Definition util/math.h:120
int xy[2]
Definition wm_draw.cc:170