Przeglądaj źródła

Improve the robustness of the pose estimation. Related to #9

customisations
alemart 1 rok temu
rodzic
commit
4e55d6b4fd
1 zmieniony plik: 122 dodania i 112 usunięć
  1. 122
    112
      src/geometry/camera-model.ts

+ 122
- 112
src/geometry/camera-model.ts Wyświetl plik

@@ -30,7 +30,7 @@ import { Nullable, Utils } from '../utils/utils';
30 30
 import { Settings } from '../core/settings';
31 31
 import { IllegalOperationError, IllegalArgumentError } from '../utils/errors';
32 32
 
33
-/** A guess of the horizontal field-of-view of the camera, in degrees */
33
+/** A guess of the horizontal field-of-view of a typical camera, in degrees */
34 34
 const HFOV_GUESS = 60; // https://developer.apple.com/library/archive/documentation/DeviceInformation/Reference/iOSDeviceCompatibility/Cameras/Cameras.html
35 35
 
36 36
 /** Number of iterations used to refine the estimated pose */
@@ -40,7 +40,7 @@ const POSE_ITERATIONS = 30;
40 40
 const ROTATION_FILTER_SAMPLES = 10;
41 41
 
42 42
 /** Number of samples used in the translation filter */
43
-const TRANSLATION_FILTER_SAMPLES = 10;
43
+const TRANSLATION_FILTER_SAMPLES = 5;
44 44
 
45 45
 /** Convert degrees to radians */
46 46
 const DEG2RAD = 0.017453292519943295; // pi / 180
@@ -63,35 +63,7 @@ export const U0 = 6;
63 63
 /** Index of the vertical position of the principal point in the camera intrinsics matrix */
64 64
 export const V0 = 7;
65 65
 
66
-/** Translation refinement: predefined buffers for efficiency */
67
-const TRANSLATION_REFINEMENT_BUFFERS = (() => {
68
-    const l = 1.0;
69
-    const x = [ 0, l, 0,-l, 0 ];
70
-    const y = [-l, 0, l, 0, 0 ];
71
-    const n = x.length;
72
-
73
-    return Object.freeze({
74
-        x, y,
75
-        a1: new Array(n) as number[],
76
-        a2: new Array(n) as number[],
77
-        a3: new Array(n) as number[],
78
-        m:  new Array(3*n * 3) as number[],
79
-        v:  new Array(3*n) as number[],
80
-        t:  new Array(3) as number[],
81
-        r:  new Array(3*n) as number[],
82
-        c:  new Array(3) as number[],
83
-        Mc: new Array(3*n) as number[],
84
-    });
85
-})();
86
-
87
-/** Translation refinement: number of iterations */
88
-const TRANSLATION_REFINEMENT_ITERATIONS = 3; // 1; // 5;
89
-
90
-/** Translation refinement: number of samples */
91
-const TRANSLATION_REFINEMENT_SAMPLES = 5; // TRANSLATION_REFINEMENT_BUFFERS.x.length;
92
-
93
-/** Translation refinement: the triple of the number of samples */
94
-const TRANSLATION_REFINEMENT_SAMPLES_3X = 15; //3 * TRANSLATION_REFINEMENT_SAMPLES;
66
+
95 67
 
96 68
 
97 69
 /**
@@ -126,8 +98,8 @@ export class CameraModel
126 98
     {
127 99
         this._screenSize = Speedy.Size(0, 0);
128 100
         this._matrix = Speedy.Matrix.Eye(3, 4);
129
-        this._intrinsics = [1,0,0,0,1,0,0,0,1]; // identity matrix
130
-        this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // no rotation & no translation [ R | t ] = [ I | 0 ]
101
+        this._intrinsics = [1,0,0,0,1,0,0,0,1]; // 3x3 identity matrix
102
+        this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // 3x4 matrix [ R | t ] = [ I | 0 ] no rotation & no translation
131 103
         this._partialRotationBuffer = [];
132 104
         this._translationBuffer = [];
133 105
     }
@@ -147,8 +119,7 @@ export class CameraModel
147 119
         this._screenSize.height = screenSize.height;
148 120
 
149 121
         // reset the model
150
-        this._resetIntrinsics();
151
-        this._resetExtrinsics();
122
+        this.reset();
152 123
 
153 124
         // log
154 125
         Utils.log(`Initializing the camera model...`);
@@ -206,12 +177,12 @@ export class CameraModel
206 177
 
207 178
         // estimate the pose
208 179
         const pose = this._estimatePose(homography);
209
-        this._storePose(pose);
180
+        this._extrinsics = pose.read();
210 181
 
211 182
         // compute the camera matrix
212 183
         const C = this.denormalizer();
213 184
         const K = Speedy.Matrix(3, 3, this._intrinsics);
214
-        const E = Speedy.Matrix(3, 4, this._extrinsics);
185
+        const E = pose; //Speedy.Matrix(3, 4, this._extrinsics);
215 186
         this._matrix.setToSync(K.times(E).times(C));
216 187
         //console.log("intrinsics -----------", K.toString());
217 188
         //console.log("matrix ----------------",this._matrix.toString());
@@ -360,6 +331,7 @@ export class CameraModel
360 331
     private _resetIntrinsics(): void
361 332
     {
362 333
         const cameraWidth = Math.max(this._screenSize.width, this._screenSize.height); // portrait?
334
+
363 335
         const u0 = this._screenSize.width / 2;
364 336
         const v0 = this._screenSize.height / 2;
365 337
         const fx = (cameraWidth / 2) / Math.tan(DEG2RAD * HFOV_GUESS / 2);
@@ -372,10 +344,10 @@ export class CameraModel
372 344
     }
373 345
 
374 346
     /**
375
-     * Compute a normalized homography H' = K^(-1) * H for an
347
+     * Compute a normalized homography H^ = K^(-1) * H for an
376 348
      * ideal pinhole with f = 1 and principal point = (0,0)
377 349
      * @param homography homography H to be normalized
378
-     * @returns normalized homography H'
350
+     * @returns normalized homography H^
379 351
      */
380 352
     private _normalizeHomography(homography: SpeedyMatrix): SpeedyMatrix
381 353
     {
@@ -384,9 +356,11 @@ export class CameraModel
384 356
         const v0 = this._intrinsics[V0];
385 357
         const fx = this._intrinsics[FX];
386 358
         const fy = this._intrinsics[FY];
359
+        const u0fx = u0 / fx;
360
+        const v0fy = v0 / fy;
387 361
 
388
-        const h11 = (h[0] - u0 * h[2]) / fx, h12 = (h[3] - u0 * h[5]) / fx, h13 = (h[6] - u0 * h[8]) / fx;
389
-        const h21 = (h[1] - v0 * h[2]) / fy, h22 = (h[4] - v0 * h[5]) / fy, h23 = (h[7] - v0 * h[8]) / fy;
362
+        const h11 = h[0] / fx - u0fx * h[2], h12 = h[3] / fx - u0fx * h[5], h13 = h[6] / fx - u0fx * h[8];
363
+        const h21 = h[1] / fy - v0fy * h[2], h22 = h[4] / fy - v0fy * h[5], h23 = h[7] / fy - v0fy * h[8];
390 364
         const h31 = h[2], h32 = h[5], h33 = h[8];
391 365
 
392 366
         /*console.log([
@@ -414,10 +388,6 @@ export class CameraModel
414 388
         const h21 = h[1], h22 = h[4], h23 = h[7];
415 389
         const h31 = h[2], h32 = h[5], h33 = h[8];
416 390
 
417
-        // select the sign so that t3 = tz > 0
418
-        const sign = h33 >= 0 ? 1 : -1;
419
-
420
-        // compute the scale factor
421 391
         const h1norm2 = h11 * h11 + h21 * h21 + h31 * h31;
422 392
         const h2norm2 = h12 * h12 + h22 * h22 + h32 * h32;
423 393
         const h1norm = Math.sqrt(h1norm2);
@@ -425,42 +395,65 @@ export class CameraModel
425 395
         //const hnorm = (h1norm + h2norm) / 2;
426 396
         //const hnorm = Math.sqrt(h1norm * h2norm);
427 397
         const hnorm = Math.max(h1norm, h2norm); // this seems to work. why?
428
-        const scale = sign / hnorm;
429
-
430
-        // invalid homography?
431
-        if(Number.isNaN(scale))
432
-            return Speedy.Matrix(3, 3, (new Array(9)).fill(Number.NaN));
433 398
 
434 399
         // we expect h1norm to be approximately h2norm, but sometimes there is a lot of noise
435 400
         // if h1norm is not approximately h2norm, it means that the first two columns of
436 401
         // the normalized homography are not really encoding a rotation (up to a scale)
437
-        // what is causing this? does h3 (and h33) tell us anything about it?
438
-        // what about the intrinsics matrix? the principal point...? the fov...?
439 402
 
440 403
         //console.log("h1,h2",h1norm,h2norm);
441 404
         //console.log(normalizedHomography.toString());
442 405
 
443
-        // recover the pose
444
-        const r11 = scale * h11;
445
-        const r21 = scale * h21;
446
-        const r31 = scale * h31;
447
-        const r12 = scale * h12;
448
-        const r22 = scale * h22;
449
-        const r32 = scale * h32;
450
-        const r_ = [r11, r21, r31, r12, r22, r32];
451
-
452
-        const t1 = scale * h13;
453
-        const t2 = scale * h23;
454
-        const t3 = scale * h33;
455
-        const t_ = [t1, t2, t3];
456
-
457
-        // refine the pose
458
-        const r = this._refineRotation(r_);
459
-        const t = this._refineTranslation(normalizedHomography, r, t_);
460
-        //const t = t_; // faster, but less accurate
406
+        // compute a rough estimate for the scale factor
407
+        // select the sign so that t3 = tz > 0
408
+        const sign = h33 >= 0 ? 1 : -1;
409
+        let scale = sign / hnorm;
410
+
411
+        // sanity check
412
+        if(Number.isNaN(scale))
413
+            return Speedy.Matrix(3, 3, (new Array(9)).fill(Number.NaN));
414
+
415
+        // recover the rotation
416
+        let r = new Array(6) as number[];
417
+        r[0] = scale * h11;
418
+        r[1] = scale * h21;
419
+        r[2] = scale * h31;
420
+        r[3] = scale * h12;
421
+        r[4] = scale * h22;
422
+        r[5] = scale * h32;
423
+
424
+        // refine the rotation
425
+        r = this._refineRotation(r); // r is initially noisy
426
+
427
+        /*
428
+
429
+        After refining the rotation vectors, let's adjust the scale factor as
430
+        follows:
431
+
432
+        We know that [ r1 | r2 | t ] is equal to the normalized homography H up
433
+        to a non-zero scale factor s, i.e., [ r1 | r2 | t ] = s H. Let's call M
434
+        the first two columns of H, i.e., M = [ h1 | h2 ], and R = [ r1 | r2 ].
435
+        It follows that R = s M, meaning that M'R = s M'M. The trace of 2x2 M'R
436
+        is such that tr(M'R) = tr(s M'M) = s tr(M'M), which means:
437
+
438
+        s = tr(M'R) / tr(M'M) = (r1'h1 + r2'h2) / (h1'h1 + h2'h2)
439
+
440
+        (also: s^2 = det(M'R) / det(M'M); here M' denotes the transpose of M)
441
+
442
+        */
443
+
444
+        // adjust the scale factor
445
+        scale = r[0] * h11 + r[1] * h21 + r[2] * h31;
446
+        scale += r[3] * h12 + r[4] * h22 + r[5] * h32;
447
+        scale /= h1norm2 + h2norm2;
448
+
449
+        // recover the translation
450
+        let t = new Array(3) as number[];
451
+        t[0] = scale * h13;
452
+        t[1] = scale * h23;
453
+        t[2] = scale * h33;
461 454
 
462 455
         // done!
463
-        return Speedy.Matrix(3, 3, r.concat(t)); // this is possibly NaN... why? homography...
456
+        return Speedy.Matrix(3, 3, r.concat(t));
464 457
     }
465 458
 
466 459
     /**
@@ -594,12 +587,6 @@ export class CameraModel
594 587
 
595 588
         */
596 589
 
597
-        const B = TRANSLATION_REFINEMENT_BUFFERS;
598
-        const n = TRANSLATION_REFINEMENT_SAMPLES;
599
-        const n3 = TRANSLATION_REFINEMENT_SAMPLES_3X;
600
-
601
-        Utils.assert(B.x.length === n);
602
-
603 590
         const h = normalizedHomography.read();
604 591
         const h11 = h[0], h12 = h[3], h13 = h[6];
605 592
         const h21 = h[1], h22 = h[4], h23 = h[7];
@@ -609,19 +596,35 @@ export class CameraModel
609 596
         const r21 = rot[1], r22 = rot[4];
610 597
         const r31 = rot[2], r32 = rot[5];
611 598
 
612
-        // get sample points (xi, yi), 0 <= i < n
613
-        const x = B.x, y = B.y;
599
+        // sample points [ xi  yi ]' in AR screen space
600
+        //const x = [ 0.5, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5, 0.0 ];
601
+        //const y = [ 0.5, 0.0, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5 ];
602
+        const x = [ 0.5, 0.0, 1.0, 1.0, 0.0 ];
603
+        const y = [ 0.5, 0.0, 0.0, 1.0, 1.0 ];
604
+        const n = x.length;
605
+        const n3 = 3*n;
606
+
607
+        const width = this._screenSize.width;
608
+        const height = this._screenSize.height;
609
+        for(let i = 0; i < n; i++) {
610
+            x[i] *= width;
611
+            y[i] *= height;
612
+        }
614 613
 
615 614
         // set auxiliary values: ai = H [ xi  yi  1 ]'
616
-        const a1 = B.a1, a2 = B.a2, a3 = B.a3;
615
+        const a1 = new Array(n) as number[];
616
+        const a2 = new Array(n) as number[];
617
+        const a3 = new Array(n) as number[];
617 618
         for(let i = 0; i < n; i++) {
618 619
             a1[i] = x[i] * h11 + y[i] * h12 + h13;
619 620
             a2[i] = x[i] * h21 + y[i] * h22 + h23;
620 621
             a3[i] = x[i] * h31 + y[i] * h32 + h33;
621 622
         }
622 623
 
623
-        // solve M t = v for t; M: 3n x 3, v: 3n x 1, t: 3 x 1 (linear least squares)
624
-        const m = B.m, v = B.v;
624
+        // we'll solve M t = v for t with linear least squares
625
+        // M: 3n x 3, v: 3n x 1, t: 3 x 1
626
+        const m = new Array(3*n * 3) as number[];
627
+        const v = new Array(3*n) as number[];
625 628
         for(let i = 0, k = 0; k < n; i += 3, k++) {
626 629
             m[i] = m[i+n3+1] = m[i+n3+n3+2] = 0;
627 630
             m[i+n3] = -(m[i+1] = a3[k]);
@@ -681,13 +684,22 @@ export class CameraModel
681 684
 
682 685
         */
683 686
 
687
+        // gradient descent: super lightweight implementation
688
+        const r = new Array(3*n) as number[];
689
+        const c = new Array(3) as number[];
690
+        const Mc = new Array(3*n) as number[];
691
+
684 692
         // initial guess
685
-        const t = B.t;
686
-        t[0] = t0[0]; t[1] = t0[1]; t[2] = t0[2];
693
+        const t = new Array(3) as number[];
694
+        t[0] = t0[0];
695
+        t[1] = t0[1];
696
+        t[2] = t0[2];
687 697
 
688
-        // gradient descent: super lightweight implementation
689
-        const r = B.r, c = B.c, Mc = B.Mc;
690
-        for(let it = 0; it < TRANSLATION_REFINEMENT_ITERATIONS; it++) {
698
+        // iterate
699
+        const MAX_ITERATIONS = 15;
700
+        const TOLERANCE = 1;
701
+        for(let it = 0; it < MAX_ITERATIONS; it++) {
702
+            //console.log("it",it+1);
691 703
 
692 704
             // compute residual r = Mt - v
693 705
             for(let i = 0; i < n3; i++) {
@@ -711,19 +723,25 @@ export class CameraModel
711 723
                     Mc[i] += m[j*n3 + i] * c[j];
712 724
             }
713 725
 
714
-            // compute num = c'c and den = (Mc)'(Mc)
715
-            let num = 0, den = 0;
726
+            // compute c'c
727
+            let num = 0;
716 728
             for(let i = 0; i < 3; i++)
717 729
                 num += c[i] * c[i];
730
+            //console.log("c'c=",num);
731
+            if(num < TOLERANCE)
732
+                break;
733
+
734
+            // compute (Mc)'(Mc)
735
+            let den = 0;
718 736
             for(let i = 0; i < n3; i++)
719 737
                 den += Mc[i] * Mc[i];
720 738
 
721
-            // compute num / den
739
+            // compute frc = c'c / (Mc)'(Mc)
722 740
             const frc = num / den;
723
-            if(Number.isNaN(frc))
741
+            if(Number.isNaN(frc)) // this shouldn't happen
724 742
                 break;
725 743
 
726
-            // iterate: t = t - (num / den) * c
744
+            // iterate: t = t - frc * c
727 745
             for(let i = 0; i < 3; i++)
728 746
                 t[i] -= frc * c[i];
729 747
 
@@ -839,7 +857,8 @@ export class CameraModel
839 857
         // we want the estimated partial pose [ r1 | r2 | t ] to be as close
840 858
         // as possible to the normalized homography, up to a scale factor;
841 859
         // i.e., H * [ r1 | r2 | t ]^(-1) = s * I for a non-zero scalar s
842
-        // it won't be a perfect equality due to noise in the homography
860
+        // it won't be a perfect equality due to noise in the homography.
861
+        // remark: composition of homographies
843 862
         const residual = Speedy.Matrix(normalizedHomography);
844 863
         for(let k = 0; k < POSE_ITERATIONS; k++) {
845 864
             // incrementally improve the partial pose
@@ -851,28 +870,19 @@ export class CameraModel
851 870
         }
852 871
         //console.log('-----------');
853 872
 
854
-        /*
855
-        // test
856
-        const result = Speedy.Matrix.Zeros(3);
857
-        result.setToSync(partialPose.times(normalizedHomography.inverse()));
858
-        const m11 = result.at(0,0);
859
-        result.setToSync(result.times(1/m11));
860
-        console.log("Pose * NORMALIZED HOM^-1", result.toString());
861
-        */
873
+        // refine the translation vector
874
+        const mat = partialPose.read();
875
+        const r = mat.slice(0, 6);
876
+        const t0 = mat.slice(6, 9);
877
+        const t = this._refineTranslation(normalizedHomography, r, t0);
878
+        const refinedPartialPose = Speedy.Matrix(3, 3, r.concat(t));
862 879
 
863 880
         // filter the partial pose
864
-        const filteredPartialPose = this._filterPartialPose(partialPose);
881
+        const filteredPartialPose = this._filterPartialPose(refinedPartialPose);
865 882
 
866 883
         // estimate the full pose
867
-        return this._estimateFullPose(filteredPartialPose);
868
-    }
869
-
870
-    /**
871
-     * Store an estimated pose
872
-     * @param pose 3x4 matrix
873
-     */
874
-    private _storePose(pose: SpeedyMatrix): void
875
-    {
876
-        this._extrinsics = pose.read();
884
+        //const finalPartialPose = partialPose;
885
+        const finalPartialPose = filteredPartialPose;
886
+        return this._estimateFullPose(finalPartialPose);
877 887
     }
878 888
 }

Ładowanie…
Anuluj
Zapisz