소스 검색

Improve the robustness of the pose. Related to #9

customisations
alemart 1 년 전
부모
커밋
4e55d6b4fd
1개의 변경된 파일, 122개의 추가 그리고 112개의 삭제
  1. 122
    112
      src/geometry/camera-model.ts

+ 122
- 112
src/geometry/camera-model.ts 파일 보기

30
 import { Settings } from '../core/settings';
30
 import { Settings } from '../core/settings';
31
 import { IllegalOperationError, IllegalArgumentError } from '../utils/errors';
31
 import { IllegalOperationError, IllegalArgumentError } from '../utils/errors';
32
 
32
 
33
-/** A guess of the horizontal field-of-view of the camera, in degrees */
33
+/** A guess of the horizontal field-of-view of a typical camera, in degrees */
34
 const HFOV_GUESS = 60; // https://developer.apple.com/library/archive/documentation/DeviceInformation/Reference/iOSDeviceCompatibility/Cameras/Cameras.html
34
 const HFOV_GUESS = 60; // https://developer.apple.com/library/archive/documentation/DeviceInformation/Reference/iOSDeviceCompatibility/Cameras/Cameras.html
35
 
35
 
36
 /** Number of iterations used to refine the estimated pose */
36
 /** Number of iterations used to refine the estimated pose */
40
 const ROTATION_FILTER_SAMPLES = 10;
40
 const ROTATION_FILTER_SAMPLES = 10;
41
 
41
 
42
 /** Number of samples used in the translation filter */
42
 /** Number of samples used in the translation filter */
43
-const TRANSLATION_FILTER_SAMPLES = 10;
43
+const TRANSLATION_FILTER_SAMPLES = 5;
44
 
44
 
45
 /** Convert degrees to radians */
45
 /** Convert degrees to radians */
46
 const DEG2RAD = 0.017453292519943295; // pi / 180
46
 const DEG2RAD = 0.017453292519943295; // pi / 180
63
 /** Index of the vertical position of the principal point in the camera intrinsics matrix */
63
 /** Index of the vertical position of the principal point in the camera intrinsics matrix */
64
 export const V0 = 7;
64
 export const V0 = 7;
65
 
65
 
66
-/** Translation refinement: predefined buffers for efficiency */
67
-const TRANSLATION_REFINEMENT_BUFFERS = (() => {
68
-    const l = 1.0;
69
-    const x = [ 0, l, 0,-l, 0 ];
70
-    const y = [-l, 0, l, 0, 0 ];
71
-    const n = x.length;
72
-
73
-    return Object.freeze({
74
-        x, y,
75
-        a1: new Array(n) as number[],
76
-        a2: new Array(n) as number[],
77
-        a3: new Array(n) as number[],
78
-        m:  new Array(3*n * 3) as number[],
79
-        v:  new Array(3*n) as number[],
80
-        t:  new Array(3) as number[],
81
-        r:  new Array(3*n) as number[],
82
-        c:  new Array(3) as number[],
83
-        Mc: new Array(3*n) as number[],
84
-    });
85
-})();
86
-
87
-/** Translation refinement: number of iterations */
88
-const TRANSLATION_REFINEMENT_ITERATIONS = 3; // 1; // 5;
89
-
90
-/** Translation refinement: number of samples */
91
-const TRANSLATION_REFINEMENT_SAMPLES = 5; // TRANSLATION_REFINEMENT_BUFFERS.x.length;
92
-
93
-/** Translation refinement: the triple of the number of samples */
94
-const TRANSLATION_REFINEMENT_SAMPLES_3X = 15; //3 * TRANSLATION_REFINEMENT_SAMPLES;
66
+
95
 
67
 
96
 
68
 
97
 /**
69
 /**
126
     {
98
     {
127
         this._screenSize = Speedy.Size(0, 0);
99
         this._screenSize = Speedy.Size(0, 0);
128
         this._matrix = Speedy.Matrix.Eye(3, 4);
100
         this._matrix = Speedy.Matrix.Eye(3, 4);
129
-        this._intrinsics = [1,0,0,0,1,0,0,0,1]; // identity matrix
130
-        this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // no rotation & no translation [ R | t ] = [ I | 0 ]
101
+        this._intrinsics = [1,0,0,0,1,0,0,0,1]; // 3x3 identity matrix
102
+        this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // 3x4 matrix [ R | t ] = [ I | 0 ] no rotation & no translation
131
         this._partialRotationBuffer = [];
103
         this._partialRotationBuffer = [];
132
         this._translationBuffer = [];
104
         this._translationBuffer = [];
133
     }
105
     }
147
         this._screenSize.height = screenSize.height;
119
         this._screenSize.height = screenSize.height;
148
 
120
 
149
         // reset the model
121
         // reset the model
150
-        this._resetIntrinsics();
151
-        this._resetExtrinsics();
122
+        this.reset();
152
 
123
 
153
         // log
124
         // log
154
         Utils.log(`Initializing the camera model...`);
125
         Utils.log(`Initializing the camera model...`);
206
 
177
 
207
         // estimate the pose
178
         // estimate the pose
208
         const pose = this._estimatePose(homography);
179
         const pose = this._estimatePose(homography);
209
-        this._storePose(pose);
180
+        this._extrinsics = pose.read();
210
 
181
 
211
         // compute the camera matrix
182
         // compute the camera matrix
212
         const C = this.denormalizer();
183
         const C = this.denormalizer();
213
         const K = Speedy.Matrix(3, 3, this._intrinsics);
184
         const K = Speedy.Matrix(3, 3, this._intrinsics);
214
-        const E = Speedy.Matrix(3, 4, this._extrinsics);
185
+        const E = pose; //Speedy.Matrix(3, 4, this._extrinsics);
215
         this._matrix.setToSync(K.times(E).times(C));
186
         this._matrix.setToSync(K.times(E).times(C));
216
         //console.log("intrinsics -----------", K.toString());
187
         //console.log("intrinsics -----------", K.toString());
217
         //console.log("matrix ----------------",this._matrix.toString());
188
         //console.log("matrix ----------------",this._matrix.toString());
360
     private _resetIntrinsics(): void
331
     private _resetIntrinsics(): void
361
     {
332
     {
362
         const cameraWidth = Math.max(this._screenSize.width, this._screenSize.height); // portrait?
333
         const cameraWidth = Math.max(this._screenSize.width, this._screenSize.height); // portrait?
334
+
363
         const u0 = this._screenSize.width / 2;
335
         const u0 = this._screenSize.width / 2;
364
         const v0 = this._screenSize.height / 2;
336
         const v0 = this._screenSize.height / 2;
365
         const fx = (cameraWidth / 2) / Math.tan(DEG2RAD * HFOV_GUESS / 2);
337
         const fx = (cameraWidth / 2) / Math.tan(DEG2RAD * HFOV_GUESS / 2);
372
     }
344
     }
373
 
345
 
374
     /**
346
     /**
375
-     * Compute a normalized homography H' = K^(-1) * H for an
347
+     * Compute a normalized homography H^ = K^(-1) * H for an
376
      * ideal pinhole with f = 1 and principal point = (0,0)
348
      * ideal pinhole with f = 1 and principal point = (0,0)
377
      * @param homography homography H to be normalized
349
      * @param homography homography H to be normalized
378
-     * @returns normalized homography H'
350
+     * @returns normalized homography H^
379
      */
351
      */
380
     private _normalizeHomography(homography: SpeedyMatrix): SpeedyMatrix
352
     private _normalizeHomography(homography: SpeedyMatrix): SpeedyMatrix
381
     {
353
     {
384
         const v0 = this._intrinsics[V0];
356
         const v0 = this._intrinsics[V0];
385
         const fx = this._intrinsics[FX];
357
         const fx = this._intrinsics[FX];
386
         const fy = this._intrinsics[FY];
358
         const fy = this._intrinsics[FY];
359
+        const u0fx = u0 / fx;
360
+        const v0fy = v0 / fy;
387
 
361
 
388
-        const h11 = (h[0] - u0 * h[2]) / fx, h12 = (h[3] - u0 * h[5]) / fx, h13 = (h[6] - u0 * h[8]) / fx;
389
-        const h21 = (h[1] - v0 * h[2]) / fy, h22 = (h[4] - v0 * h[5]) / fy, h23 = (h[7] - v0 * h[8]) / fy;
362
+        const h11 = h[0] / fx - u0fx * h[2], h12 = h[3] / fx - u0fx * h[5], h13 = h[6] / fx - u0fx * h[8];
363
+        const h21 = h[1] / fy - v0fy * h[2], h22 = h[4] / fy - v0fy * h[5], h23 = h[7] / fy - v0fy * h[8];
390
         const h31 = h[2], h32 = h[5], h33 = h[8];
364
         const h31 = h[2], h32 = h[5], h33 = h[8];
391
 
365
 
392
         /*console.log([
366
         /*console.log([
414
         const h21 = h[1], h22 = h[4], h23 = h[7];
388
         const h21 = h[1], h22 = h[4], h23 = h[7];
415
         const h31 = h[2], h32 = h[5], h33 = h[8];
389
         const h31 = h[2], h32 = h[5], h33 = h[8];
416
 
390
 
417
-        // select the sign so that t3 = tz > 0
418
-        const sign = h33 >= 0 ? 1 : -1;
419
-
420
-        // compute the scale factor
421
         const h1norm2 = h11 * h11 + h21 * h21 + h31 * h31;
391
         const h1norm2 = h11 * h11 + h21 * h21 + h31 * h31;
422
         const h2norm2 = h12 * h12 + h22 * h22 + h32 * h32;
392
         const h2norm2 = h12 * h12 + h22 * h22 + h32 * h32;
423
         const h1norm = Math.sqrt(h1norm2);
393
         const h1norm = Math.sqrt(h1norm2);
425
         //const hnorm = (h1norm + h2norm) / 2;
395
         //const hnorm = (h1norm + h2norm) / 2;
426
         //const hnorm = Math.sqrt(h1norm * h2norm);
396
         //const hnorm = Math.sqrt(h1norm * h2norm);
427
         const hnorm = Math.max(h1norm, h2norm); // this seems to work. why?
397
         const hnorm = Math.max(h1norm, h2norm); // this seems to work. why?
428
-        const scale = sign / hnorm;
429
-
430
-        // invalid homography?
431
-        if(Number.isNaN(scale))
432
-            return Speedy.Matrix(3, 3, (new Array(9)).fill(Number.NaN));
433
 
398
 
434
         // we expect h1norm to be approximately h2norm, but sometimes there is a lot of noise
399
         // we expect h1norm to be approximately h2norm, but sometimes there is a lot of noise
435
         // if h1norm is not approximately h2norm, it means that the first two columns of
400
         // if h1norm is not approximately h2norm, it means that the first two columns of
436
         // the normalized homography are not really encoding a rotation (up to a scale)
401
         // the normalized homography are not really encoding a rotation (up to a scale)
437
-        // what is causing this? does h3 (and h33) tell us anything about it?
438
-        // what about the intrinsics matrix? the principal point...? the fov...?
439
 
402
 
440
         //console.log("h1,h2",h1norm,h2norm);
403
         //console.log("h1,h2",h1norm,h2norm);
441
         //console.log(normalizedHomography.toString());
404
         //console.log(normalizedHomography.toString());
442
 
405
 
443
-        // recover the pose
444
-        const r11 = scale * h11;
445
-        const r21 = scale * h21;
446
-        const r31 = scale * h31;
447
-        const r12 = scale * h12;
448
-        const r22 = scale * h22;
449
-        const r32 = scale * h32;
450
-        const r_ = [r11, r21, r31, r12, r22, r32];
451
-
452
-        const t1 = scale * h13;
453
-        const t2 = scale * h23;
454
-        const t3 = scale * h33;
455
-        const t_ = [t1, t2, t3];
456
-
457
-        // refine the pose
458
-        const r = this._refineRotation(r_);
459
-        const t = this._refineTranslation(normalizedHomography, r, t_);
460
-        //const t = t_; // faster, but less accurate
406
+        // compute a rough estimate for the scale factor
407
+        // select the sign so that t3 = tz > 0
408
+        const sign = h33 >= 0 ? 1 : -1;
409
+        let scale = sign / hnorm;
410
+
411
+        // sanity check
412
+        if(Number.isNaN(scale))
413
+            return Speedy.Matrix(3, 3, (new Array(9)).fill(Number.NaN));
414
+
415
+        // recover the rotation
416
+        let r = new Array(6) as number[];
417
+        r[0] = scale * h11;
418
+        r[1] = scale * h21;
419
+        r[2] = scale * h31;
420
+        r[3] = scale * h12;
421
+        r[4] = scale * h22;
422
+        r[5] = scale * h32;
423
+
424
+        // refine the rotation
425
+        r = this._refineRotation(r); // r is initially noisy
426
+
427
+        /*
428
+
429
+        After refining the rotation vectors, let's adjust the scale factor as
430
+        follows:
431
+
432
+        We know that [ r1 | r2 | t ] is equal to the normalized homography H up
433
+        to a non-zero scale factor s, i.e., [ r1 | r2 | t ] = s H. Let's call M
434
+        the first two columns of H, i.e., M = [ h1 | h2 ], and R = [ r1 | r2 ].
435
+        It follows that R = s M, meaning that M'R = s M'M. The trace of 2x2 M'R
436
+        is such that tr(M'R) = tr(s M'M) = s tr(M'M), which means:
437
+
438
+        s = tr(M'R) / tr(M'M) = (r1'h1 + r2'h2) / (h1'h1 + h2'h2)
439
+
440
+        (also: s^2 = det(M'R) / det(M'M))
441
+
442
+        */
443
+
444
+        // adjust the scale factor
445
+        scale = r[0] * h11 + r[1] * h21 + r[2] * h31;
446
+        scale += r[3] * h12 + r[4] * h22 + r[5] * h32;
447
+        scale /= h1norm2 + h2norm2;
448
+
449
+        // recover the translation
450
+        let t = new Array(3) as number[];
451
+        t[0] = scale * h13;
452
+        t[1] = scale * h23;
453
+        t[2] = scale * h33;
461
 
454
 
462
         // done!
455
         // done!
463
-        return Speedy.Matrix(3, 3, r.concat(t)); // this is possibly NaN... why? homography...
456
+        return Speedy.Matrix(3, 3, r.concat(t));
464
     }
457
     }
465
 
458
 
466
     /**
459
     /**
594
 
587
 
595
         */
588
         */
596
 
589
 
597
-        const B = TRANSLATION_REFINEMENT_BUFFERS;
598
-        const n = TRANSLATION_REFINEMENT_SAMPLES;
599
-        const n3 = TRANSLATION_REFINEMENT_SAMPLES_3X;
600
-
601
-        Utils.assert(B.x.length === n);
602
-
603
         const h = normalizedHomography.read();
590
         const h = normalizedHomography.read();
604
         const h11 = h[0], h12 = h[3], h13 = h[6];
591
         const h11 = h[0], h12 = h[3], h13 = h[6];
605
         const h21 = h[1], h22 = h[4], h23 = h[7];
592
         const h21 = h[1], h22 = h[4], h23 = h[7];
609
         const r21 = rot[1], r22 = rot[4];
596
         const r21 = rot[1], r22 = rot[4];
610
         const r31 = rot[2], r32 = rot[5];
597
         const r31 = rot[2], r32 = rot[5];
611
 
598
 
612
-        // get sample points (xi, yi), 0 <= i < n
613
-        const x = B.x, y = B.y;
599
+        // sample points [ xi  yi ]' in AR screen space
600
+        //const x = [ 0.5, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5, 0.0 ];
601
+        //const y = [ 0.5, 0.0, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5 ];
602
+        const x = [ 0.5, 0.0, 1.0, 1.0, 0.0 ];
603
+        const y = [ 0.5, 0.0, 0.0, 1.0, 1.0 ];
604
+        const n = x.length;
605
+        const n3 = 3*n;
606
+
607
+        const width = this._screenSize.width;
608
+        const height = this._screenSize.height;
609
+        for(let i = 0; i < n; i++) {
610
+            x[i] *= width;
611
+            y[i] *= height;
612
+        }
614
 
613
 
615
         // set auxiliary values: ai = H [ xi  yi  1 ]'
614
         // set auxiliary values: ai = H [ xi  yi  1 ]'
616
-        const a1 = B.a1, a2 = B.a2, a3 = B.a3;
615
+        const a1 = new Array(n) as number[];
616
+        const a2 = new Array(n) as number[];
617
+        const a3 = new Array(n) as number[];
617
         for(let i = 0; i < n; i++) {
618
         for(let i = 0; i < n; i++) {
618
             a1[i] = x[i] * h11 + y[i] * h12 + h13;
619
             a1[i] = x[i] * h11 + y[i] * h12 + h13;
619
             a2[i] = x[i] * h21 + y[i] * h22 + h23;
620
             a2[i] = x[i] * h21 + y[i] * h22 + h23;
620
             a3[i] = x[i] * h31 + y[i] * h32 + h33;
621
             a3[i] = x[i] * h31 + y[i] * h32 + h33;
621
         }
622
         }
622
 
623
 
623
-        // solve M t = v for t; M: 3n x 3, v: 3n x 1, t: 3 x 1 (linear least squares)
624
-        const m = B.m, v = B.v;
624
+        // we'll solve M t = v for t with linear least squares
625
+        // M: 3n x 3, v: 3n x 1, t: 3 x 1
626
+        const m = new Array(3*n * 3) as number[];
627
+        const v = new Array(3*n) as number[];
625
         for(let i = 0, k = 0; k < n; i += 3, k++) {
628
         for(let i = 0, k = 0; k < n; i += 3, k++) {
626
             m[i] = m[i+n3+1] = m[i+n3+n3+2] = 0;
629
             m[i] = m[i+n3+1] = m[i+n3+n3+2] = 0;
627
             m[i+n3] = -(m[i+1] = a3[k]);
630
             m[i+n3] = -(m[i+1] = a3[k]);
681
 
684
 
682
         */
685
         */
683
 
686
 
687
+        // gradient descent: super lightweight implementation
688
+        const r = new Array(3*n) as number[];
689
+        const c = new Array(3) as number[];
690
+        const Mc = new Array(3*n) as number[];
691
+
684
         // initial guess
692
         // initial guess
685
-        const t = B.t;
686
-        t[0] = t0[0]; t[1] = t0[1]; t[2] = t0[2];
693
+        const t = new Array(3) as number[];
694
+        t[0] = t0[0];
695
+        t[1] = t0[1];
696
+        t[2] = t0[2];
687
 
697
 
688
-        // gradient descent: super lightweight implementation
689
-        const r = B.r, c = B.c, Mc = B.Mc;
690
-        for(let it = 0; it < TRANSLATION_REFINEMENT_ITERATIONS; it++) {
698
+        // iterate
699
+        const MAX_ITERATIONS = 15;
700
+        const TOLERANCE = 1;
701
+        for(let it = 0; it < MAX_ITERATIONS; it++) {
702
+            //console.log("it",it+1);
691
 
703
 
692
             // compute residual r = Mt - v
704
             // compute residual r = Mt - v
693
             for(let i = 0; i < n3; i++) {
705
             for(let i = 0; i < n3; i++) {
711
                     Mc[i] += m[j*n3 + i] * c[j];
723
                     Mc[i] += m[j*n3 + i] * c[j];
712
             }
724
             }
713
 
725
 
714
-            // compute num = c'c and den = (Mc)'(Mc)
715
-            let num = 0, den = 0;
726
+            // compute c'c
727
+            let num = 0;
716
             for(let i = 0; i < 3; i++)
728
             for(let i = 0; i < 3; i++)
717
                 num += c[i] * c[i];
729
                 num += c[i] * c[i];
730
+            //console.log("c'c=",num);
731
+            if(num < TOLERANCE)
732
+                break;
733
+
734
+            // compute (Mc)'(Mc)
735
+            let den = 0;
718
             for(let i = 0; i < n3; i++)
736
             for(let i = 0; i < n3; i++)
719
                 den += Mc[i] * Mc[i];
737
                 den += Mc[i] * Mc[i];
720
 
738
 
721
-            // compute num / den
739
+            // compute frc = c'c / (Mc)'(Mc)
722
             const frc = num / den;
740
             const frc = num / den;
723
-            if(Number.isNaN(frc))
741
+            if(Number.isNaN(frc)) // this shouldn't happen
724
                 break;
742
                 break;
725
 
743
 
726
-            // iterate: t = t - (num / den) * c
744
+            // iterate: t = t - frc * c
727
             for(let i = 0; i < 3; i++)
745
             for(let i = 0; i < 3; i++)
728
                 t[i] -= frc * c[i];
746
                 t[i] -= frc * c[i];
729
 
747
 
839
         // we want the estimated partial pose [ r1 | r2 | t ] to be as close
857
         // we want the estimated partial pose [ r1 | r2 | t ] to be as close
840
         // as possible to the normalized homography, up to a scale factor;
858
         // as possible to the normalized homography, up to a scale factor;
841
         // i.e., H * [ r1 | r2 | t ]^(-1) = s * I for a non-zero scalar s
859
         // i.e., H * [ r1 | r2 | t ]^(-1) = s * I for a non-zero scalar s
842
-        // it won't be a perfect equality due to noise in the homography
860
+        // it won't be a perfect equality due to noise in the homography.
861
+        // remark: composition of homographies
843
         const residual = Speedy.Matrix(normalizedHomography);
862
         const residual = Speedy.Matrix(normalizedHomography);
844
         for(let k = 0; k < POSE_ITERATIONS; k++) {
863
         for(let k = 0; k < POSE_ITERATIONS; k++) {
845
             // incrementally improve the partial pose
864
             // incrementally improve the partial pose
851
         }
870
         }
852
         //console.log('-----------');
871
         //console.log('-----------');
853
 
872
 
854
-        /*
855
-        // test
856
-        const result = Speedy.Matrix.Zeros(3);
857
-        result.setToSync(partialPose.times(normalizedHomography.inverse()));
858
-        const m11 = result.at(0,0);
859
-        result.setToSync(result.times(1/m11));
860
-        console.log("Pose * NORMALIZED HOM^-1", result.toString());
861
-        */
873
+        // refine the translation vector
874
+        const mat = partialPose.read();
875
+        const r = mat.slice(0, 6);
876
+        const t0 = mat.slice(6, 9);
877
+        const t = this._refineTranslation(normalizedHomography, r, t0);
878
+        const refinedPartialPose = Speedy.Matrix(3, 3, r.concat(t));
862
 
879
 
863
         // filter the partial pose
880
         // filter the partial pose
864
-        const filteredPartialPose = this._filterPartialPose(partialPose);
881
+        const filteredPartialPose = this._filterPartialPose(refinedPartialPose);
865
 
882
 
866
         // estimate the full pose
883
         // estimate the full pose
867
-        return this._estimateFullPose(filteredPartialPose);
868
-    }
869
-
870
-    /**
871
-     * Store an estimated pose
872
-     * @param pose 3x4 matrix
873
-     */
874
-    private _storePose(pose: SpeedyMatrix): void
875
-    {
876
-        this._extrinsics = pose.read();
884
+        //const finalPartialPose = partialPose;
885
+        const finalPartialPose = filteredPartialPose;
886
+        return this._estimateFullPose(finalPartialPose);
877
     }
887
     }
878
 }
888
 }

Loading…
취소
저장