@@ -30,7 +30,7 @@ import { Nullable, Utils } from '../utils/utils';
 import { Settings } from '../core/settings';
 import { IllegalOperationError, IllegalArgumentError } from '../utils/errors';

-/** A guess of the horizontal field-of-view of the camera, in degrees */
+/** A guess of the horizontal field-of-view of a typical camera, in degrees */
 const HFOV_GUESS = 60; // https://developer.apple.com/library/archive/documentation/DeviceInformation/Reference/iOSDeviceCompatibility/Cameras/Cameras.html

 /** Number of iterations used to refine the estimated pose */
@@ -40,7 +40,7 @@ const POSE_ITERATIONS = 30;
 const ROTATION_FILTER_SAMPLES = 10;

 /** Number of samples used in the translation filter */
-const TRANSLATION_FILTER_SAMPLES = 10;
+const TRANSLATION_FILTER_SAMPLES = 5;

 /** Convert degrees to radians */
 const DEG2RAD = 0.017453292519943295; // pi / 180
@@ -63,35 +63,7 @@ export const U0 = 6;
 /** Index of the vertical position of the principal point in the camera intrinsics matrix */
 export const V0 = 7;

-/** Translation refinement: predefined buffers for efficiency */
-const TRANSLATION_REFINEMENT_BUFFERS = (() => {
-    const l = 1.0;
-    const x = [ 0, l, 0,-l, 0 ];
-    const y = [-l, 0, l, 0, 0 ];
-    const n = x.length;
-
-    return Object.freeze({
-        x, y,
-        a1: new Array(n) as number[],
-        a2: new Array(n) as number[],
-        a3: new Array(n) as number[],
-        m: new Array(3*n * 3) as number[],
-        v: new Array(3*n) as number[],
-        t: new Array(3) as number[],
-        r: new Array(3*n) as number[],
-        c: new Array(3) as number[],
-        Mc: new Array(3*n) as number[],
-    });
-})();
-
-/** Translation refinement: number of iterations */
-const TRANSLATION_REFINEMENT_ITERATIONS = 3; // 1; // 5;
-
-/** Translation refinement: number of samples */
-const TRANSLATION_REFINEMENT_SAMPLES = 5; // TRANSLATION_REFINEMENT_BUFFERS.x.length;
-
-/** Translation refinement: the triple of the number of samples */
-const TRANSLATION_REFINEMENT_SAMPLES_3X = 15; //3 * TRANSLATION_REFINEMENT_SAMPLES;
+


 /**
@@ -126,8 +98,8 @@ export class CameraModel
     {
         this._screenSize = Speedy.Size(0, 0);
         this._matrix = Speedy.Matrix.Eye(3, 4);
-        this._intrinsics = [1,0,0,0,1,0,0,0,1]; // identity matrix
-        this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // no rotation & no translation [ R | t ] = [ I | 0 ]
+        this._intrinsics = [1,0,0,0,1,0,0,0,1]; // 3x3 identity matrix
+        this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // 3x4 matrix [ R | t ] = [ I | 0 ] no rotation & no translation
         this._partialRotationBuffer = [];
         this._translationBuffer = [];
     }
@@ -147,8 +119,7 @@ export class CameraModel
         this._screenSize.height = screenSize.height;

         // reset the model
-        this._resetIntrinsics();
-        this._resetExtrinsics();
+        this.reset();

         // log
         Utils.log(`Initializing the camera model...`);
@@ -206,12 +177,12 @@ export class CameraModel

         // estimate the pose
         const pose = this._estimatePose(homography);
-        this._storePose(pose);
+        this._extrinsics = pose.read();

         // compute the camera matrix
         const C = this.denormalizer();
         const K = Speedy.Matrix(3, 3, this._intrinsics);
-        const E = Speedy.Matrix(3, 4, this._extrinsics);
+        const E = pose; //Speedy.Matrix(3, 4, this._extrinsics);
         this._matrix.setToSync(K.times(E).times(C));
         //console.log("intrinsics -----------", K.toString());
         //console.log("matrix ----------------",this._matrix.toString());
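Note on the hunk above: the camera matrix is the product K · [ R | t ] · C, where C is the matrix returned by `denormalizer()`. For reference, here is a minimal standalone sketch of how a 3x4 camera matrix maps a homogeneous 3D point to pixel coordinates; it uses plain row-major arrays and a hypothetical helper, and is not part of the patch:

```ts
// Sketch: projecting a 3D point with a 3x4 camera matrix P (row-major, 12 entries).
type Mat3x4 = number[];

function projectPoint(P: Mat3x4, x: number, y: number, z: number): [number, number] {
    // homogeneous image coordinates [u, v, w]' = P * [x, y, z, 1]'
    const u = P[0]*x + P[1]*y + P[2]*z  + P[3];
    const v = P[4]*x + P[5]*y + P[6]*z  + P[7];
    const w = P[8]*x + P[9]*y + P[10]*z + P[11];

    // perspective division gives pixel coordinates
    return [u / w, v / w];
}

// usage: with identity-like intrinsics and no rotation/translation,
// a point at depth z = 2 projects to (x/z, y/z)
console.log(projectPoint([1,0,0,0, 0,1,0,0, 0,0,1,0], 0.5, 0.25, 2)); // [0.25, 0.125]
```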
@@ -360,6 +331,7 @@ export class CameraModel
     private _resetIntrinsics(): void
     {
         const cameraWidth = Math.max(this._screenSize.width, this._screenSize.height); // portrait?
+
         const u0 = this._screenSize.width / 2;
         const v0 = this._screenSize.height / 2;
         const fx = (cameraWidth / 2) / Math.tan(DEG2RAD * HFOV_GUESS / 2);
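Note on `_resetIntrinsics()`: the focal-length guess follows the pinhole relation fx = (width / 2) / tan(HFOV / 2), i.e., half the image width subtends half the assumed field of view. A quick standalone check, with an illustrative resolution that is not taken from the patch:

```ts
// Sketch: focal length (in pixels) from an assumed horizontal field of view.
const DEG2RAD = Math.PI / 180;

function focalFromHfov(imageWidthInPixels: number, hfovInDegrees: number): number {
    // half the image width subtends half the field of view
    return (imageWidthInPixels / 2) / Math.tan(DEG2RAD * hfovInDegrees / 2);
}

// e.g., a hypothetical 1280-pixel-wide camera with a 60-degree horizontal FOV
console.log(focalFromHfov(1280, 60)); // ~1108.5 pixels
```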
@@ -372,10 +344,10 @@ export class CameraModel
     }

     /**
-     * Compute a normalized homography H' = K^(-1) * H for an
+     * Compute a normalized homography H^ = K^(-1) * H for an
      * ideal pinhole with f = 1 and principal point = (0,0)
      * @param homography homography H to be normalized
-     * @returns normalized homography H'
+     * @returns normalized homography H^
      */
     private _normalizeHomography(homography: SpeedyMatrix): SpeedyMatrix
     {
@@ -384,9 +356,11 @@ export class CameraModel
         const v0 = this._intrinsics[V0];
         const fx = this._intrinsics[FX];
         const fy = this._intrinsics[FY];
+        const u0fx = u0 / fx;
+        const v0fy = v0 / fy;

-        const h11 = (h[0] - u0 * h[2]) / fx, h12 = (h[3] - u0 * h[5]) / fx, h13 = (h[6] - u0 * h[8]) / fx;
-        const h21 = (h[1] - v0 * h[2]) / fy, h22 = (h[4] - v0 * h[5]) / fy, h23 = (h[7] - v0 * h[8]) / fy;
+        const h11 = h[0] / fx - u0fx * h[2], h12 = h[3] / fx - u0fx * h[5], h13 = h[6] / fx - u0fx * h[8];
+        const h21 = h[1] / fy - v0fy * h[2], h22 = h[4] / fy - v0fy * h[5], h23 = h[7] / fy - v0fy * h[8];
         const h31 = h[2], h32 = h[5], h33 = h[8];

         /*console.log([
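Note on the hunk above: the old and the new expressions are algebraically the same, namely K^(-1) · H written out entry by entry for K = [ fx 0 u0; 0 fy v0; 0 0 1 ], whose inverse is [ 1/fx 0 -u0/fx; 0 1/fy -v0/fy; 0 0 1 ]; the new form just precomputes u0/fx and v0/fy. A standalone sketch of that computation on a column-major 3x3 array (hypothetical helper, not part of the patch):

```ts
// Sketch: normalize a homography, i.e., compute K^(-1) * H entry by entry.
// h is a column-major 3x3 homography (as read() would return it).
function normalizeHomography(h: number[], fx: number, fy: number, u0: number, v0: number): number[] {
    const u0fx = u0 / fx, v0fy = v0 / fy;
    return [
        h[0] / fx - u0fx * h[2], h[1] / fy - v0fy * h[2], h[2],   // 1st column
        h[3] / fx - u0fx * h[5], h[4] / fy - v0fy * h[5], h[5],   // 2nd column
        h[6] / fx - u0fx * h[8], h[7] / fy - v0fy * h[8], h[8],   // 3rd column
    ];
}
```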
@@ -414,10 +388,6 @@ export class CameraModel
         const h21 = h[1], h22 = h[4], h23 = h[7];
         const h31 = h[2], h32 = h[5], h33 = h[8];

-        // select the sign so that t3 = tz > 0
-        const sign = h33 >= 0 ? 1 : -1;
-
-        // compute the scale factor
         const h1norm2 = h11 * h11 + h21 * h21 + h31 * h31;
         const h2norm2 = h12 * h12 + h22 * h22 + h32 * h32;
         const h1norm = Math.sqrt(h1norm2);
@@ -425,42 +395,65 @@ export class CameraModel
         //const hnorm = (h1norm + h2norm) / 2;
         //const hnorm = Math.sqrt(h1norm * h2norm);
         const hnorm = Math.max(h1norm, h2norm); // this seems to work. why?
-        const scale = sign / hnorm;
-
-        // invalid homography?
-        if(Number.isNaN(scale))
-            return Speedy.Matrix(3, 3, (new Array(9)).fill(Number.NaN));

         // we expect h1norm to be approximately h2norm, but sometimes there is a lot of noise
         // if h1norm is not approximately h2norm, it means that the first two columns of
         // the normalized homography are not really encoding a rotation (up to a scale)
-        // what is causing this? does h3 (and h33) tell us anything about it?
-        // what about the intrinsics matrix? the principal point...? the fov...?

         //console.log("h1,h2",h1norm,h2norm);
         //console.log(normalizedHomography.toString());

-        // recover the pose
-        const r11 = scale * h11;
-        const r21 = scale * h21;
-        const r31 = scale * h31;
-        const r12 = scale * h12;
-        const r22 = scale * h22;
-        const r32 = scale * h32;
-        const r_ = [r11, r21, r31, r12, r22, r32];
-
-        const t1 = scale * h13;
-        const t2 = scale * h23;
-        const t3 = scale * h33;
-        const t_ = [t1, t2, t3];
-
-        // refine the pose
-        const r = this._refineRotation(r_);
-        const t = this._refineTranslation(normalizedHomography, r, t_);
-        //const t = t_; // faster, but less accurate
+        // compute a rough estimate for the scale factor
+        // select the sign so that t3 = tz > 0
+        const sign = h33 >= 0 ? 1 : -1;
+        let scale = sign / hnorm;
+
+        // sanity check
+        if(Number.isNaN(scale))
+            return Speedy.Matrix(3, 3, (new Array(9)).fill(Number.NaN));
+
+        // recover the rotation
+        let r = new Array(6) as number[];
+        r[0] = scale * h11;
+        r[1] = scale * h21;
+        r[2] = scale * h31;
+        r[3] = scale * h12;
+        r[4] = scale * h22;
+        r[5] = scale * h32;
+
+        // refine the rotation
+        r = this._refineRotation(r); // r is initially noisy
+
+        /*
+
+        After refining the rotation vectors, let's adjust the scale factor as
+        follows:
+
+        We know that [ r1 | r2 | t ] is equal to the normalized homography H up
+        to a non-zero scale factor s, i.e., [ r1 | r2 | t ] = s H. Let's call M
+        the first two columns of H, i.e., M = [ h1 | h2 ], and R = [ r1 | r2 ].
+        It follows that R = s M, meaning that M'R = s M'M. The trace of 2x2 M'R
+        is such that tr(M'R) = tr(s M'M) = s tr(M'M), which means:
+
+        s = tr(M'R) / tr(M'M) = (r1'h1 + r2'h2) / (h1'h1 + h2'h2)
+
+        (also: s^2 = det(M'R) / det(M'M))
+
+        */
+
+        // adjust the scale factor
+        scale = r[0] * h11 + r[1] * h21 + r[2] * h31;
+        scale += r[3] * h12 + r[4] * h22 + r[5] * h32;
+        scale /= h1norm2 + h2norm2;
+
+        // recover the translation
+        let t = new Array(3) as number[];
+        t[0] = scale * h13;
+        t[1] = scale * h23;
+        t[2] = scale * h33;

         // done!
-        return Speedy.Matrix(3, 3, r.concat(t)); // this is possibly NaN... why? homography...
+        return Speedy.Matrix(3, 3, r.concat(t));
     }

     /**
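The comment block added in this hunk derives the adjusted scale factor s = tr(M'R) / tr(M'M) = (r1'h1 + r2'h2) / (h1'h1 + h2'h2). A tiny numerical check of that formula in the noise-free case R = s · M, using made-up column vectors rather than tracker data:

```ts
// Sketch: verify the trace-based scale recovery on synthetic, noise-free data.
const h1 = [0.6, 0.8, 0.0];         // 1st column of a normalized homography
const h2 = [-0.8, 0.6, 0.0];        // 2nd column
const s = 2.5;                      // ground-truth scale factor
const r1 = h1.map(x => s * x);      // R = s * M (ideal case, no noise)
const r2 = h2.map(x => s * x);

const dot = (a: number[], b: number[]) => a.reduce((sum, ai, i) => sum + ai * b[i], 0);

// s = (r1'h1 + r2'h2) / (h1'h1 + h2'h2)
const sHat = (dot(r1, h1) + dot(r2, h2)) / (dot(h1, h1) + dot(h2, h2));
console.log(sHat); // 2.5, matching the ground truth
```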
@@ -594,12 +587,6 @@ export class CameraModel

         */

-        const B = TRANSLATION_REFINEMENT_BUFFERS;
-        const n = TRANSLATION_REFINEMENT_SAMPLES;
-        const n3 = TRANSLATION_REFINEMENT_SAMPLES_3X;
-
-        Utils.assert(B.x.length === n);
-
         const h = normalizedHomography.read();
         const h11 = h[0], h12 = h[3], h13 = h[6];
         const h21 = h[1], h22 = h[4], h23 = h[7];
@@ -609,19 +596,35 @@ export class CameraModel
         const r21 = rot[1], r22 = rot[4];
         const r31 = rot[2], r32 = rot[5];

-        // get sample points (xi, yi), 0 <= i < n
-        const x = B.x, y = B.y;
+        // sample points [ xi yi ]' in AR screen space
+        //const x = [ 0.5, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5, 0.0 ];
+        //const y = [ 0.5, 0.0, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5 ];
+        const x = [ 0.5, 0.0, 1.0, 1.0, 0.0 ];
+        const y = [ 0.5, 0.0, 0.0, 1.0, 1.0 ];
+        const n = x.length;
+        const n3 = 3*n;
+
+        const width = this._screenSize.width;
+        const height = this._screenSize.height;
+        for(let i = 0; i < n; i++) {
+            x[i] *= width;
+            y[i] *= height;
+        }

         // set auxiliary values: ai = H [ xi yi 1 ]'
-        const a1 = B.a1, a2 = B.a2, a3 = B.a3;
+        const a1 = new Array(n) as number[];
+        const a2 = new Array(n) as number[];
+        const a3 = new Array(n) as number[];
         for(let i = 0; i < n; i++) {
             a1[i] = x[i] * h11 + y[i] * h12 + h13;
             a2[i] = x[i] * h21 + y[i] * h22 + h23;
             a3[i] = x[i] * h31 + y[i] * h32 + h33;
         }

-        // solve M t = v for t; M: 3n x 3, v: 3n x 1, t: 3 x 1 (linear least squares)
-        const m = B.m, v = B.v;
+        // we'll solve M t = v for t with linear least squares
+        // M: 3n x 3, v: 3n x 1, t: 3 x 1
+        const m = new Array(3*n * 3) as number[];
+        const v = new Array(3*n) as number[];
         for(let i = 0, k = 0; k < n; i += 3, k++) {
             m[i] = m[i+n3+1] = m[i+n3+n3+2] = 0;
             m[i+n3] = -(m[i+1] = a3[k]);
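The linear least-squares system M t = v set up above is solved iteratively in the next hunks. For comparison only, a closed-form route would use the normal equations (M'M) t = M'v followed by a 3x3 solve; a rough sketch under the same column-major layout with column stride 3n (not part of the patch, and not what the patch does):

```ts
// Sketch: closed-form least squares for M t = v via normal equations + Cramer's rule.
// m holds M (3n x 3) column-major with column stride n3 = 3n; v has length n3.
function solveLeastSquares3(m: number[], v: number[], n3: number): number[] {
    const A = [0, 0, 0, 0, 0, 0, 0, 0, 0]; // M'M, row-major 3x3
    const b = [0, 0, 0];                   // M'v
    for(let col = 0; col < 3; col++) {
        for(let row = 0; row < 3; row++)
            for(let i = 0; i < n3; i++)
                A[3*row + col] += m[row*n3 + i] * m[col*n3 + i];
        for(let i = 0; i < n3; i++)
            b[col] += m[col*n3 + i] * v[i];
    }

    // Cramer's rule on the 3x3 system A t = b
    const det  = A[0]*(A[4]*A[8] - A[5]*A[7]) - A[1]*(A[3]*A[8] - A[5]*A[6]) + A[2]*(A[3]*A[7] - A[4]*A[6]);
    const det0 = b[0]*(A[4]*A[8] - A[5]*A[7]) - A[1]*(b[1]*A[8] - A[5]*b[2]) + A[2]*(b[1]*A[7] - A[4]*b[2]);
    const det1 = A[0]*(b[1]*A[8] - A[5]*b[2]) - b[0]*(A[3]*A[8] - A[5]*A[6]) + A[2]*(A[3]*b[2] - b[1]*A[6]);
    const det2 = A[0]*(A[4]*b[2] - b[1]*A[7]) - A[1]*(A[3]*b[2] - b[1]*A[6]) + b[0]*(A[3]*A[7] - A[4]*A[6]);
    return [det0 / det, det1 / det, det2 / det];
}
```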
@@ -681,13 +684,22 @@ export class CameraModel

         */

+        // gradient descent: super lightweight implementation
+        const r = new Array(3*n) as number[];
+        const c = new Array(3) as number[];
+        const Mc = new Array(3*n) as number[];
+
         // initial guess
-        const t = B.t;
-        t[0] = t0[0]; t[1] = t0[1]; t[2] = t0[2];
+        const t = new Array(3) as number[];
+        t[0] = t0[0];
+        t[1] = t0[1];
+        t[2] = t0[2];

-        // gradient descent: super lightweight implementation
-        const r = B.r, c = B.c, Mc = B.Mc;
-        for(let it = 0; it < TRANSLATION_REFINEMENT_ITERATIONS; it++) {
+        // iterate
+        const MAX_ITERATIONS = 15;
+        const TOLERANCE = 1;
+        for(let it = 0; it < MAX_ITERATIONS; it++) {
+            //console.log("it",it+1);

             // compute residual r = Mt - v
             for(let i = 0; i < n3; i++) {
@@ -711,19 +723,25 @@ export class CameraModel
                     Mc[i] += m[j*n3 + i] * c[j];
             }

-            // compute num = c'c and den = (Mc)'(Mc)
-            let num = 0, den = 0;
+            // compute c'c
+            let num = 0;
             for(let i = 0; i < 3; i++)
                 num += c[i] * c[i];
+            //console.log("c'c=",num);
+            if(num < TOLERANCE)
+                break;
+
+            // compute (Mc)'(Mc)
+            let den = 0;
             for(let i = 0; i < n3; i++)
                 den += Mc[i] * Mc[i];

-            // compute num / den
+            // compute frc = c'c / (Mc)'(Mc)
             const frc = num / den;
-            if(Number.isNaN(frc))
+            if(Number.isNaN(frc)) // this shouldn't happen
                 break;

-            // iterate: t = t - (num / den) * c
+            // iterate: t = t - frc * c
             for(let i = 0; i < 3; i++)
                 t[i] -= frc * c[i];

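The loop above is steepest descent with exact line search for the least-squares cost ½‖M t − v‖²: the residual is r = M t − v, the descent direction is c = M'r (the gradient up to a constant factor), and the optimal step along −c is c'c / (Mc)'(Mc), which is the `frc` factor. A generic, self-contained sketch of one such step, using a dense row-major matrix purely for illustration (the patch works on flat column-major buffers):

```ts
// Sketch: one steepest-descent step with exact line search for f(t) = 1/2 ||M t - v||^2.
function steepestDescentStep(M: number[][], v: number[], t: number[]): number[] {
    // residual r = M t - v
    const r = M.map((row, i) => row.reduce((s, mij, j) => s + mij * t[j], 0) - v[i]);

    // descent direction c = M' r (the gradient of f, up to a factor)
    const c = t.map((_, j) => M.reduce((s, row, i) => s + row[j] * r[i], 0));

    // exact line search: step = (c'c) / ((Mc)'(Mc))
    const Mc = M.map(row => row.reduce((s, mij, j) => s + mij * c[j], 0));
    const step = c.reduce((s, ci) => s + ci * ci, 0) / Mc.reduce((s, x) => s + x * x, 0);

    // t <- t - step * c
    return t.map((ti, j) => ti - step * c[j]);
}
```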
@@ -839,7 +857,8 @@ export class CameraModel
         // we want the estimated partial pose [ r1 | r2 | t ] to be as close
         // as possible to the normalized homography, up to a scale factor;
         // i.e., H * [ r1 | r2 | t ]^(-1) = s * I for a non-zero scalar s
-        // it won't be a perfect equality due to noise in the homography
+        // it won't be a perfect equality due to noise in the homography.
+        // remark: composition of homographies
         const residual = Speedy.Matrix(normalizedHomography);
         for(let k = 0; k < POSE_ITERATIONS; k++) {
             // incrementally improve the partial pose
@@ -851,28 +870,19 @@ export class CameraModel
         }
         //console.log('-----------');

-        /*
-        // test
-        const result = Speedy.Matrix.Zeros(3);
-        result.setToSync(partialPose.times(normalizedHomography.inverse()));
-        const m11 = result.at(0,0);
-        result.setToSync(result.times(1/m11));
-        console.log("Pose * NORMALIZED HOM^-1", result.toString());
-        */
+        // refine the translation vector
+        const mat = partialPose.read();
+        const r = mat.slice(0, 6);
+        const t0 = mat.slice(6, 9);
+        const t = this._refineTranslation(normalizedHomography, r, t0);
+        const refinedPartialPose = Speedy.Matrix(3, 3, r.concat(t));

         // filter the partial pose
-        const filteredPartialPose = this._filterPartialPose(partialPose);
+        const filteredPartialPose = this._filterPartialPose(refinedPartialPose);

         // estimate the full pose
-        return this._estimateFullPose(filteredPartialPose);
-    }
-
-    /**
-     * Store an estimated pose
-     * @param pose 3x4 matrix
-     */
-    private _storePose(pose: SpeedyMatrix): void
-    {
-        this._extrinsics = pose.read();
+        //const finalPartialPose = partialPose;
+        const finalPartialPose = filteredPartialPose;
+        return this._estimateFullPose(finalPartialPose);
     }
 }