You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

camera-model.ts 30KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891
  1. /*
  2. * encantar.js
  3. * GPU-accelerated Augmented Reality for the web
  4. * Copyright (C) 2022-2025 Alexandre Martins <alemartf(at)gmail.com>
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published
  8. * by the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public License
  17. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  18. *
  19. * camera-model.ts
  20. * Camera model
  21. */
  22. import Speedy from 'speedy-vision';
  23. import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
  24. import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
  25. import { SpeedyPoint2 } from 'speedy-vision/types/core/speedy-point';
  26. import { SpeedyVector2 } from 'speedy-vision/types/core/speedy-vector';
  27. import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
  28. import { Nullable, Utils } from '../utils/utils';
  29. import { Settings } from '../core/settings';
  30. import { PoseFilter } from './pose-filter';
  31. import { NumericalError } from '../utils/errors';
  32. /** A guess of the horizontal field-of-view of a typical camera, in degrees */
  33. const HFOV_GUESS = 60; // https://developer.apple.com/library/archive/documentation/DeviceInformation/Reference/iOSDeviceCompatibility/Cameras/Cameras.html
  34. /** The default scale of the image plane. The scale affects the focal length */
  35. const DEFAULT_SCALE = 2; // the length of the [-1,+1] interval
  36. /** Convert degrees to radians */
  37. const DEG2RAD = 0.017453292519943295; // pi / 180
  38. /** Convert radians to degrees */
  39. const RAD2DEG = 57.29577951308232; // 180 / pi
  40. /** Numerical tolerance */
  41. const EPSILON = 1e-6;
  42. /** Index of the horizontal focal length in the camera intrinsics matrix (column-major format) */
  43. const FX = 0;
  44. /** Index of the vertical focal length in the camera intrinsics matrix */
  45. const FY = 4;
  46. /** Index of the horizontal position of the principal point in the camera intrinsics matrix */
  47. const U0 = 6;
  48. /** Index of the vertical position of the principal point in the camera intrinsics matrix */
  49. const V0 = 7;
  50. /** Number of iterations used to refine the estimated pose */
  51. const POSE_REFINEMENT_ITERATIONS = 30;
  52. /** Maximum number of iterations used when refining the translation vector */
  53. const TRANSLATION_REFINEMENT_ITERATIONS = 15;
  54. /** Tolerance used to exit early when refining the translation vector */
  55. const TRANSLATION_REFINEMENT_TOLERANCE = DEFAULT_SCALE * 0.01;
  56. /** Size of the grid used to refine the translation vector */
  57. const TRANSLATION_REFINEMENT_GRIDSIZE = 5; //3;
  58. /**
  59. * Camera model
  60. */
  61. export class CameraModel
  62. {
  63. /** size of the image plane */
  64. private _imageSize: SpeedySize;
  65. /** 3x4 camera matrix */
  66. private _matrix: SpeedyMatrix;
  67. /** a helper to switch the handedness of a coordinate system */
  68. private _flipZ: SpeedyMatrix;
  69. /** entries of the intrinsics matrix in column-major format */
  70. private _intrinsics: number[];
  71. /** entries of the extrinsics matrix in column-major format */
  72. private _extrinsics: number[];
  73. /** smoothing filter */
  74. private _filter: PoseFilter;
  75. /**
  76. * Constructor
  77. */
  78. constructor()
  79. {
  80. this._imageSize = Speedy.Size(0, 0);
  81. this._matrix = Speedy.Matrix.Eye(3, 4);
  82. this._intrinsics = [1,0,0,0,1,0,0,0,1]; // 3x3 identity matrix
  83. this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // 3x4 matrix [ R | t ] = [ I | 0 ] no rotation & no translation
  84. this._filter = new PoseFilter();
  85. this._flipZ = Speedy.Matrix(4, 4, [
  86. 1, 0, 0, 0,
  87. 0, 1, 0, 0,
  88. 0, 0,-1, 0,
  89. 0, 0, 0, 1
  90. ]);
  91. }
  92. /**
  93. * Initialize the model
  94. * @param aspectRatio aspect ratio of the image plane
  95. * @param scale optional scale factor of the image plane
  96. */
  97. init(aspectRatio: number, scale: number = DEFAULT_SCALE): void
  98. {
  99. // log
  100. Utils.log(`Initializing the camera model...`);
  101. Utils.assert(aspectRatio > 0 && scale > 1e-5);
  102. // set the size of the image plane
  103. // this rule is conceived so that min(w,h) = s and w/h = a
  104. if(aspectRatio >= 1) {
  105. this._imageSize.width = aspectRatio * scale;
  106. this._imageSize.height = scale;
  107. }
  108. else {
  109. this._imageSize.width = scale;
  110. this._imageSize.height = scale / aspectRatio;
  111. }
  112. // reset the model
  113. this.reset();
  114. }
  115. /**
  116. * Release the model
  117. */
  118. release(): null
  119. {
  120. this.reset();
  121. return null;
  122. }
  123. /**
  124. * Update the camera model
  125. * @param homographyNDC 3x3 perspective transform
  126. * @returns a promise that resolves to a camera matrix
  127. */
  128. update(homographyNDC: SpeedyMatrix): SpeedyPromise<SpeedyMatrix>
  129. {
  130. Utils.assert(homographyNDC.rows == 3 && homographyNDC.columns == 3);
  131. // convert to image space
  132. const homography = this._convertToImageSpace(homographyNDC);
  133. // read the entries of the homography
  134. const h = homography.read();
  135. const h11 = h[0], h12 = h[3], h13 = h[6],
  136. h21 = h[1], h22 = h[4], h23 = h[7],
  137. h31 = h[2], h32 = h[5], h33 = h[8];
  138. // validate the homography (homography matrices aren't singular)
  139. const det = h13 * (h21 * h32 - h22 * h31) - h23 * (h11 * h32 - h12 * h31) + h33 * (h11 * h22 - h12 * h21);
  140. if(Math.abs(det) < EPSILON || Number.isNaN(det))
  141. return Speedy.Promise.reject(new NumericalError(`Can't update the camera model using an invalid homography matrix`));
  142. // estimate the pose
  143. const pose = this._estimatePose(homography);
  144. if(this._filter.feed(pose))
  145. this._extrinsics = this._filter.output().read();
  146. // compute the camera matrix
  147. const Z = this._flipZ; // switch to a right handed system
  148. const K = Speedy.Matrix(3, 3, this._intrinsics);
  149. const E = Speedy.Matrix(3, 4, this._extrinsics);
  150. this._matrix.setToSync(K.times(E).times(Z));
  151. /*
  152. // test
  153. console.log("homography ------------", homography.toString());
  154. console.log("intrinsics ------------", K.toString());
  155. console.log("extrinsics ------------", E.toString());
  156. console.log("extrinsicsINV ---------", Speedy.Matrix(this.computeViewMatrix().inverse()).toString());
  157. console.log("matrix ----------------", this._matrix.toString());
  158. console.log("projectionMatrix ----- ", this.computeProjectionMatrix(0.1,100).toString());
  159. */
  160. // done!
  161. return Speedy.Promise.resolve(this._matrix);
  162. }
  163. /**
  164. * Reset the camera model
  165. */
  166. reset(): void
  167. {
  168. this._resetIntrinsics();
  169. this._resetExtrinsics();
  170. }
  171. /**
  172. * The 3x4 camera matrix
  173. */
  174. get matrix(): SpeedyMatrix
  175. {
  176. return this._matrix;
  177. }
  178. /**
  179. * The size of the image plane
  180. */
  181. get imageSize(): SpeedySize
  182. {
  183. return this._imageSize;
  184. }
  185. /**
  186. * The aspect ratio of the image
  187. */
  188. get aspectRatio(): number
  189. {
  190. return this._imageSize.width / this._imageSize.height;
  191. }
  192. /**
  193. * Focal length in "pixels" (projection distance in the pinhole camera model)
  194. * same as (focal length in mm) * (number of "pixels" per world unit in "pixels"/mm)
  195. * "pixels" means image plane units
  196. */
  197. get focalLength(): number
  198. {
  199. return this._intrinsics[FX]; // fx == fy
  200. }
  201. /**
  202. * Horizontal field-of-view, given in radians
  203. */
  204. get fovx(): number
  205. {
  206. const halfWidth = this._imageSize.width / 2;
  207. return 2 * Math.atan(halfWidth / this._intrinsics[FX]);
  208. }
  209. /**
  210. * Vertical field-of-view, given in radians
  211. */
  212. get fovy(): number
  213. {
  214. const halfHeight = this._imageSize.height / 2;
  215. return 2 * Math.atan(halfHeight / this._intrinsics[FY]);
  216. }
  217. /**
  218. * Camera intrinsics matrix
  219. * @returns a 3x3 camera intrinsics matrix
  220. */
  221. intrinsicsMatrix(): SpeedyMatrix
  222. {
  223. return Speedy.Matrix(3, 3, this._intrinsics);
  224. }
  225. /**
  226. * Compute the view matrix. This 4x4 matrix moves 3D points from
  227. * world space to view space. We want the camera looking in the
  228. * direction of the negative z-axis (WebGL-friendly)
  229. * @returns a view matrix
  230. */
  231. computeViewMatrix(): SpeedyMatrix
  232. {
  233. const E = this._extrinsics;
  234. // We augment the 3x4 extrinsics matrix E with the [ 0 0 0 1 ] row
  235. // and get E+. Let Z be 4x4 flipZ, the identity matrix with the third
  236. // column negated. The following matrix is View = Z * E+ * Z. We get
  237. // the camera looking in the direction of the negative z-axis in a
  238. // right handed system!
  239. return Speedy.Matrix(4, 4, [
  240. E[0], E[1],-E[2], 0, // r1
  241. E[3], E[4],-E[5], 0, // r2
  242. -E[6],-E[7],+E[8], 0, // r3
  243. E[9], E[10],-E[11], 1 // t
  244. ]);
  245. }
  246. /**
  247. * Compute a perspective projection matrix for WebGL
  248. * @param near distance of the near plane
  249. * @param far distance of the far plane
  250. */
  251. computeProjectionMatrix(near: number, far: number): SpeedyMatrix
  252. {
  253. const fx = this._intrinsics[FX];
  254. const fy = this._intrinsics[FY];
  255. const halfWidth = this._imageSize.width / 2;
  256. const halfHeight = this._imageSize.height / 2;
  257. // we assume that the principal point is at the center of the image plane
  258. const right = near * (halfWidth / fx);
  259. const top = near * (halfHeight / fy);
  260. //const top = right * (halfHeight / halfWidth); // same thing
  261. const bottom = -top, left = -right; // symmetric frustum
  262. // a derivation of this projection matrix can be found at
  263. // https://www.songho.ca/opengl/gl_projectionmatrix.html
  264. // http://learnwebgl.brown37.net/08_projections/projections_perspective.html
  265. return Speedy.Matrix(4, 4, [
  266. 2 * near / (right - left), 0, 0, 0,
  267. 0, 2 * near / (top - bottom), 0, 0,
  268. (right + left) / (right - left), (top + bottom) / (top - bottom), -(far + near) / (far - near), -1,
  269. 0, 0, -2 * far * near / (far - near), 0
  270. ]);
  271. }
  272. /**
  273. * Reset camera extrinsics
  274. */
  275. private _resetExtrinsics(): void
  276. {
  277. // set the rotation matrix to the identity
  278. this._extrinsics.fill(0);
  279. this._extrinsics[0] = this._extrinsics[4] = this._extrinsics[8] = 1;
  280. // reset filter
  281. this._filter.reset();
  282. }
  283. /**
  284. * Reset camera intrinsics
  285. */
  286. private _resetIntrinsics(): void
  287. {
  288. const cameraWidth = Math.max(this._imageSize.width, this._imageSize.height); // portrait or landscape?
  289. const u0 = 0; // principal point at the center of the image plane
  290. const v0 = 0;
  291. const fx = (cameraWidth / 2) / Math.tan(DEG2RAD * HFOV_GUESS / 2);
  292. const fy = fx;
  293. this._intrinsics[FX] = fx;
  294. this._intrinsics[FY] = fy;
  295. this._intrinsics[U0] = u0;
  296. this._intrinsics[V0] = v0;
  297. }
  298. /**
  299. * Convert a homography from NDC to image space
  300. * @param homographyNDC
  301. * @returns a new homography
  302. */
  303. private _convertToImageSpace(homographyNDC: SpeedyMatrix): SpeedyMatrix
  304. {
  305. const w = this._imageSize.width / 2;
  306. const h = this._imageSize.height / 2;
  307. // fromNDC converts points from NDC to image space
  308. const fromNDC = Speedy.Matrix(3, 3, [
  309. w, 0, 0,
  310. 0, h, 0,
  311. 0, 0, 1
  312. ]);
  313. /*
  314. // make h33 = 1 (wanted?)
  315. const data = homographyNDC.read();
  316. const h33 = data[8];
  317. const hom = homographyNDC.times(1/h33);
  318. */
  319. // convert homography
  320. return Speedy.Matrix(fromNDC.times(homographyNDC));
  321. }
  322. /**
  323. * Compute a normalized homography H^ = K^(-1) * H for an
  324. * ideal pinhole with f = 1 and principal point = (0,0)
  325. * @param homography homography H to be normalized
  326. * @returns normalized homography H^
  327. */
  328. private _normalizeHomography(homography: SpeedyMatrix): SpeedyMatrix
  329. {
  330. const u0 = this._intrinsics[U0];
  331. const v0 = this._intrinsics[V0];
  332. const fx = this._intrinsics[FX];
  333. const fy = this._intrinsics[FY];
  334. const u0fx = u0 / fx;
  335. const v0fy = v0 / fy;
  336. const h = homography.read();
  337. const h11 = h[0] / fx - u0fx * h[2], h12 = h[3] / fx - u0fx * h[5], h13 = h[6] / fx - u0fx * h[8];
  338. const h21 = h[1] / fy - v0fy * h[2], h22 = h[4] / fy - v0fy * h[5], h23 = h[7] / fy - v0fy * h[8];
  339. const h31 = h[2], h32 = h[5], h33 = h[8];
  340. /*console.log([
  341. h11, h21, h31,
  342. h12, h22, h32,
  343. h13, h23, h33,
  344. ]);*/
  345. return Speedy.Matrix(3, 3, [
  346. h11, h21, h31,
  347. h12, h22, h32,
  348. h13, h23, h33,
  349. ]);
  350. }
  351. /**
  352. * Estimate [ r1 | r2 | t ], where r1, r2 are orthonormal and t is a translation vector
  353. * @param normalizedHomography based on the ideal pinhole (where calibration K = I)
  354. * @returns a 3x3 matrix
  355. */
  356. private _estimatePartialPose(normalizedHomography: SpeedyMatrix): SpeedyMatrix
  357. {
  358. const h = normalizedHomography.read();
  359. const h11 = h[0], h12 = h[3], h13 = h[6];
  360. const h21 = h[1], h22 = h[4], h23 = h[7];
  361. const h31 = h[2], h32 = h[5], h33 = h[8];
  362. const h1norm2 = h11 * h11 + h21 * h21 + h31 * h31;
  363. const h2norm2 = h12 * h12 + h22 * h22 + h32 * h32;
  364. const h1norm = Math.sqrt(h1norm2);
  365. const h2norm = Math.sqrt(h2norm2);
  366. //const hnorm = (h1norm + h2norm) / 2;
  367. //const hnorm = Math.sqrt(h1norm * h2norm);
  368. const hnorm = Math.max(h1norm, h2norm); // this seems to work. why?
  369. // we expect h1norm to be approximately h2norm, but sometimes there is a lot of noise
  370. // if h1norm is not approximately h2norm, it means that the first two columns of
  371. // the normalized homography are not really encoding a rotation (up to a scale)
  372. //console.log("h1,h2",h1norm,h2norm);
  373. //console.log(normalizedHomography.toString());
  374. // compute a rough estimate for the scale factor
  375. // select the sign so that t3 = tz > 0
  376. const sign = h33 >= 0 ? 1 : -1;
  377. let scale = sign / hnorm;
  378. // sanity check
  379. if(Number.isNaN(scale))
  380. return Speedy.Matrix(3, 3, (new Array<number>(9)).fill(Number.NaN));
  381. // recover the rotation
  382. let r = new Array<number>(6);
  383. r[0] = scale * h11;
  384. r[1] = scale * h21;
  385. r[2] = scale * h31;
  386. r[3] = scale * h12;
  387. r[4] = scale * h22;
  388. r[5] = scale * h32;
  389. // refine the rotation (r is initially noisy)
  390. r = this._refineRotation(r);
  391. /*
  392. After refining the rotation vectors, let's adjust the scale factor as
  393. follows:
  394. We know that [ r1 | r2 | t ] is equal to the normalized homography H up
  395. to a non-zero scale factor s, i.e., [ r1 | r2 | t ] = s H. Let's call M
  396. the first two columns of H, i.e., M = [ h1 | h2 ], and R = [ r1 | r2 ].
  397. It follows that R = s M, meaning that M'R = s M'M. The trace of 2x2 M'R
  398. is such that tr(M'R) = tr(s M'M) = s tr(M'M), which means:
  399. s = tr(M'R) / tr(M'M) = (r1'h1 + r2'h2) / (h1'h1 + h2'h2)
  400. (also: s^2 = det(M'R) / det(M'M))
  401. */
  402. // adjust the scale factor
  403. scale = r[0] * h11 + r[1] * h21 + r[2] * h31;
  404. scale += r[3] * h12 + r[4] * h22 + r[5] * h32;
  405. scale /= h1norm2 + h2norm2;
  406. // recover the translation
  407. let t = new Array<number>(3);
  408. t[0] = scale * h13;
  409. t[1] = scale * h23;
  410. t[2] = scale * h33;
  411. // done!
  412. return Speedy.Matrix(3, 3, r.concat(t));
  413. }
  414. /**
  415. * Make two non-zero and non-parallel input vectors, r1 and r2, orthonormal
  416. * @param rot rotation vectors [ r1 | r2 ] in column-major format
  417. * @returns a 3x2 matrix R such that R'R = I (column-major format)
  418. */
  419. private _refineRotation(rot: number[]): number[]
  420. {
  421. const [r11, r21, r31, r12, r22, r32] = rot;
  422. /*
  423. A little technique I figured out to correct the rotation vectors
  424. ----------------------------------------------------------------
  425. We are given two 3x1 column-vectors r1 and r2 as input in a 3x2 matrix
  426. R = [ r1 | r2 ]. We would like that R'R = I, but that won't be the case
  427. because vectors r1 and r2 are not perfectly orthonormal due to noise.
  428. Let's first notice that R'R is symmetric. You can easily check that its
  429. two eigenvalues are both real and positive (as long as r1, r2 != 0 and
  430. r1 is not parallel to r2, but we never take such vectors as input).
  431. R'R = [ r1'r1 r1'r2 ] is of rank 2, positive-definite
  432. [ r1'r2 r2'r2 ]
  433. We proceed by computing an eigendecomposition Q D Q' of R'R, where Q is
  434. chosen to be orthogonal and D is a diagonal matrix whose entries are
  435. the eigenvalues of R'R.
  436. Let LL' be the Cholesky decomposition of D. Such decomposition exists
  437. and is trivially computed: just take the square roots of the entries of
  438. D. Since L is diagonal, we have L = L'. Its inverse is also trivially
  439. computed - call it Linv.
  440. Now, define a 2x2 correction matrix C as follows:
  441. C = Q * Linv * Q'
  442. This matrix rotates the input vector, scales it by some amount, and
  443. then rotates it back to where it was (i.e., Q'Q = Q Q' = I).
  444. We compute RC in order to correct the rotation vectors. We take its
  445. two columns as the corrected vectors.
  446. In order to show that the two columns of RC are orthonormal, we can
  447. show that (RC)'(RC) = I. Indeed, noticing that C is symmetric, let's
  448. expand the expression:
  449. (RC)'(RC) = C'R'R C = C R'R C = (Q Linv Q') (Q D Q') (Q Linv Q') =
  450. Q Linv (Q'Q) D (Q'Q) Linv Q' = Q Linv D Linv Q' =
  451. Q Linv (L L) Linv Q' = Q (Linv L) (L Linv) Q' = Q Q' = I
  452. I have provided below a closed formula to correct the rotation vectors.
  453. What C does to R is very interesting: it makes the singular values
  454. become 1. If U S V' is a SVD of R, then R'R = V S^2 V'. The singular
  455. values of R are the square roots of the eigenvalues of R'R. Letting
  456. S = L and V = Q, it follows that RC = U S V' V Linv V' = U V'. This
  457. means that RC is equivalent to the correction "trick" using the SVD
  458. found in the computer vision literature (i.e., compute the SVD and
  459. return U V'). That "trick" is known to return the rotation matrix that
  460. minimizes the Frobenius norm of the difference between the input and
  461. the output. Consequently, the technique I have just presented is also
  462. optimal in that sense!
  463. By the way, the input matrix R does not need to be 3x2.
  464. */
  465. // compute the entries of R'R
  466. const r1tr1 = r11*r11 + r21*r21 + r31*r31;
  467. const r2tr2 = r12*r12 + r22*r22 + r32*r32;
  468. const r1tr2 = r11*r12 + r21*r22 + r31*r32;
  469. // compute the two real eigenvalues of R'R
  470. const delta = (r1tr1 - r2tr2) * (r1tr1 - r2tr2) + 4 * r1tr2 * r1tr2;
  471. const sqrt = Math.sqrt(delta); // delta >= 0 always
  472. const eigval1 = (r1tr1 + r2tr2 + sqrt) / 2;
  473. const eigval2 = (r1tr1 + r2tr2 - sqrt) / 2;
  474. // compute two unit eigenvectors qi = (xi,yi) of R'R
  475. const alpha1 = (r2tr2 - eigval1) - r1tr2 * (1 + r1tr2) / (r1tr1 - eigval1);
  476. const x1 = Math.sqrt((alpha1 * alpha1) / (1 + alpha1 * alpha1));
  477. const y1 = x1 / alpha1;
  478. const alpha2 = (r2tr2 - eigval2) - r1tr2 * (1 + r1tr2) / (r1tr1 - eigval2);
  479. const x2 = Math.sqrt((alpha2 * alpha2) / (1 + alpha2 * alpha2));
  480. const y2 = x2 / alpha2;
  481. // compute the Cholesky decomposition LL' of the diagonal matrix D
  482. // whose entries are the two eigenvalues of R'R and then invert L
  483. const s1 = Math.sqrt(eigval1), s2 = Math.sqrt(eigval2); // singular values of R (pick s1 >= s2)
  484. /*
  485. const Linv = Speedy.Matrix(2, 2, [1/s1, 0, 0, 1/s2]); // L inverse
  486. // compute the correction matrix C = Q * Linv * Q', where Q = [q1|q2]
  487. // is orthogonal and Linv is computed as above
  488. const Q = Speedy.Matrix(2, 2, [x1, y1, x2, y2]);
  489. const Qt = Speedy.Matrix(2, 2, [x1, x2, y1, y2]);
  490. const C = Q.times(Linv).times(Qt);
  491. // correct the rotation vectors r1 and r2 using C
  492. const R = Speedy.Matrix(3, 2, [r11, r21, r31, r12, r22, r32]);
  493. return Speedy.Matrix(R.times(C)).read();
  494. */
  495. // find C = Q * Linv * Q' manually
  496. // [ a b ] is symmetric
  497. // [ b c ]
  498. const a = x1*x1/s1 + x2*x2/s2;
  499. const b = x1*y1/s1 + x2*y2/s2;
  500. const c = y1*y1/s1 + y2*y2/s2;
  501. // find RC manually
  502. return [
  503. a*r11 + b*r12,
  504. a*r21 + b*r22,
  505. a*r31 + b*r32,
  506. b*r11 + c*r12,
  507. b*r21 + c*r22,
  508. b*r31 + c*r32
  509. ];
  510. }
  511. /**
  512. * Compute a refined translation vector
  513. * @param normalizedHomography ideal pinhole K = I
  514. * @param rot rotation vectors [ r1 | r2 ] in column-major format
  515. * @param t0 initial estimate for the translation vector
  516. * @returns 3x1 translation vector in column-major format
  517. */
  518. private _refineTranslation(normalizedHomography: SpeedyMatrix, rot: number[], t0: number[]): number[]
  519. {
  520. /*
  521. Given a normalized homography H, the rotation vectors r1, r2, and a
  522. translation vector t, we know that [ r1 | r2 | t ] = s H for a non-zero
  523. scale factor s.
  524. If we take a homogeneous vector u = [ x y w ]' (i.e., w = 1), then
  525. [ r1 | r2 | t ] u is parallel to H u, which means that their cross
  526. product is zero:
  527. [ r1 | r2 | t ] u x H u = ( x r1 + y r2 + w t ) x H u = 0
  528. The following code finds an optimal translation vector t based on the
  529. above observation. H, r1, r2 are known.
  530. */
  531. const h = normalizedHomography.read();
  532. const h11 = h[0], h12 = h[3], h13 = h[6];
  533. const h21 = h[1], h22 = h[4], h23 = h[7];
  534. const h31 = h[2], h32 = h[5], h33 = h[8];
  535. const r11 = rot[0], r12 = rot[3];
  536. const r21 = rot[1], r22 = rot[4];
  537. const r31 = rot[2], r32 = rot[5];
  538. // generate a grid of sample points [ xi yi ]' in the image
  539. //const x = [ 0, -1, +1, +1, -1 ];
  540. //const y = [ 0, -1, -1, +1, +1 ];
  541. const g = TRANSLATION_REFINEMENT_GRIDSIZE;
  542. const x = new Array<number>(g*g);
  543. const y = new Array<number>(g*g);
  544. const halfWidth = this._imageSize.width / 2;
  545. const halfHeight = this._imageSize.height / 2;
  546. for(let k = 0, i = 0; i < g; i++) {
  547. for(let j = 0; j < g; j++, k++) {
  548. // in [-1,+1]
  549. x[k] = (i/(g-1)) * 2 - 1;
  550. y[k] = (j/(g-1)) * 2 - 1;
  551. // in [-s/2,+s/2], where s = w,h
  552. x[k] *= halfWidth;
  553. y[k] *= halfHeight;
  554. }
  555. }
  556. //console.log(x.toString(), y.toString());
  557. // set auxiliary values: ai = H [ xi yi 1 ]'
  558. const n = x.length;
  559. const a1 = new Array<number>(n);
  560. const a2 = new Array<number>(n);
  561. const a3 = new Array<number>(n);
  562. for(let i = 0; i < n; i++) {
  563. a1[i] = x[i] * h11 + y[i] * h12 + h13;
  564. a2[i] = x[i] * h21 + y[i] * h22 + h23;
  565. a3[i] = x[i] * h31 + y[i] * h32 + h33;
  566. }
  567. // we'll solve M t = v for t with linear least squares
  568. // M: 3n x 3, v: 3n x 1, t: 3 x 1
  569. const n3 = 3*n;
  570. const m = new Array<number>(n3 * 3);
  571. const v = new Array<number>(n3);
  572. for(let i = 0, k = 0; k < n; i += 3, k++) {
  573. m[i] = m[i+n3+1] = m[i+n3+n3+2] = 0;
  574. m[i+n3] = -(m[i+1] = a3[k]);
  575. m[i+2] = -(m[i+n3+n3] = a2[k]);
  576. m[i+n3+n3+1] = -(m[i+n3+2] = a1[k]);
  577. v[i] = a3[k] * (x[k] * r21 + y[k] * r22) - a2[k] * (x[k] * r31 + y[k] * r32);
  578. v[i+1] = -a3[k] * (x[k] * r11 + y[k] * r12) + a1[k] * (x[k] * r31 + y[k] * r32);
  579. v[i+2] = a2[k] * (x[k] * r11 + y[k] * r12) - a1[k] * (x[k] * r21 + y[k] * r22);
  580. }
  581. /*
  582. // this works, but I want more lightweight
  583. const M = Speedy.Matrix(n3, 3, m);
  584. const v_ = Speedy.Matrix(n3, 1, v);
  585. return Speedy.Matrix(M.ldiv(v_)).read();
  586. */
  587. /*
  588. Gradient descent with optimal step size / learning rate
  589. -------------------------------------------------------
  590. Let's find the column-vector x that minimizes the error function
  591. E(x) = r'r, where r = Ax - b, using gradient descent. This is linear
  592. least squares. We want to find x easily, QUICKLY and iteratively.
  593. The update rule of gradient descent is set to:
  594. x := x - w * grad(E)
  595. where w is the learning rate and grad(E) is the gradient of E(x):
  596. grad(E) = 2 A'r = 2 A'(Ax - b) = 2 A'A x - 2 A'b
  597. Let's adjust w to make x "converge quickly". Define function S(w) as:
  598. S(w) = x - w * grad(E) (step)
  599. and another function F(w) as:
  600. F(w) = E(S(w))
  601. which is the error of the step. We minimize F by setting its derivative
  602. to zero:
  603. 0 = dF = dF dS
  604. dw dS dw
  605. What follows is a fair amount of algebra. Do the math and you'll find
  606. the following optimal update rule:
  607. (c'c)
  608. x := x - --------- c
  609. (Ac)'(Ac)
  610. where c = A'r = A'(Ax - b)
  611. */
  612. // gradient descent: super lightweight implementation
  613. const r = new Array<number>(3*n);
  614. const c = new Array<number>(3);
  615. const Mc = new Array<number>(3*n);
  616. // initial guess
  617. const t = new Array<number>(3);
  618. t[0] = t0[0];
  619. t[1] = t0[1];
  620. t[2] = t0[2];
  621. // iterate
  622. for(let it = 0; it < TRANSLATION_REFINEMENT_ITERATIONS; it++) {
  623. //console.log("it",it+1);
  624. // compute residual r = Mt - v
  625. for(let i = 0; i < n3; i++) {
  626. r[i] = 0;
  627. for(let j = 0; j < 3; j++)
  628. r[i] += m[j*n3 + i] * t[j];
  629. r[i] -= v[i];
  630. }
  631. // compute c = M'r
  632. for(let i = 0; i < 3; i++) {
  633. c[i] = 0;
  634. for(let j = 0; j < n3; j++)
  635. c[i] += m[i*n3 + j] * r[j];
  636. }
  637. // compute Mc
  638. for(let i = 0; i < n3; i++) {
  639. Mc[i] = 0;
  640. for(let j = 0; j < 3; j++)
  641. Mc[i] += m[j*n3 + i] * c[j];
  642. }
  643. // compute c'c
  644. let num = 0;
  645. for(let i = 0; i < 3; i++)
  646. num += c[i] * c[i];
  647. //console.log("c'c=",num," at #",it+1);
  648. if(num < TRANSLATION_REFINEMENT_TOLERANCE)
  649. break;
  650. // compute (Mc)'(Mc)
  651. let den = 0;
  652. for(let i = 0; i < n3; i++)
  653. den += Mc[i] * Mc[i];
  654. // compute frc = c'c / (Mc)'(Mc)
  655. const frc = num / den;
  656. if(Number.isNaN(frc)) // this shouldn't happen
  657. break;
  658. // iterate: t = t - frc * c
  659. for(let i = 0; i < 3; i++)
  660. t[i] -= frc * c[i];
  661. }
  662. //console.log("OLD t:\n\n",t0.join('\n'));
  663. //console.log("new t:\n\n",t.join('\n'));
  664. // done!
  665. return t;
  666. }
  667. /**
  668. * Find a 3x3 rotation matrix R given two orthonormal vectors [ r1 | r2 ]
  669. * @param partialRotation partial rotation matrix [ r1 | r2 ] in column-major format
  670. * @returns a rotation matrix R in column-major format
  671. */
  672. private _computeFullRotation(partialRotation: number[]): number[]
  673. {
  674. const r11 = partialRotation[0], r12 = partialRotation[3];
  675. const r21 = partialRotation[1], r22 = partialRotation[4];
  676. const r31 = partialRotation[2], r32 = partialRotation[5];
  677. // r3 = +- ( r1 x r2 )
  678. let r13 = r21 * r32 - r31 * r22;
  679. let r23 = r31 * r12 - r11 * r32;
  680. let r33 = r11 * r22 - r21 * r12;
  681. // let's make sure that det R = +1 (keep the orientation)
  682. const det = r11 * (r22 * r33 - r23 * r32) - r21 * (r12 * r33 - r13 * r32) + r31 * (r12 * r23 - r13 * r22);
  683. if(det < 0) {
  684. r13 = -r13;
  685. r23 = -r23;
  686. r33 = -r33;
  687. }
  688. // done!
  689. return [
  690. r11, r21, r31,
  691. r12, r22, r32,
  692. r13, r23, r33
  693. ];
  694. }
  695. /**
  696. * Estimate the pose [ R | t ] given a homography in sensor space
  697. * @param homography must be valid
  698. * @returns 3x4 matrix
  699. */
  700. private _estimatePose(homography: SpeedyMatrix): SpeedyMatrix
  701. {
  702. const normalizedHomography = this._normalizeHomography(homography);
  703. const partialPose = Speedy.Matrix.Eye(3);
  704. // we want the estimated partial pose [ r1 | r2 | t ] to be as close
  705. // as possible to the normalized homography, up to a scale factor;
  706. // i.e., H * [ r1 | r2 | t ]^(-1) = s * I for a non-zero scalar s
  707. // it won't be a perfect equality due to noise in the homography.
  708. // remark: composition of homographies
  709. const residual = Speedy.Matrix(normalizedHomography);
  710. for(let k = 0; k < POSE_REFINEMENT_ITERATIONS; k++) {
  711. // incrementally improve the partial pose
  712. const rt = this._estimatePartialPose(residual); // rt should converge to the identity matrix
  713. partialPose.setToSync(rt.times(partialPose));
  714. residual.setToSync(residual.times(rt.inverse()));
  715. //console.log("rt",rt.toString());
  716. //console.log("residual",residual.toString());
  717. }
  718. //console.log('-----------');
  719. // read the partial pose
  720. const mat = partialPose.read();
  721. const r0 = mat.slice(0, 6);
  722. const t0 = mat.slice(6, 9);
  723. // refine the translation vector and compute the full rotation matrix
  724. const t = this._refineTranslation(normalizedHomography, r0, t0);
  725. const r = this._computeFullRotation(r0);
  726. // done!
  727. return Speedy.Matrix(3, 4, r.concat(t));
  728. }
  729. }