Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

camera-model.ts 30KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888
  1. /*
  2. * MARTINS.js
  3. * GPU-accelerated Augmented Reality for the web
  4. * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published
  8. * by the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public License
  17. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  18. *
  19. * camera-model.ts
  20. * Camera model
  21. */
  22. import Speedy from 'speedy-vision';
  23. import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
  24. import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
  25. import { SpeedyPoint2 } from 'speedy-vision/types/core/speedy-point';
  26. import { SpeedyVector2 } from 'speedy-vision/types/core/speedy-vector';
  27. import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
  28. import { Nullable, Utils } from '../utils/utils';
  29. import { Settings } from '../core/settings';
  30. import { IllegalOperationError, IllegalArgumentError } from '../utils/errors';
  31. /** A guess of the horizontal field-of-view of a typical camera, in degrees */
  32. const HFOV_GUESS = 60; // https://developer.apple.com/library/archive/documentation/DeviceInformation/Reference/iOSDeviceCompatibility/Cameras/Cameras.html
  33. /** Number of iterations used to refine the estimated pose */
  34. const POSE_ITERATIONS = 30;
  35. /** Number of samples used in the rotation filter */
  36. const ROTATION_FILTER_SAMPLES = 10;
  37. /** Number of samples used in the translation filter */
  38. const TRANSLATION_FILTER_SAMPLES = 5;
  39. /** Convert degrees to radians */
  40. const DEG2RAD = 0.017453292519943295; // pi / 180
  41. /** Convert radians to degrees */
  42. const RAD2DEG = 57.29577951308232; // 180 / pi
  43. /** Numerical tolerance */
  44. const EPSILON = 1e-6;
  45. /** Index of the horizontal focal length in the camera intrinsics matrix (column-major format) */
  46. export const FX = 0;
  47. /** Index of the vertical focal length in the camera intrinsics matrix */
  48. export const FY = 4;
  49. /** Index of the horizontal position of the principal point in the camera intrinsics matrix */
  50. export const U0 = 6;
  51. /** Index of the vertical position of the principal point in the camera intrinsics matrix */
  52. export const V0 = 7;
  53. /**
  54. * Camera model
  55. */
  56. export class CameraModel
  57. {
  58. /** size of the image sensor, in pixels */
  59. private _screenSize: SpeedySize;
  60. /** 3x4 camera matrix */
  61. private _matrix: SpeedyMatrix;
  62. /** intrinsics matrix, in column-major format */
  63. private _intrinsics: number[];
  64. /** extrinsics matrix, in column-major format */
  65. private _extrinsics: number[];
  66. /** filter: samples of partial rotation matrix [ r1 | r2 ] */
  67. private _partialRotationBuffer: number[][];
  68. /** filter: samples of translation vector t */
  69. private _translationBuffer: number[][];
  70. /**
  71. * Constructor
  72. */
  73. constructor()
  74. {
  75. this._screenSize = Speedy.Size(0, 0);
  76. this._matrix = Speedy.Matrix.Eye(3, 4);
  77. this._intrinsics = [1,0,0,0,1,0,0,0,1]; // 3x3 identity matrix
  78. this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // 3x4 matrix [ R | t ] = [ I | 0 ] no rotation & no translation
  79. this._partialRotationBuffer = [];
  80. this._translationBuffer = [];
  81. }
  82. /**
  83. * Initialize the model
  84. * @param screenSize
  85. */
  86. init(screenSize: SpeedySize): void
  87. {
  88. // validate
  89. if(screenSize.area() == 0)
  90. throw new IllegalArgumentError(`Can't initialize the camera model with screenSize = ${screenSize.toString()}`);
  91. // set the screen size
  92. this._screenSize.width = screenSize.width;
  93. this._screenSize.height = screenSize.height;
  94. // reset the model
  95. this.reset();
  96. // log
  97. Utils.log(`Initializing the camera model...`);
  98. }
  99. /**
  100. * Release the model
  101. */
  102. release(): null
  103. {
  104. this.reset();
  105. return null;
  106. }
  107. /**
  108. * Update the camera model
  109. * @param homography 3x3 perspective transform
  110. * @param screenSize may change over time (e.g., when going from portrait to landscape or vice-versa)
  111. * @returns promise that resolves to a camera matrix
  112. */
  113. update(homography: SpeedyMatrix, screenSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
  114. {
  115. // validate the shape of the homography
  116. if(homography.rows != 3 || homography.columns != 3)
  117. throw new IllegalArgumentError(`Camera model: provide a homography matrix`);
  118. // validate screenSize
  119. if(screenSize.area() == 0)
  120. throw new IllegalArgumentError(`Camera model: invalid screenSize = ${screenSize.toString()}`);
  121. // changed screen size?
  122. if(!this._screenSize.equals(screenSize)) {
  123. Utils.log(`Camera model: detected a change in screen size...`);
  124. // update the screen size
  125. this._screenSize.width = screenSize.width;
  126. this._screenSize.height = screenSize.height;
  127. // reset camera
  128. this.reset();
  129. }
  130. // read the entries of the homography
  131. const h = homography.read();
  132. const h11 = h[0], h12 = h[3], h13 = h[6],
  133. h21 = h[1], h22 = h[4], h23 = h[7],
  134. h31 = h[2], h32 = h[5], h33 = h[8];
  135. // validate the homography (homography matrices aren't singular)
  136. const det = h13 * (h21 * h32 - h22 * h31) - h23 * (h11 * h32 - h12 * h31) + h33 * (h11 * h22 - h12 * h21);
  137. if(Math.abs(det) < EPSILON) {
  138. Utils.warning(`Can't update the camera model using an invalid homography matrix`);
  139. return Speedy.Promise.resolve(this._matrix);
  140. }
  141. // estimate the pose
  142. const pose = this._estimatePose(homography);
  143. this._extrinsics = pose.read();
  144. // compute the camera matrix
  145. const C = this.denormalizer();
  146. const K = Speedy.Matrix(3, 3, this._intrinsics);
  147. const E = pose; //Speedy.Matrix(3, 4, this._extrinsics);
  148. this._matrix.setToSync(K.times(E).times(C));
  149. //console.log("intrinsics -----------", K.toString());
  150. //console.log("matrix ----------------",this._matrix.toString());
  151. return Speedy.Promise.resolve(this._matrix);
  152. }
  153. /**
  154. * Reset camera model
  155. */
  156. reset(): void
  157. {
  158. this._resetIntrinsics();
  159. this._resetExtrinsics();
  160. }
  161. /**
  162. * The camera matrix that maps the 3D normalized space [-1,1]^3 to the
  163. * 2D AR screen space (measured in pixels)
  164. * @returns 3x4 camera matrix
  165. */
  166. get matrix(): SpeedyMatrix
  167. {
  168. return this._matrix;
  169. }
  170. /**
  171. * Camera intrinsics matrix
  172. * @returns 3x3 intrinsics matrix in column-major format
  173. */
  174. get intrinsics(): number[]
  175. {
  176. return this._intrinsics;
  177. }
  178. /**
  179. * Camera extrinsics matrix
  180. * @returns 3x4 extrinsics matrix [ R | t ] in column-major format
  181. */
  182. get extrinsics(): number[]
  183. {
  184. return this._extrinsics;
  185. }
  186. /**
  187. * Convert coordinates from normalized space [-1,1]^3 to a
  188. * "3D pixel space" based on the dimensions of the AR screen.
  189. *
  190. * We perform a 180-degrees rotation around the x-axis so that
  191. * it looks nicer (the y-axis grows downwards in image space).
  192. *
  193. * The final camera matrix is P = K * [ R | t ] * C, where
  194. * C is this conversion matrix. The intent behind this is to
  195. * make tracking independent of target and screen sizes.
  196. *
  197. * Reminder: we use a right-handed coordinate system in 3D!
  198. * In 2D image space the coordinate system is left-handed.
  199. *
  200. * @returns 4x4 conversion matrix C
  201. */
  202. denormalizer(): SpeedyMatrix
  203. {
  204. const w = this._screenSize.width / 2; // half width, in pixels
  205. const h = this._screenSize.height / 2; // half height, in pixels
  206. const d = Math.min(w, h); // virtual unit length, in pixels
  207. /*
  208. return Speedy.Matrix(4, 4, [
  209. 1, 0, 0, 0,
  210. 0,-1, 0, 0,
  211. 0, 0,-1, 0,
  212. w/d, h/d, 0, 1/d
  213. ]);
  214. */
  215. return Speedy.Matrix(4, 4, [
  216. d, 0, 0, 0,
  217. 0,-d, 0, 0,
  218. 0, 0,-d, 0,
  219. w, h, 0, 1,
  220. ]);
  221. }
  222. /**
  223. * Size of the AR screen space, in pixels
  224. * @returns size in pixels
  225. */
  226. get screenSize(): SpeedySize
  227. {
  228. return this._screenSize;
  229. }
  230. /**
  231. * Focal length in pixel units (projection distance in the pinhole camera model)
  232. * same as (focal length in mm) * (number of pixels per world unit in pixels/mm)
  233. * @returns focal length
  234. */
  235. get focalLength(): number
  236. {
  237. return this._intrinsics[FY]; // fx == fy
  238. }
  239. /**
  240. * Horizontal field-of-view, given in radians
  241. * @returns vertical field-of-view
  242. */
  243. get fovx(): number
  244. {
  245. return 2 * Math.atan(this._intrinsics[U0] / this._intrinsics[FX]);
  246. }
  247. /**
  248. * Vertical field-of-view, given in radians
  249. * @returns vertical field-of-view
  250. */
  251. get fovy(): number
  252. {
  253. return 2 * Math.atan(this._intrinsics[V0] / this._intrinsics[FY]);
  254. }
  255. /**
  256. * Principal point
  257. * @returns principal point, in pixel coordinates
  258. */
  259. principalPoint(): SpeedyPoint2
  260. {
  261. return Speedy.Point2(this._intrinsics[U0], this._intrinsics[V0]);
  262. }
  263. /**
  264. * Reset camera extrinsics
  265. */
  266. private _resetExtrinsics(): void
  267. {
  268. // set the rotation matrix to the identity
  269. this._extrinsics.fill(0);
  270. this._extrinsics[0] = this._extrinsics[4] = this._extrinsics[8] = 1;
  271. // reset filters
  272. this._partialRotationBuffer.length = 0;
  273. this._translationBuffer.length = 0;
  274. }
  275. /**
  276. * Reset camera intrinsics
  277. */
  278. private _resetIntrinsics(): void
  279. {
  280. const cameraWidth = Math.max(this._screenSize.width, this._screenSize.height); // portrait?
  281. const u0 = this._screenSize.width / 2;
  282. const v0 = this._screenSize.height / 2;
  283. const fx = (cameraWidth / 2) / Math.tan(DEG2RAD * HFOV_GUESS / 2);
  284. const fy = fx;
  285. this._intrinsics[FX] = fx;
  286. this._intrinsics[FY] = fy;
  287. this._intrinsics[U0] = u0;
  288. this._intrinsics[V0] = v0;
  289. }
  290. /**
  291. * Compute a normalized homography H^ = K^(-1) * H for an
  292. * ideal pinhole with f = 1 and principal point = (0,0)
  293. * @param homography homography H to be normalized
  294. * @returns normalized homography H^
  295. */
  296. private _normalizeHomography(homography: SpeedyMatrix): SpeedyMatrix
  297. {
  298. const h = homography.read();
  299. const u0 = this._intrinsics[U0];
  300. const v0 = this._intrinsics[V0];
  301. const fx = this._intrinsics[FX];
  302. const fy = this._intrinsics[FY];
  303. const u0fx = u0 / fx;
  304. const v0fy = v0 / fy;
  305. const h11 = h[0] / fx - u0fx * h[2], h12 = h[3] / fx - u0fx * h[5], h13 = h[6] / fx - u0fx * h[8];
  306. const h21 = h[1] / fy - v0fy * h[2], h22 = h[4] / fy - v0fy * h[5], h23 = h[7] / fy - v0fy * h[8];
  307. const h31 = h[2], h32 = h[5], h33 = h[8];
  308. /*console.log([
  309. h11, h21, h31,
  310. h12, h22, h32,
  311. h13, h23, h33,
  312. ]);*/
  313. return Speedy.Matrix(3, 3, [
  314. h11, h21, h31,
  315. h12, h22, h32,
  316. h13, h23, h33,
  317. ]);
  318. }
  319. /**
  320. * Estimate [ r1 | r2 | t ], where r1, r2 are orthonormal and t is a translation vector
  321. * @param normalizedHomography based on the ideal pinhole (where calibration K = I)
  322. * @returns a 3x3 matrix
  323. */
  324. private _estimatePartialPose(normalizedHomography: SpeedyMatrix): SpeedyMatrix
  325. {
  326. const h = normalizedHomography.read();
  327. const h11 = h[0], h12 = h[3], h13 = h[6];
  328. const h21 = h[1], h22 = h[4], h23 = h[7];
  329. const h31 = h[2], h32 = h[5], h33 = h[8];
  330. const h1norm2 = h11 * h11 + h21 * h21 + h31 * h31;
  331. const h2norm2 = h12 * h12 + h22 * h22 + h32 * h32;
  332. const h1norm = Math.sqrt(h1norm2);
  333. const h2norm = Math.sqrt(h2norm2);
  334. //const hnorm = (h1norm + h2norm) / 2;
  335. //const hnorm = Math.sqrt(h1norm * h2norm);
  336. const hnorm = Math.max(h1norm, h2norm); // this seems to work. why?
  337. // we expect h1norm to be approximately h2norm, but sometimes there is a lot of noise
  338. // if h1norm is not approximately h2norm, it means that the first two columns of
  339. // the normalized homography are not really encoding a rotation (up to a scale)
  340. //console.log("h1,h2",h1norm,h2norm);
  341. //console.log(normalizedHomography.toString());
  342. // compute a rough estimate for the scale factor
  343. // select the sign so that t3 = tz > 0
  344. const sign = h33 >= 0 ? 1 : -1;
  345. let scale = sign / hnorm;
  346. // sanity check
  347. if(Number.isNaN(scale))
  348. return Speedy.Matrix(3, 3, (new Array(9)).fill(Number.NaN));
  349. // recover the rotation
  350. let r = new Array(6) as number[];
  351. r[0] = scale * h11;
  352. r[1] = scale * h21;
  353. r[2] = scale * h31;
  354. r[3] = scale * h12;
  355. r[4] = scale * h22;
  356. r[5] = scale * h32;
  357. // refine the rotation
  358. r = this._refineRotation(r); // r is initially noisy
  359. /*
  360. After refining the rotation vectors, let's adjust the scale factor as
  361. follows:
  362. We know that [ r1 | r2 | t ] is equal to the normalized homography H up
  363. to a non-zero scale factor s, i.e., [ r1 | r2 | t ] = s H. Let's call M
  364. the first two columns of H, i.e., M = [ h1 | h2 ], and R = [ r1 | r2 ].
  365. It follows that R = s M, meaning that M'R = s M'M. The trace of 2x2 M'R
  366. is such that tr(M'R) = tr(s M'M) = s tr(M'M), which means:
  367. s = tr(M'R) / tr(M'M) = (r1'h1 + r2'h2) / (h1'h1 + h2'h2)
  368. (also: s^2 = det(M'R) / det(M'M))
  369. */
  370. // adjust the scale factor
  371. scale = r[0] * h11 + r[1] * h21 + r[2] * h31;
  372. scale += r[3] * h12 + r[4] * h22 + r[5] * h32;
  373. scale /= h1norm2 + h2norm2;
  374. // recover the translation
  375. let t = new Array(3) as number[];
  376. t[0] = scale * h13;
  377. t[1] = scale * h23;
  378. t[2] = scale * h33;
  379. // done!
  380. return Speedy.Matrix(3, 3, r.concat(t));
  381. }
  382. /**
  383. * Make two non-zero and non-parallel input vectors, r1 and r2, orthonormal
  384. * @param rot rotation vectors [ r1 | r2 ] in column-major format
  385. * @returns a 3x2 matrix R such that R'R = I (column-major format)
  386. */
  387. private _refineRotation(rot: number[]): number[]
  388. {
  389. const [r11, r21, r31, r12, r22, r32] = rot;
  390. /*
  391. A little technique I figured out to correct the rotation vectors
  392. ----------------------------------------------------------------
  393. We are given two 3x1 column-vectors r1 and r2 as input in a 3x2 matrix
  394. R = [ r1 | r2 ]. We would like that R'R = I, but that won't be the case
  395. because vectors r1 and r2 are not perfectly orthonormal due to noise.
  396. Let's first notice that R'R is symmetric. You can easily check that its
  397. two eigenvalues are both real and positive (as long as r1, r2 != 0 and
  398. r1 is not parallel to r2, but we never take such vectors as input).
  399. R'R = [ r1'r1 r1'r2 ] is of rank 2, positive-definite
  400. [ r1'r2 r2'r2 ]
  401. We proceed by computing an eigendecomposition Q D Q' of R'R, where Q is
  402. chosen to be orthogonal and D is a diagonal matrix whose entries are
  403. the eigenvalues of R'R.
  404. Let LL' be the Cholesky decomposition of D. Such decomposition exists
  405. and is trivially computed: just take the square roots of the entries of
  406. D. Since L is diagonal, we have L = L'. Its inverse is also trivially
  407. computed - call it Linv.
  408. Now, define a 2x2 correction matrix C as follows:
  409. C = Q * Linv * Q'
  410. This matrix rotates the input vector, scales it by some amount, and
  411. then rotates it back to where it was (i.e., Q'Q = Q Q' = I).
  412. We compute RC in order to correct the rotation vectors. We take its
  413. two columns as the corrected vectors.
  414. In order to show that the two columns of RC are orthonormal, we can
  415. show that (RC)'(RC) = I. Indeed, noticing that C is symmetric, let's
  416. expand the expression:
  417. (RC)'(RC) = C'R'R C = C R'R C = (Q Linv Q') (Q D Q') (Q Linv Q') =
  418. Q Linv (Q'Q) D (Q'Q) Linv Q' = Q Linv D Linv Q' =
  419. Q Linv (L L) Linv Q' = Q (Linv L) (L Linv) Q' = Q Q' = I
  420. I have provided below a closed formula to correct the rotation vectors.
  421. What C does to R is very interesting: it makes the singular values
  422. become 1. If U S V' is a SVD of R, then R'R = V S^2 V'. The singular
  423. values of R are the square roots of the eigenvalues of R'R. Letting
  424. S = L and V = Q, it follows that RC = U S V' V Linv V' = U V'. This
  425. means that RC is equivalent to the correction "trick" using the SVD
  426. found in the computer vision literature (i.e., compute the SVD and
  427. return U V'). That "trick" is known to return the rotation matrix that
  428. minimizes the Frobenius norm of the difference between the input and
  429. the output. Consequently, the technique I have just presented is also
  430. optimal in that sense!
  431. By the way, the input matrix R does not need to be 3x2.
  432. */
  433. // compute the entries of R'R
  434. const r1tr1 = r11*r11 + r21*r21 + r31*r31;
  435. const r2tr2 = r12*r12 + r22*r22 + r32*r32;
  436. const r1tr2 = r11*r12 + r21*r22 + r31*r32;
  437. // compute the two real eigenvalues of R'R
  438. const delta = (r1tr1 - r2tr2) * (r1tr1 - r2tr2) + 4 * r1tr2 * r1tr2;
  439. const sqrt = Math.sqrt(delta); // delta >= 0 always
  440. const eigval1 = (r1tr1 + r2tr2 + sqrt) / 2;
  441. const eigval2 = (r1tr1 + r2tr2 - sqrt) / 2;
  442. // compute two unit eigenvectors qi = (xi,yi) of R'R
  443. const alpha1 = (r2tr2 - eigval1) - r1tr2 * (1 + r1tr2) / (r1tr1 - eigval1);
  444. const x1 = Math.sqrt((alpha1 * alpha1) / (1 + alpha1 * alpha1));
  445. const y1 = x1 / alpha1;
  446. const alpha2 = (r2tr2 - eigval2) - r1tr2 * (1 + r1tr2) / (r1tr1 - eigval2);
  447. const x2 = Math.sqrt((alpha2 * alpha2) / (1 + alpha2 * alpha2));
  448. const y2 = x2 / alpha2;
  449. // compute the Cholesky decomposition LL' of the diagonal matrix D
  450. // whose entries are the two eigenvalues of R'R and then invert L
  451. const s1 = Math.sqrt(eigval1), s2 = Math.sqrt(eigval2); // singular values of R (pick s1 >= s2)
  452. const Linv = Speedy.Matrix(2, 2, [1/s1, 0, 0, 1/s2]); // L inverse
  453. // compute the correction matrix C = Q * Linv * Q', where Q = [q1|q2]
  454. // is orthogonal and Linv is computed as above
  455. const Q = Speedy.Matrix(2, 2, [x1, y1, x2, y2]);
  456. const Qt = Speedy.Matrix(2, 2, [x1, x2, y1, y2]);
  457. const C = Q.times(Linv).times(Qt);
  458. // correct the rotation vectors r1 and r2 using C
  459. const R = Speedy.Matrix(3, 2, [r11, r21, r31, r12, r22, r32]);
  460. return Speedy.Matrix(R.times(C)).read();
  461. }
  462. /**
  463. * Compute a refined translation vector
  464. * @param normalizedHomography ideal pinhole K = I
  465. * @param rot rotation vectors [ r1 | r2 ] in column-major format
  466. * @param t0 initial estimate for the translation vector
  467. * @returns 3x1 translation vector in column-major format
  468. */
  469. private _refineTranslation(normalizedHomography: SpeedyMatrix, rot: number[], t0: number[]): number[]
  470. {
  471. /*
  472. Given a normalized homography H, the rotation vectors r1, r2, and a
  473. translation vector t, we know that [ r1 | r2 | t ] = s H for a non-zero
  474. scale factor s.
  475. If we take a homogeneous vector u = [ x y w ]' (i.e., w = 1), then
  476. [ r1 | r2 | t ] u is parallel to H u, which means that their cross
  477. product is zero:
  478. [ r1 | r2 | t ] u x H u = ( x r1 + y r2 + w t ) x H u = 0
  479. The following code finds an optimal translation vector t based on the
  480. above observation. H, r1, r2 are known.
  481. */
  482. const h = normalizedHomography.read();
  483. const h11 = h[0], h12 = h[3], h13 = h[6];
  484. const h21 = h[1], h22 = h[4], h23 = h[7];
  485. const h31 = h[2], h32 = h[5], h33 = h[8];
  486. const r11 = rot[0], r12 = rot[3];
  487. const r21 = rot[1], r22 = rot[4];
  488. const r31 = rot[2], r32 = rot[5];
  489. // sample points [ xi yi ]' in AR screen space
  490. //const x = [ 0.5, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5, 0.0 ];
  491. //const y = [ 0.5, 0.0, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5 ];
  492. const x = [ 0.5, 0.0, 1.0, 1.0, 0.0 ];
  493. const y = [ 0.5, 0.0, 0.0, 1.0, 1.0 ];
  494. const n = x.length;
  495. const n3 = 3*n;
  496. const width = this._screenSize.width;
  497. const height = this._screenSize.height;
  498. for(let i = 0; i < n; i++) {
  499. x[i] *= width;
  500. y[i] *= height;
  501. }
  502. // set auxiliary values: ai = H [ xi yi 1 ]'
  503. const a1 = new Array(n) as number[];
  504. const a2 = new Array(n) as number[];
  505. const a3 = new Array(n) as number[];
  506. for(let i = 0; i < n; i++) {
  507. a1[i] = x[i] * h11 + y[i] * h12 + h13;
  508. a2[i] = x[i] * h21 + y[i] * h22 + h23;
  509. a3[i] = x[i] * h31 + y[i] * h32 + h33;
  510. }
  511. // we'll solve M t = v for t with linear least squares
  512. // M: 3n x 3, v: 3n x 1, t: 3 x 1
  513. const m = new Array(3*n * 3) as number[];
  514. const v = new Array(3*n) as number[];
  515. for(let i = 0, k = 0; k < n; i += 3, k++) {
  516. m[i] = m[i+n3+1] = m[i+n3+n3+2] = 0;
  517. m[i+n3] = -(m[i+1] = a3[k]);
  518. m[i+2] = -(m[i+n3+n3] = a2[k]);
  519. m[i+n3+n3+1] = -(m[i+n3+2] = a1[k]);
  520. v[i] = a3[k] * (x[k] * r21 + y[k] * r22) - a2[k] * (x[k] * r31 + y[k] * r32);
  521. v[i+1] = -a3[k] * (x[k] * r11 + y[k] * r12) + a1[k] * (x[k] * r31 + y[k] * r32);
  522. v[i+2] = a2[k] * (x[k] * r11 + y[k] * r12) - a1[k] * (x[k] * r21 + y[k] * r22);
  523. }
  524. /*
  525. // this works, but I want more lightweight
  526. const M = Speedy.Matrix(n3, 3, m);
  527. const v_ = Speedy.Matrix(n3, 1, v);
  528. return Speedy.Matrix(M.ldiv(v_)).read();
  529. */
  530. /*
  531. Gradient descent with optimal step size / learning rate
  532. -------------------------------------------------------
  533. Let's find the column-vector x that minimizes the error function
  534. E(x) = r'r, where r = Ax - b, using gradient descent. This is linear
  535. least squares. We want to find x easily, QUICKLY and iteratively.
  536. The update rule of gradient descent is set to:
  537. x := x - w * grad(E)
  538. where w is the learning rate and grad(E) is the gradient of E(x):
  539. grad(E) = 2 A'r = 2 A'(Ax - b) = 2 A'A x - 2 A'b
  540. Let's adjust w to make x "converge quickly". Define function S(w) as:
  541. S(w) = x - w * grad(E) (step)
  542. and another function F(w) as:
  543. F(w) = E(S(w))
  544. which is the error of the step. We minimize F by setting its derivative
  545. to zero:
  546. 0 = dF = dF dS
  547. dw dS dw
  548. What follows is a fair amount of algebra. Do the math and you'll find
  549. the following optimal update rule:
  550. (c'c)
  551. x := x - --------- c
  552. (Ac)'(Ac)
  553. where c = A'r = A'(Ax - b)
  554. */
  555. // gradient descent: super lightweight implementation
  556. const r = new Array(3*n) as number[];
  557. const c = new Array(3) as number[];
  558. const Mc = new Array(3*n) as number[];
  559. // initial guess
  560. const t = new Array(3) as number[];
  561. t[0] = t0[0];
  562. t[1] = t0[1];
  563. t[2] = t0[2];
  564. // iterate
  565. const MAX_ITERATIONS = 15;
  566. const TOLERANCE = 1;
  567. for(let it = 0; it < MAX_ITERATIONS; it++) {
  568. //console.log("it",it+1);
  569. // compute residual r = Mt - v
  570. for(let i = 0; i < n3; i++) {
  571. r[i] = 0;
  572. for(let j = 0; j < 3; j++)
  573. r[i] += m[j*n3 + i] * t[j];
  574. r[i] -= v[i];
  575. }
  576. // compute c = M'r
  577. for(let i = 0; i < 3; i++) {
  578. c[i] = 0;
  579. for(let j = 0; j < n3; j++)
  580. c[i] += m[i*n3 + j] * r[j];
  581. }
  582. // compute Mc
  583. for(let i = 0; i < n3; i++) {
  584. Mc[i] = 0;
  585. for(let j = 0; j < 3; j++)
  586. Mc[i] += m[j*n3 + i] * c[j];
  587. }
  588. // compute c'c
  589. let num = 0;
  590. for(let i = 0; i < 3; i++)
  591. num += c[i] * c[i];
  592. //console.log("c'c=",num);
  593. if(num < TOLERANCE)
  594. break;
  595. // compute (Mc)'(Mc)
  596. let den = 0;
  597. for(let i = 0; i < n3; i++)
  598. den += Mc[i] * Mc[i];
  599. // compute frc = c'c / (Mc)'(Mc)
  600. const frc = num / den;
  601. if(Number.isNaN(frc)) // this shouldn't happen
  602. break;
  603. // iterate: t = t - frc * c
  604. for(let i = 0; i < 3; i++)
  605. t[i] -= frc * c[i];
  606. }
  607. //console.log("OLD t:\n\n",t0.join('\n'));
  608. //console.log("new t:\n\n",t.join('\n'));
  609. // done!
  610. return t;
  611. }
  612. /**
  613. * Apply a smoothing filter to the partial pose
  614. * @param partialPose 3x3 [ r1 | r2 | t ]
  615. * @returns filtered partial pose
  616. */
  617. private _filterPartialPose(partialPose: SpeedyMatrix): SpeedyMatrix
  618. {
  619. const avg: number[] = new Array(9).fill(0);
  620. const entries = partialPose.read();
  621. const rotationBlock = entries.slice(0, 6);
  622. const translationBlock = entries.slice(6, 9);
  623. // how many samples should we store, at most?
  624. const div = (Settings.powerPreference == 'low-power') ? 1.5 : 1; // low-power ~ half the fps
  625. const N = Math.ceil(ROTATION_FILTER_SAMPLES / div);
  626. const M = Math.ceil(TRANSLATION_FILTER_SAMPLES / div);
  627. // is it a valid partial pose?
  628. if(!Number.isNaN(entries[0])) {
  629. // store samples
  630. this._partialRotationBuffer.unshift(rotationBlock);
  631. if(this._partialRotationBuffer.length > N)
  632. this._partialRotationBuffer.length = N;
  633. this._translationBuffer.unshift(translationBlock);
  634. if(this._translationBuffer.length > M)
  635. this._translationBuffer.length = M;
  636. }
  637. else if(this._partialRotationBuffer.length == 0) {
  638. // invalid pose, no samples
  639. return Speedy.Matrix.Eye(3);
  640. }
  641. // average *nearby* rotations
  642. const n = this._partialRotationBuffer.length;
  643. for(let i = 0; i < n; i++) {
  644. const r = this._partialRotationBuffer[i];
  645. for(let j = 0; j < 6; j++)
  646. avg[j] += r[j] / n;
  647. }
  648. const r = this._refineRotation(avg);
  649. // average translations
  650. const m = this._translationBuffer.length;
  651. for(let i = 0; i < m; i++) {
  652. const t = this._translationBuffer[i];
  653. for(let j = 0; j < 3; j++)
  654. avg[6 + j] += (m - i) * t[j] / ((m * m + m) / 2);
  655. //avg[6 + j] += t[j] / m;
  656. }
  657. const t = [ avg[6], avg[7], avg[8] ];
  658. // done!
  659. return Speedy.Matrix(3, 3, r.concat(t));
  660. }
  661. /**
  662. * Estimate extrinsics [ R | t ] given a partial pose [ r1 | r2 | t ]
  663. * @param partialPose
  664. * @returns 3x4 matrix
  665. */
  666. private _estimateFullPose(partialPose: SpeedyMatrix): SpeedyMatrix
  667. {
  668. const p = partialPose.read();
  669. const r11 = p[0], r12 = p[3], t1 = p[6];
  670. const r21 = p[1], r22 = p[4], t2 = p[7];
  671. const r31 = p[2], r32 = p[5], t3 = p[8];
  672. // r3 = +- ( r1 x r2 )
  673. let r13 = r21 * r32 - r31 * r22;
  674. let r23 = r31 * r12 - r11 * r32;
  675. let r33 = r11 * r22 - r21 * r12;
  676. // let's make sure that det R = +1 (keep the orientation)
  677. const det = r11 * (r22 * r33 - r23 * r32) - r21 * (r12 * r33 - r13 * r32) + r31 * (r12 * r23 - r13 * r22);
  678. if(det < 0) {
  679. r13 = -r13;
  680. r23 = -r23;
  681. r33 = -r33;
  682. }
  683. // done!
  684. return Speedy.Matrix(3, 4, [
  685. r11, r21, r31,
  686. r12, r22, r32,
  687. r13, r23, r33,
  688. t1, t2, t3,
  689. ]);
  690. }
  691. /**
  692. * Estimate the pose [ R | t ] given a homography in AR screen space
  693. * @param homography must be valid
  694. * @returns 3x4 matrix
  695. */
  696. private _estimatePose(homography: SpeedyMatrix): SpeedyMatrix
  697. {
  698. const normalizedHomography = this._normalizeHomography(homography);
  699. const partialPose = Speedy.Matrix.Eye(3);
  700. // we want the estimated partial pose [ r1 | r2 | t ] to be as close
  701. // as possible to the normalized homography, up to a scale factor;
  702. // i.e., H * [ r1 | r2 | t ]^(-1) = s * I for a non-zero scalar s
  703. // it won't be a perfect equality due to noise in the homography.
  704. // remark: composition of homographies
  705. const residual = Speedy.Matrix(normalizedHomography);
  706. for(let k = 0; k < POSE_ITERATIONS; k++) {
  707. // incrementally improve the partial pose
  708. const rt = this._estimatePartialPose(residual); // rt should converge to the identity matrix
  709. partialPose.setToSync(rt.times(partialPose));
  710. residual.setToSync(residual.times(rt.inverse()));
  711. //console.log("rt",rt.toString());
  712. //console.log("residual",residual.toString());
  713. }
  714. //console.log('-----------');
  715. // refine the translation vector
  716. const mat = partialPose.read();
  717. const r = mat.slice(0, 6);
  718. const t0 = mat.slice(6, 9);
  719. const t = this._refineTranslation(normalizedHomography, r, t0);
  720. const refinedPartialPose = Speedy.Matrix(3, 3, r.concat(t));
  721. // filter the partial pose
  722. const filteredPartialPose = this._filterPartialPose(refinedPartialPose);
  723. // estimate the full pose
  724. //const finalPartialPose = partialPose;
  725. const finalPartialPose = filteredPartialPose;
  726. return this._estimateFullPose(finalPartialPose);
  727. }
  728. }