Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

image-tracker-utils.ts 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. /*
  2. * encantar.js
  3. * GPU-accelerated Augmented Reality for the web
  4. * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
  5. *
  6. * This program is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published
  8. * by the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public License
  17. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  18. *
  19. * image-tracker-utils.ts
  20. * Image Tracker: Utilities
  21. */
  22. import Speedy from 'speedy-vision';
  23. import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
  24. import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
  25. import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
  26. import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
  27. import { SpeedyPoint2 } from 'speedy-vision/types/core/speedy-point';
  28. import { SpeedyVector2 } from 'speedy-vision/types/core/speedy-vector';
  29. import { SpeedyKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
  30. import { ReferenceImageWithMedia } from './reference-image';
  31. import { Utils } from '../../utils/utils';
  32. import { IllegalOperationError, IllegalArgumentError, NumericalError } from '../../utils/errors';
  33. import { NIS_SIZE, TRACK_GRID_GRANULARITY } from './settings';
  34. import { find6DoFHomography, solvePlanarPnPRansacOptions } from '../../geometry/pnp';
  35. /*
  36. Definitions:
  37. ------------
  38. 1. Raster space:
  39. an image space whose top-left coordinate is (0,0) and whose bottom-right
  40. coordinate is (w-1,h-1), where (w,h) is its size. The y-axis points down.
  41. 2. AR screen size:
  42. size in pixels used for image processing operations. It's determined by the
  43. resolution of the tracker and by the aspect ratio of the input media.
  44. 3. AR screen space (screen):
  45. a raster space whose size is the AR screen size.
  46. 4. Normalized Image Space (NIS):
  47. a raster space whose size is N x N, where N = NIS_SIZE.
  48. 5. Normalized Device Coordinates (NDC):
  49. the normalized 2D space [-1,1]x[-1,1]. The origin is at the center and the
  50. y-axis points up.
  51. */
  52. /** An ordered pair [src, dest] of keypoints */
  53. export type ImageTrackerKeypointPair = [ Readonly<SpeedyKeypoint>, Readonly<SpeedyKeypoint> ];
  54. /**
  55. * Utilities for the Image Tracker
  56. */
  57. export class ImageTrackerUtils
  58. {
  59. /**
  60. * Find a transformation that converts a raster space to NIS
  61. * @param size size of the raster space
  62. * @returns a 3x3 matrix
  63. */
  64. static rasterToNIS(size: SpeedySize): SpeedyMatrix
  65. {
  66. const sx = NIS_SIZE / size.width;
  67. const sy = NIS_SIZE / size.height;
  68. return Speedy.Matrix(3, 3, [
  69. sx, 0, 0,
  70. 0, sy, 0,
  71. 0, 0, 1
  72. ]);
  73. }
  74. /**
  75. * Find a transformation that converts a raster space to NDC
  76. * @param size size of the raster space
  77. * @returns a 3x3 matrix
  78. */
  79. static rasterToNDC(size: SpeedySize): SpeedyMatrix
  80. {
  81. const w = size.width, h = size.height;
  82. return Speedy.Matrix(3, 3, [
  83. 2/w, 0, 0,
  84. 0, -2/h, 0,
  85. -1, 1, 1
  86. ]);
  87. }
  88. /**
  89. * Find a transformation that converts NDC to a raster space
  90. * @param size size of the raster space
  91. * @returns a 3x3 matrix
  92. */
  93. static NDCToRaster(size: SpeedySize): SpeedyMatrix
  94. {
  95. const w = size.width, h = size.height;
  96. return Speedy.Matrix(3, 3, [
  97. w/2, 0, 0,
  98. 0, -h/2, 0,
  99. w/2, h/2, 1
  100. ]);
  101. }
  102. /**
  103. * Find a transformation that scales points in NDC
  104. * @param sx horizontal scale factor
  105. * @param sy vertical scale factor
  106. * @returns a 3x3 matrix
  107. */
  108. static scaleNDC(sx: number, sy: number = sx): SpeedyMatrix
  109. {
  110. // In NDC, the origin is at the center of the space!
  111. return Speedy.Matrix(3, 3, [
  112. sx, 0, 0,
  113. 0, sy, 0,
  114. 0, 0, 1
  115. ]);
  116. }
  117. /**
  118. * Find a scale transformation in NDC such that the output has a desired aspect ratio
  119. * @param aspectRatio desired aspect ratio
  120. * @param scale optional scale factor in both axes
  121. * @returns a 3x3 matrix
  122. */
  123. static bestFitScaleNDC(aspectRatio: number, scale: number = 1): SpeedyMatrix
  124. {
  125. if(aspectRatio >= 1)
  126. return this.scaleNDC(scale, scale / aspectRatio); // s/(s/a) = a, sx >= sy
  127. else
  128. return this.scaleNDC(scale * aspectRatio, scale); // (s*a)/s = a, sx < sy
  129. }
  130. /**
  131. * Find the inverse matrix of bestFitScaleNDC()
  132. * @param aspectRatio as given to bestFitScaleNDC()
  133. * @param scale optional, as given to bestFitScaleNDC()
  134. * @returns a 3x3 matrix
  135. */
  136. static inverseBestFitScaleNDC(aspectRatio: number, scale: number = 1): SpeedyMatrix
  137. {
  138. if(aspectRatio >= 1)
  139. return this.scaleNDC(1 / scale, aspectRatio / scale);
  140. else
  141. return this.scaleNDC(1 / (scale * aspectRatio), 1 / scale);
  142. }
  143. /**
  144. * Find the best-fit aspect ratio for the rectification of the reference image in NDC
  145. * @param screenSize
  146. * @param referenceImage
  147. * @returns a best-fit aspect ratio
  148. */
  149. static bestFitAspectRatioNDC(screenSize: SpeedySize, referenceImage: ReferenceImageWithMedia): number
  150. {
  151. /*
  152. The best-fit aspectRatio (a) is constructed as follows:
  153. 1) a fully stretched(*) and distorted reference image in NDC:
  154. a = 1
  155. 2) a square in NDC:
  156. a = 1 / screenAspectRatio
  157. 3) an image with the aspect ratio of the reference image in NDC:
  158. a = referenceImageAspectRatio * (1 / screenAspectRatio)
  159. (*) AR screen space
  160. By transforming the reference image twice, first by converting it to AR
  161. screen space, and then by rectifying it, we lose a little bit of quality.
  162. Nothing to be too concerned about, though?
  163. */
  164. const screenAspectRatio = screenSize.width / screenSize.height;
  165. return referenceImage.aspectRatio / screenAspectRatio;
  166. }
  167. /**
  168. * Given n > 0 pairs (src_i, dest_i) of keypoints in NIS,
  169. * convert them to NDC and output a 2 x 2n matrix of the form:
  170. * [ src_0.x src_1.x ... | dest_0.x dest_1.x ... ]
  171. * [ src_0.y src_1.y ... | dest_0.y dest_1.y ... ]
  172. * @param pairs pairs of keypoints in NIS
  173. * @returns 2 x 2n matrix with two 2 x n blocks: [ src | dest ]
  174. * @throws
  175. */
  176. static compilePairsOfKeypointsNDC(pairs: ImageTrackerKeypointPair[]): SpeedyMatrix
  177. {
  178. const n = pairs.length;
  179. if(n == 0)
  180. throw new IllegalArgumentError();
  181. const scale = 2 / NIS_SIZE;
  182. const data = new Array<number>(2 * 2*n);
  183. for(let i = 0, j = 0, k = 2*n; i < n; i++, j += 2, k += 2) {
  184. const src = pairs[i][0];
  185. const dest = pairs[i][1];
  186. data[j] = src.x * scale - 1; // convert from NIS to NDC
  187. data[j+1] = 1 - src.y * scale; // flip y-axis
  188. data[k] = dest.x * scale - 1;
  189. data[k+1] = 1 - dest.y * scale;
  190. }
  191. return Speedy.Matrix(2, 2*n, data);
  192. }
  193. /**
  194. * Given n > 0 pairs of keypoints in NDC as a 2 x 2n [ src | dest ] matrix,
  195. * find a 6 DoF perspective warp (homography) from src to dest in NDC
  196. * @param cameraIntrinsics 3x3 camera intrinsics
  197. * @param points compiled pairs of keypoints in NDC
  198. * @param options to be passed to pnp
  199. * @returns a pair [ 3x3 transformation matrix, quality score ]
  200. */
  201. static find6DoFHomographyNDC(cameraIntrinsics: SpeedyMatrix, points: SpeedyMatrix, options: solvePlanarPnPRansacOptions): SpeedyPromise<[SpeedyMatrix,number]>
  202. {
  203. // too few data points?
  204. const n = points.columns / 2;
  205. if(n < 4) {
  206. return Speedy.Promise.reject(
  207. new IllegalArgumentError(`Too few data points to compute a perspective warp`)
  208. );
  209. }
  210. // compute a homography
  211. const src = points.block(0, 1, 0, n-1);
  212. const dest = points.block(0, 1, n, 2*n-1);
  213. const homography = find6DoFHomography(src, dest, cameraIntrinsics, options);
  214. //console.log('homography',homography.toString(), src.toString(), dest.toString());
  215. // quit without refinement (test)
  216. // we use a coarse estimate of the camera intrinsics
  217. //return Speedy.Promise.resolve([homography, 0]);
  218. const mask = Speedy.Matrix.Zeros(1, n);
  219. const intermediate = Speedy.Matrix.Zeros(2, n);
  220. // refine the result of find6DoFHomography() with DLT + RANSAC
  221. return Speedy.Matrix.applyPerspectiveTransform(intermediate, src, homography)
  222. .then(intermediate =>
  223. Speedy.Matrix.findHomography(
  224. Speedy.Matrix.Zeros(3),
  225. intermediate,
  226. dest,
  227. {
  228. method: 'pransac',
  229. numberOfHypotheses: 512, // XXX we can reduce this number without compromising quality
  230. bundleSize: 128, // maybe make it a parameter in case we need an extra performance boost?
  231. reprojectionError: options.reprojectionError,
  232. mask,
  233. }
  234. )
  235. )
  236. .then(adjustment => adjustment.setTo(adjustment.times(homography)))
  237. .then(newHomography => {
  238. // count inliers
  239. let m = 0;
  240. const inliers = mask.read();
  241. for(let i = 0; i < n; i++)
  242. m += inliers[i];
  243. /*
  244. // count and collect inliers
  245. let m = 0;
  246. const _mask = mask.read(), _src = src.read(), _dest = dest.read();
  247. const _isrc = new Array<number>(2*n), _idest = new Array<number>(2*n);
  248. for(let i = 0; i < n; i++) {
  249. if(_mask[i]) {
  250. const j = m++;
  251. _isrc[2*j] = _src[2*i];
  252. _isrc[2*j+1] = _src[2*i+1];
  253. _idest[2*j] = _dest[2*i];
  254. _idest[2*j+1] = _dest[2*i+1];
  255. }
  256. }
  257. _isrc.length = _idest.length = 2*m;
  258. // refine homography
  259. if(m > 0) {
  260. const isrc = Speedy.Matrix(2, m, _isrc);
  261. const idest = Speedy.Matrix(2, m, _idest);
  262. newHomography = refineHomography(newHomography, isrc, idest);
  263. }
  264. */
  265. // done!
  266. const score = m / n;
  267. return [newHomography, score];
  268. });
  269. }
  270. /**
  271. * Given n > 0 pairs of keypoints in NDC as a 2 x 2n [ src | dest ] matrix,
  272. * find a perspective warp (homography) from src to dest in NDC
  273. * @param points compiled pairs of keypoints in NDC
  274. * @param options to be passed to speedy-vision
  275. * @returns a pair [ 3x3 transformation matrix, quality score ]
  276. */
  277. static findPerspectiveWarpNDC(points: SpeedyMatrix, options: object): SpeedyPromise<[SpeedyMatrix,number]>
  278. {
  279. // too few data points?
  280. const n = points.columns / 2;
  281. if(n < 4) {
  282. return Speedy.Promise.reject(
  283. new IllegalArgumentError(`Too few data points to compute a perspective warp`)
  284. );
  285. }
  286. // compute a homography
  287. const src = points.block(0, 1, 0, n-1);
  288. const dest = points.block(0, 1, n, 2*n-1);
  289. const mask = Speedy.Matrix.Zeros(1, n);
  290. return Speedy.Matrix.findHomography(
  291. Speedy.Matrix.Zeros(3),
  292. src,
  293. dest,
  294. Object.assign({ mask }, options)
  295. ).then(homography => {
  296. // check if this is a valid warp
  297. const a00 = homography.at(0,0);
  298. if(Number.isNaN(a00))
  299. throw new NumericalError(`Can't compute a perspective warp: bad keypoints`);
  300. // count the number of inliers
  301. const inliers = mask.read();
  302. let inlierCount = 0;
  303. for(let i = inliers.length - 1; i >= 0; i--)
  304. inlierCount += inliers[i];
  305. const score = inlierCount / inliers.length;
  306. // done!
  307. return [ homography, score ];
  308. });
  309. }
  310. /**
  311. * Given n > 0 pairs of keypoints in NDC as a 2 x 2n [ src | dest ] matrix,
  312. * find an affine warp from src to dest in NDC. The affine warp is given as
  313. * a 3x3 matrix whose last row is [0 0 1]
  314. * @param points compiled pairs of keypoints in NDC
  315. * @param options to be passed to speedy-vision
  316. * @returns a pair [ 3x3 transformation matrix, quality score ]
  317. */
  318. static findAffineWarpNDC(points: SpeedyMatrix, options: object): SpeedyPromise<[SpeedyMatrix,number]>
  319. {
  320. // too few data points?
  321. const n = points.columns / 2;
  322. if(n < 3) {
  323. return Speedy.Promise.reject(
  324. new IllegalArgumentError(`Too few data points to compute an affine warp`)
  325. );
  326. }
  327. // compute an affine transformation
  328. const model = Speedy.Matrix.Eye(3);
  329. const src = points.block(0, 1, 0, n-1);
  330. const dest = points.block(0, 1, n, 2*n-1);
  331. const mask = Speedy.Matrix.Zeros(1, n);
  332. return Speedy.Matrix.findAffineTransform(
  333. model.block(0, 1, 0, 2), // 2x3 submatrix
  334. src,
  335. dest,
  336. Object.assign({ mask }, options)
  337. ).then(_ => {
  338. // check if this is a valid warp
  339. const a00 = model.at(0,0);
  340. if(Number.isNaN(a00))
  341. throw new NumericalError(`Can't compute an affine warp: bad keypoints`);
  342. // count the number of inliers
  343. const inliers = mask.read();
  344. let inlierCount = 0;
  345. for(let i = inliers.length - 1; i >= 0; i--)
  346. inlierCount += inliers[i];
  347. const score = inlierCount / inliers.length;
  348. // done!
  349. return [ model, score ];
  350. });
  351. }
  352. /**
  353. * Find a polyline in Normalized Device Coordinates (NDC)
  354. * @param homography maps the corners of NDC to a quadrilateral in NDC
  355. * @returns 4 points in NDC
  356. */
  357. static findPolylineNDC(homography: SpeedyMatrix): SpeedyPoint2[]
  358. {
  359. const h = homography.read();
  360. const uv = [ -1, +1, -1, -1, +1, -1, +1, +1 ]; // the corners of a reference image in NDC
  361. const polyline = new Array<SpeedyPoint2>(4);
  362. for(let i = 0, j = 0; i < 4; i++, j += 2) {
  363. const u = uv[j], v = uv[j+1];
  364. const x = h[0]*u + h[3]*v + h[6];
  365. const y = h[1]*u + h[4]*v + h[7];
  366. const w = h[2]*u + h[5]*v + h[8];
  367. polyline[i] = Speedy.Point2(x/w, y/w);
  368. }
  369. return polyline;
  370. }
  371. /**
  372. * Find a better spatial distribution of the input matches
  373. * @param pairs in the [src, dest] format
  374. * @returns refined pairs of quality matches
  375. */
  376. static refineMatchingPairs(pairs: ImageTrackerKeypointPair[]): ImageTrackerKeypointPair[]
  377. {
  378. // collect all keypoints obtained in this frame
  379. const m = pairs.length;
  380. const destKeypoints = new Array<SpeedyKeypoint>(m);
  381. for(let j = 0; j < m; j++)
  382. destKeypoints[j] = pairs[j][1];
  383. // find a better spatial distribution of the keypoints
  384. const indices = this._distributeKeypoints(destKeypoints);
  385. // assemble output
  386. const n = indices.length; // number of refined matches
  387. const result = new Array<ImageTrackerKeypointPair>(n);
  388. for(let i = 0; i < n; i++)
  389. result[i] = pairs[indices[i]];
  390. // done!
  391. return result;
  392. }
  393. /**
  394. * Spatially distribute keypoints over a grid
  395. * @param keypoints keypoints to be distributed
  396. * @returns a list of indices of keypoints[]
  397. */
  398. private static _distributeKeypoints(keypoints: SpeedyKeypoint[]): number[]
  399. {
  400. // create a grid
  401. const gridCells = TRACK_GRID_GRANULARITY; // number of grid elements in each axis
  402. const numberOfCells = gridCells * gridCells;
  403. const n = keypoints.length;
  404. // get the coordinates of the keypoints
  405. const points: number[] = new Array(2 * n);
  406. for(let i = 0, j = 0; i < n; i++, j += 2) {
  407. points[j] = keypoints[i].x;
  408. points[j+1] = keypoints[i].y;
  409. }
  410. // normalize the coordinates to [0,1) x [0,1)
  411. this._normalizePoints(points);
  412. // distribute the keypoints over the grid
  413. const grid = new Array<number>(numberOfCells).fill(-1);
  414. for(let i = 0, j = 0; i < n; i++, j += 2) {
  415. // find the grid location of the i-th point
  416. const xg = Math.floor(points[j] * gridCells); // 0 <= xg,yg < gridCells
  417. const yg = Math.floor(points[j+1] * gridCells);
  418. // store the index of the i-th point in the grid
  419. const k = yg * gridCells + xg;
  420. if(grid[k] < 0)
  421. grid[k] = i;
  422. }
  423. // retrieve points of the grid
  424. let m = 0;
  425. const indices = new Array<number>(numberOfCells);
  426. for(let g = 0; g < numberOfCells; g++) {
  427. if(grid[g] >= 0)
  428. indices[m++] = grid[g];
  429. }
  430. indices.length = m;
  431. // done!
  432. return indices;
  433. }
  434. /**
  435. * Normalize points to [0,1)^2
  436. * @param points 2 x n matrix of points in column-major format
  437. * @returns points
  438. */
  439. private static _normalizePoints(points: number[]): number[]
  440. {
  441. Utils.assert(points.length % 2 == 0);
  442. const n = points.length / 2;
  443. if(n == 0)
  444. return points;
  445. let xmin = Number.POSITIVE_INFINITY, xmax = Number.NEGATIVE_INFINITY;
  446. let ymin = Number.POSITIVE_INFINITY, ymax = Number.NEGATIVE_INFINITY;
  447. for(let i = 0, j = 0; i < n; i++, j += 2) {
  448. const x = points[j], y = points[j+1];
  449. xmin = x < xmin ? x : xmin;
  450. ymin = y < ymin ? y : ymin;
  451. xmax = x > xmax ? x : xmax;
  452. ymax = y > ymax ? y : ymax;
  453. }
  454. const xlen = xmax - xmin + 1; // +1 is a correction factor, so that 0 <= x,y < 1
  455. const ylen = ymax - ymin + 1;
  456. for(let i = 0, j = 0; i < n; i++, j += 2) {
  457. points[j] = (points[j] - xmin) / xlen;
  458. points[j+1] = (points[j+1] - ymin) / ylen;
  459. }
  460. return points;
  461. }
  462. }