123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544 |
- /*
- * encantar.js
- * GPU-accelerated Augmented Reality for the web
- * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
- *
- * image-tracker-utils.ts
- * Image Tracker: Utilities
- */
-
- import Speedy from 'speedy-vision';
- import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
- import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
- import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
- import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
- import { SpeedyPoint2 } from 'speedy-vision/types/core/speedy-point';
- import { SpeedyVector2 } from 'speedy-vision/types/core/speedy-vector';
- import { SpeedyKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
- import { ReferenceImageWithMedia } from './reference-image';
- import { Utils } from '../../utils/utils';
- import { IllegalOperationError, IllegalArgumentError, NumericalError } from '../../utils/errors';
- import { NIS_SIZE, TRACK_GRID_GRANULARITY } from './settings';
- import { find6DoFHomography, solvePlanarPnPRansacOptions } from '../../geometry/pnp';
-
- /*
-
- Definitions:
- ------------
-
- 1. Raster space:
- an image space whose top-left coordinate is (0,0) and whose bottom-right
- coordinate is (w-1,h-1), where (w,h) is its size. The y-axis points down.
-
- 2. AR screen size:
- size in pixels used for image processing operations. It's determined by the
- resolution of the tracker and by the aspect ratio of the input media.
-
- 3. AR screen space (screen):
- a raster space whose size is the AR screen size.
-
- 4. Normalized Image Space (NIS):
- a raster space whose size is N x N, where N = NIS_SIZE.
-
- 5. Normalized Device Coordinates (NDC):
- the normalized 2D space [-1,1]x[-1,1]. The origin is at the center and the
- y-axis points up.
-
- */
-
- /** An ordered pair [src, dest] of keypoints */
- export type ImageTrackerKeypointPair = [ Readonly<SpeedyKeypoint>, Readonly<SpeedyKeypoint> ];
-
- /**
- * Utilities for the Image Tracker
- */
- export class ImageTrackerUtils
- {
- /**
- * Find a transformation that converts a raster space to NIS
- * @param size size of the raster space
- * @returns a 3x3 matrix
- */
- static rasterToNIS(size: SpeedySize): SpeedyMatrix
- {
- const sx = NIS_SIZE / size.width;
- const sy = NIS_SIZE / size.height;
-
- return Speedy.Matrix(3, 3, [
- sx, 0, 0,
- 0, sy, 0,
- 0, 0, 1
- ]);
- }
-
- /**
- * Find a transformation that converts a raster space to NDC
- * @param size size of the raster space
- * @returns a 3x3 matrix
- */
- static rasterToNDC(size: SpeedySize): SpeedyMatrix
- {
- const w = size.width, h = size.height;
-
- return Speedy.Matrix(3, 3, [
- 2/w, 0, 0,
- 0, -2/h, 0,
- -1, 1, 1
- ]);
- }
-
- /**
- * Find a transformation that converts NDC to a raster space
- * @param size size of the raster space
- * @returns a 3x3 matrix
- */
- static NDCToRaster(size: SpeedySize): SpeedyMatrix
- {
- const w = size.width, h = size.height;
-
- return Speedy.Matrix(3, 3, [
- w/2, 0, 0,
- 0, -h/2, 0,
- w/2, h/2, 1
- ]);
- }
-
- /**
- * Find a transformation that scales points in NDC
- * @param sx horizontal scale factor
- * @param sy vertical scale factor
- * @returns a 3x3 matrix
- */
- static scaleNDC(sx: number, sy: number = sx): SpeedyMatrix
- {
- // In NDC, the origin is at the center of the space!
- return Speedy.Matrix(3, 3, [
- sx, 0, 0,
- 0, sy, 0,
- 0, 0, 1
- ]);
- }
-
- /**
- * Find a scale transformation in NDC such that the output has a desired aspect ratio
- * @param aspectRatio desired aspect ratio
- * @param scale optional scale factor in both axes
- * @returns a 3x3 matrix
- */
- static bestFitScaleNDC(aspectRatio: number, scale: number = 1): SpeedyMatrix
- {
- if(aspectRatio >= 1)
- return this.scaleNDC(scale, scale / aspectRatio); // s/(s/a) = a, sx >= sy
- else
- return this.scaleNDC(scale * aspectRatio, scale); // (s*a)/s = a, sx < sy
- }
-
- /**
- * Find the inverse matrix of bestFitScaleNDC()
- * @param aspectRatio as given to bestFitScaleNDC()
- * @param scale optional, as given to bestFitScaleNDC()
- * @returns a 3x3 matrix
- */
- static inverseBestFitScaleNDC(aspectRatio: number, scale: number = 1): SpeedyMatrix
- {
- if(aspectRatio >= 1)
- return this.scaleNDC(1 / scale, aspectRatio / scale);
- else
- return this.scaleNDC(1 / (scale * aspectRatio), 1 / scale);
- }
-
- /**
- * Find the best-fit aspect ratio for the rectification of the reference image in NDC
- * @param screenSize
- * @param referenceImage
- * @returns a best-fit aspect ratio
- */
- static bestFitAspectRatioNDC(screenSize: SpeedySize, referenceImage: ReferenceImageWithMedia): number
- {
- /*
-
- The best-fit aspectRatio (a) is constructed as follows:
-
- 1) a fully stretched(*) and distorted reference image in NDC:
- a = 1
-
- 2) a square in NDC:
- a = 1 / screenAspectRatio
-
- 3) an image with the aspect ratio of the reference image in NDC:
- a = referenceImageAspectRatio * (1 / screenAspectRatio)
-
- (*) AR screen space
-
- By transforming the reference image twice, first by converting it to AR
- screen space, and then by rectifying it, we lose a little bit of quality.
- Nothing to be too concerned about, though?
-
- */
-
- const screenAspectRatio = screenSize.width / screenSize.height;
- return referenceImage.aspectRatio / screenAspectRatio;
- }
-
- /**
- * Given n > 0 pairs (src_i, dest_i) of keypoints in NIS,
- * convert them to NDC and output a 2 x 2n matrix of the form:
- * [ src_0.x src_1.x ... | dest_0.x dest_1.x ... ]
- * [ src_0.y src_1.y ... | dest_0.y dest_1.y ... ]
- * @param pairs pairs of keypoints in NIS
- * @returns 2 x 2n matrix with two 2 x n blocks: [ src | dest ]
- * @throws
- */
- static compilePairsOfKeypointsNDC(pairs: ImageTrackerKeypointPair[]): SpeedyMatrix
- {
- const n = pairs.length;
-
- if(n == 0)
- throw new IllegalArgumentError();
-
- const scale = 2 / NIS_SIZE;
- const data = new Array<number>(2 * 2*n);
-
- for(let i = 0, j = 0, k = 2*n; i < n; i++, j += 2, k += 2) {
- const src = pairs[i][0];
- const dest = pairs[i][1];
-
- data[j] = src.x * scale - 1; // convert from NIS to NDC
- data[j+1] = 1 - src.y * scale; // flip y-axis
-
- data[k] = dest.x * scale - 1;
- data[k+1] = 1 - dest.y * scale;
- }
-
- return Speedy.Matrix(2, 2*n, data);
- }
-
- /**
- * Given n > 0 pairs of keypoints in NDC as a 2 x 2n [ src | dest ] matrix,
- * find a 6 DoF perspective warp (homography) from src to dest in NDC
- * @param cameraIntrinsics 3x3 camera intrinsics
- * @param points compiled pairs of keypoints in NDC
- * @param options to be passed to pnp
- * @returns a pair [ 3x3 transformation matrix, quality score ]
- */
- static find6DoFHomographyNDC(cameraIntrinsics: SpeedyMatrix, points: SpeedyMatrix, options: solvePlanarPnPRansacOptions): SpeedyPromise<[SpeedyMatrix,number]>
- {
- // too few data points?
- const n = points.columns / 2;
- if(n < 4) {
- return Speedy.Promise.reject(
- new IllegalArgumentError(`Too few data points to compute a perspective warp`)
- );
- }
-
- // compute a homography
- const src = points.block(0, 1, 0, n-1);
- const dest = points.block(0, 1, n, 2*n-1);
- const homography = find6DoFHomography(src, dest, cameraIntrinsics, options);
- //console.log('homography',homography.toString(), src.toString(), dest.toString());
-
- // quit without refinement (test)
- // we use a coarse estimate of the camera intrinsics
- //return Speedy.Promise.resolve([homography, 0]);
-
- const mask = Speedy.Matrix.Zeros(1, n);
- const intermediate = Speedy.Matrix.Zeros(2, n);
-
- // refine the result of find6DoFHomography() with DLT + RANSAC
- return Speedy.Matrix.applyPerspectiveTransform(intermediate, src, homography)
- .then(intermediate =>
- Speedy.Matrix.findHomography(
- Speedy.Matrix.Zeros(3),
- intermediate,
- dest,
- {
- method: 'pransac',
- numberOfHypotheses: 512, // XXX we can reduce this number without compromising quality
- bundleSize: 128, // maybe make it a parameter in case we need an extra performance boost?
- reprojectionError: options.reprojectionError,
- mask,
- }
- )
- )
- .then(adjustment => adjustment.setTo(adjustment.times(homography)))
- .then(newHomography => {
-
- // count inliers
- let m = 0;
- const inliers = mask.read();
- for(let i = 0; i < n; i++)
- m += inliers[i];
-
- /*
- // count and collect inliers
- let m = 0;
- const _mask = mask.read(), _src = src.read(), _dest = dest.read();
- const _isrc = new Array<number>(2*n), _idest = new Array<number>(2*n);
- for(let i = 0; i < n; i++) {
- if(_mask[i]) {
- const j = m++;
- _isrc[2*j] = _src[2*i];
- _isrc[2*j+1] = _src[2*i+1];
- _idest[2*j] = _dest[2*i];
- _idest[2*j+1] = _dest[2*i+1];
- }
- }
- _isrc.length = _idest.length = 2*m;
-
- // refine homography
- if(m > 0) {
- const isrc = Speedy.Matrix(2, m, _isrc);
- const idest = Speedy.Matrix(2, m, _idest);
- newHomography = refineHomography(newHomography, isrc, idest);
- }
- */
-
- // done!
- const score = m / n;
- return [newHomography, score];
-
- });
- }
-
- /**
- * Given n > 0 pairs of keypoints in NDC as a 2 x 2n [ src | dest ] matrix,
- * find a perspective warp (homography) from src to dest in NDC
- * @param points compiled pairs of keypoints in NDC
- * @param options to be passed to speedy-vision
- * @returns a pair [ 3x3 transformation matrix, quality score ]
- */
- static findPerspectiveWarpNDC(points: SpeedyMatrix, options: object): SpeedyPromise<[SpeedyMatrix,number]>
- {
- // too few data points?
- const n = points.columns / 2;
- if(n < 4) {
- return Speedy.Promise.reject(
- new IllegalArgumentError(`Too few data points to compute a perspective warp`)
- );
- }
-
- // compute a homography
- const src = points.block(0, 1, 0, n-1);
- const dest = points.block(0, 1, n, 2*n-1);
- const mask = Speedy.Matrix.Zeros(1, n);
-
- return Speedy.Matrix.findHomography(
- Speedy.Matrix.Zeros(3),
- src,
- dest,
- Object.assign({ mask }, options)
- ).then(homography => {
-
- // check if this is a valid warp
- const a00 = homography.at(0,0);
- if(Number.isNaN(a00))
- throw new NumericalError(`Can't compute a perspective warp: bad keypoints`);
-
- // count the number of inliers
- const inliers = mask.read();
- let inlierCount = 0;
- for(let i = inliers.length - 1; i >= 0; i--)
- inlierCount += inliers[i];
- const score = inlierCount / inliers.length;
-
- // done!
- return [ homography, score ];
-
- });
- }
-
- /**
- * Given n > 0 pairs of keypoints in NDC as a 2 x 2n [ src | dest ] matrix,
- * find an affine warp from src to dest in NDC. The affine warp is given as
- * a 3x3 matrix whose last row is [0 0 1]
- * @param points compiled pairs of keypoints in NDC
- * @param options to be passed to speedy-vision
- * @returns a pair [ 3x3 transformation matrix, quality score ]
- */
- static findAffineWarpNDC(points: SpeedyMatrix, options: object): SpeedyPromise<[SpeedyMatrix,number]>
- {
- // too few data points?
- const n = points.columns / 2;
- if(n < 3) {
- return Speedy.Promise.reject(
- new IllegalArgumentError(`Too few data points to compute an affine warp`)
- );
- }
-
- // compute an affine transformation
- const model = Speedy.Matrix.Eye(3);
- const src = points.block(0, 1, 0, n-1);
- const dest = points.block(0, 1, n, 2*n-1);
- const mask = Speedy.Matrix.Zeros(1, n);
-
- return Speedy.Matrix.findAffineTransform(
- model.block(0, 1, 0, 2), // 2x3 submatrix
- src,
- dest,
- Object.assign({ mask }, options)
- ).then(_ => {
-
- // check if this is a valid warp
- const a00 = model.at(0,0);
- if(Number.isNaN(a00))
- throw new NumericalError(`Can't compute an affine warp: bad keypoints`);
-
- // count the number of inliers
- const inliers = mask.read();
- let inlierCount = 0;
- for(let i = inliers.length - 1; i >= 0; i--)
- inlierCount += inliers[i];
- const score = inlierCount / inliers.length;
-
- // done!
- return [ model, score ];
-
- });
- }
-
- /**
- * Find a polyline in Normalized Device Coordinates (NDC)
- * @param homography maps the corners of NDC to a quadrilateral in NDC
- * @returns 4 points in NDC
- */
- static findPolylineNDC(homography: SpeedyMatrix): SpeedyPoint2[]
- {
- const h = homography.read();
- const uv = [ -1, +1, -1, -1, +1, -1, +1, +1 ]; // the corners of a reference image in NDC
- const polyline = new Array<SpeedyPoint2>(4);
-
- for(let i = 0, j = 0; i < 4; i++, j += 2) {
- const u = uv[j], v = uv[j+1];
-
- const x = h[0]*u + h[3]*v + h[6];
- const y = h[1]*u + h[4]*v + h[7];
- const w = h[2]*u + h[5]*v + h[8];
-
- polyline[i] = Speedy.Point2(x/w, y/w);
- }
-
- return polyline;
- }
-
- /**
- * Find a better spatial distribution of the input matches
- * @param pairs in the [src, dest] format
- * @returns refined pairs of quality matches
- */
- static refineMatchingPairs(pairs: ImageTrackerKeypointPair[]): ImageTrackerKeypointPair[]
- {
- // collect all keypoints obtained in this frame
- const m = pairs.length;
- const destKeypoints = new Array<SpeedyKeypoint>(m);
-
- for(let j = 0; j < m; j++)
- destKeypoints[j] = pairs[j][1];
-
- // find a better spatial distribution of the keypoints
- const indices = this._distributeKeypoints(destKeypoints);
-
- // assemble output
- const n = indices.length; // number of refined matches
- const result = new Array<ImageTrackerKeypointPair>(n);
-
- for(let i = 0; i < n; i++)
- result[i] = pairs[indices[i]];
-
- // done!
- return result;
- }
-
- /**
- * Spatially distribute keypoints over a grid
- * @param keypoints keypoints to be distributed
- * @returns a list of indices of keypoints[]
- */
- private static _distributeKeypoints(keypoints: SpeedyKeypoint[]): number[]
- {
- // create a grid
- const gridCells = TRACK_GRID_GRANULARITY; // number of grid elements in each axis
- const numberOfCells = gridCells * gridCells;
- const n = keypoints.length;
-
- // get the coordinates of the keypoints
- const points: number[] = new Array(2 * n);
- for(let i = 0, j = 0; i < n; i++, j += 2) {
- points[j] = keypoints[i].x;
- points[j+1] = keypoints[i].y;
- }
-
- // normalize the coordinates to [0,1) x [0,1)
- this._normalizePoints(points);
-
- // distribute the keypoints over the grid
- const grid = new Array<number>(numberOfCells).fill(-1);
- for(let i = 0, j = 0; i < n; i++, j += 2) {
- // find the grid location of the i-th point
- const xg = Math.floor(points[j] * gridCells); // 0 <= xg,yg < gridCells
- const yg = Math.floor(points[j+1] * gridCells);
-
- // store the index of the i-th point in the grid
- const k = yg * gridCells + xg;
- if(grid[k] < 0)
- grid[k] = i;
- }
-
- // retrieve points of the grid
- let m = 0;
- const indices = new Array<number>(numberOfCells);
- for(let g = 0; g < numberOfCells; g++) {
- if(grid[g] >= 0)
- indices[m++] = grid[g];
- }
- indices.length = m;
-
- // done!
- return indices;
- }
-
- /**
- * Normalize points to [0,1)^2
- * @param points 2 x n matrix of points in column-major format
- * @returns points
- */
- private static _normalizePoints(points: number[]): number[]
- {
- Utils.assert(points.length % 2 == 0);
-
- const n = points.length / 2;
- if(n == 0)
- return points;
-
- let xmin = Number.POSITIVE_INFINITY, xmax = Number.NEGATIVE_INFINITY;
- let ymin = Number.POSITIVE_INFINITY, ymax = Number.NEGATIVE_INFINITY;
- for(let i = 0, j = 0; i < n; i++, j += 2) {
- const x = points[j], y = points[j+1];
- xmin = x < xmin ? x : xmin;
- ymin = y < ymin ? y : ymin;
- xmax = x > xmax ? x : xmax;
- ymax = y > ymax ? y : ymax;
- }
-
- const xlen = xmax - xmin + 1; // +1 is a correction factor, so that 0 <= x,y < 1
- const ylen = ymax - ymin + 1;
- for(let i = 0, j = 0; i < n; i++, j += 2) {
- points[j] = (points[j] - xmin) / xlen;
- points[j+1] = (points[j+1] - ymin) / ylen;
- }
-
- return points;
- }
- }
|