Sfoglia il codice sorgente

Merge branch 'ndc'

customisations
alemart 9 mesi fa
parent
commit
93b43c3180

+ 2
- 2
demos/hello-aframe/index.html Vedi File

@@ -86,7 +86,7 @@
86 86
             <ar-root reference-image="mage">
87 87
 
88 88
                 <!-- Switch from top view to front view -->
89
-                <a-entity rotation="-90 0 0" position="0 -0.5 0">
89
+                <a-entity rotation="-90 0 0" position="0 -0.8 0">
90 90
 
91 91
                     <!-- Light -->
92 92
                     <a-light type="ambient" intensity="1.5"></a-light>
@@ -109,7 +109,7 @@
109 109
 
110 110
             <!-- The cat -->
111 111
             <ar-root reference-image="cat">
112
-                <a-entity rotation="-90 0 0" position="0 -0.5 0">
112
+                <a-entity rotation="-90 0 0" position="0 -0.8 0">
113 113
                     <a-light type="ambient" intensity="1.5"></a-light>
114 114
 
115 115
                     <a-entity

+ 2
- 2
demos/hello-aframe/video.html Vedi File

@@ -86,7 +86,7 @@
86 86
             <ar-root reference-image="mage">
87 87
 
88 88
                 <!-- Switch from top view to front view -->
89
-                <a-entity rotation="-90 0 0" position="0 -0.5 0">
89
+                <a-entity rotation="-90 0 0" position="0 -0.8 0">
90 90
 
91 91
                     <!-- Light -->
92 92
                     <a-light type="ambient" intensity="1.5"></a-light>
@@ -109,7 +109,7 @@
109 109
 
110 110
             <!-- The cat -->
111 111
             <ar-root reference-image="cat">
112
-                <a-entity rotation="-90 0 0" position="0 -0.5 0">
112
+                <a-entity rotation="-90 0 0" position="0 -0.8 0">
113 113
                     <a-light type="ambient" intensity="1.5"></a-light>
114 114
 
115 115
                     <a-entity

+ 12
- 2
demos/hello-babylon/demo.js Vedi File

@@ -18,6 +18,7 @@ class EnchantedDemo extends ARDemo
18 18
         super();
19 19
 
20 20
         this._objects = { };
21
+        this._initialized = false;
21 22
     }
22 23
 
23 24
     /**
@@ -99,7 +100,7 @@ class EnchantedDemo extends ARDemo
99 100
         });
100 101
 
101 102
         // Change the point of view - slightly
102
-        ar.root.position.y = -0.5;
103
+        ar.root.position.y = -0.8;
103 104
 
104 105
         // Initialize objects
105 106
         this._initLight(ar);
@@ -110,6 +111,9 @@ class EnchantedDemo extends ARDemo
110 111
             this._initMage(ar),
111 112
             this._initCat(ar),
112 113
         ]);
114
+
115
+        // done!
116
+        this._initialized = true;
113 117
     }
114 118
 
115 119
     /**
@@ -130,7 +134,7 @@ class EnchantedDemo extends ARDemo
130 134
 
131 135
     _initLight(ar)
132 136
     {
133
-        const light = new BABYLON.HemisphericLight('light', BABYLON.Vector3.Down());
137
+        const light = new BABYLON.HemisphericLight('light', BABYLON.Vector3.Up());
134 138
         light.intensity = 1.0;
135 139
         light.diffuse.set(1, 1, 0.9);
136 140
         light.specular.set(0, 0, 0);
@@ -229,6 +233,12 @@ class EnchantedDemo extends ARDemo
229 233
 
230 234
     _onTargetFound(referenceImage)
231 235
     {
236
+        // make sure that the scene is initialized
237
+        if(!this._initialized) {
238
+            alert(`Target \"${referenceImage.name}\" was found, but the 3D scene is not yet initialized!`);
239
+            return;
240
+        }
241
+
232 242
         // change the scene based on the tracked image
233 243
         switch(referenceImage.name) {
234 244
             case 'mage':

+ 12
- 2
demos/hello-three/demo.js Vedi File

@@ -92,6 +92,7 @@ class EnchantedDemo extends ARDemo
92 92
         super();
93 93
 
94 94
         this._objects = { };
95
+        this._initialized = false;
95 96
     }
96 97
 
97 98
     /**
@@ -169,9 +170,9 @@ class EnchantedDemo extends ARDemo
169 170
         // ar.root, a node that is automatically aligned to the physical scene.
170 171
         // Adjusting ar.root will adjust all virtual objects.
171 172
         Utils.switchToFrontView(ar);
172
-        ar.root.position.set(0, -0.5, 0);
173
+        ar.root.position.set(0, -0.8, 0);
173 174
 
174
-        // initialize objects
175
+        // Initialize objects
175 176
         this._initLight(ar);
176 177
         this._initText(ar);
177 178
         this._initMagicCircle(ar);
@@ -180,6 +181,9 @@ class EnchantedDemo extends ARDemo
180 181
             this._initMage(ar),
181 182
             this._initCat(ar),
182 183
         ]);
184
+
185
+        // done!
186
+        this._initialized = true;
183 187
     }
184 188
 
185 189
     /**
@@ -298,6 +302,12 @@ class EnchantedDemo extends ARDemo
298 302
 
299 303
     _onTargetFound(referenceImage)
300 304
     {
305
+        // make sure that the scene is initialized
306
+        if(!this._initialized) {
307
+            alert(`Target \"${referenceImage.name}\" was found, but the 3D scene is not yet initialized!`);
308
+            return;
309
+        }
310
+
301 311
         // change the scene based on the tracked image
302 312
         switch(referenceImage.name) {
303 313
             case 'mage':

+ 4
- 2
docs/api/reference-image.md Vedi File

@@ -12,6 +12,8 @@ A name used to identify this reference image in a [database](reference-image-dat
12 12
 
13 13
 ### image
14 14
 
15
-`referenceImage.image: HTMLImageElement | HTMLCanvasElement | ImageBitmap, read-only`
15
+`referenceImage.image: HTMLImageElement | ImageBitmap | ImageData, read-only`
16 16
 
17
-Image template with pixel data.
17
+Image template with pixel data.
18
+
19
+*Note:* `ImageData` is acceptable since version 0.4.0.

+ 9
- 6
src/core/session.ts Vedi File

@@ -541,14 +541,14 @@ export class Session extends AREventTarget<SessionEventType>
541 541
         // render user media
542 542
         if(this._primarySource !== null) {
543 543
             const media = this._primarySource._internalMedia;
544
-            this._renderMedia(ctx, media);
544
+            this._renderMedia(ctx, media, true);
545 545
         }
546 546
 
547
-        // render output image(s)
547
+        // render output image(s) for debugging
548 548
         for(let i = 0; i < this._trackers.length; i++) {
549 549
             const media = this._trackers[i]._output.image;
550 550
             if(media !== undefined)
551
-                this._renderMedia(ctx, media);
551
+                this._renderMedia(ctx, media, false);
552 552
         }
553 553
 
554 554
         // render gizmos
@@ -559,18 +559,21 @@ export class Session extends AREventTarget<SessionEventType>
559 559
      * Render a SpeedyMedia
560 560
      * @param ctx rendering context
561 561
      * @param media
562
+     * @param stretch
562 563
      */
563
-    private _renderMedia(ctx: CanvasRenderingContext2D, media: SpeedyMedia): void
564
+    private _renderMedia(ctx: CanvasRenderingContext2D, media: SpeedyMedia, stretch: boolean): void
564 565
     {
565 566
         const canvas = ctx.canvas;
567
+        const width = stretch ? canvas.width : media.width;
568
+        const height = stretch ? canvas.height : media.height;
566 569
 
567 570
         if(media.type != 'data') {
568 571
             const image = media.source as Exclude<SpeedyMediaSourceNativeElement, ImageData>;
569
-            ctx.drawImage(image, 0, 0, canvas.width, canvas.height);
572
+            ctx.drawImage(image, 0, 0, width, height);
570 573
         }
571 574
         else {
572 575
             const image = media.source as ImageData;
573
-            ctx.putImageData(image, 0, 0, 0, 0, canvas.width, canvas.height);
576
+            ctx.putImageData(image, 0, 0, 0, 0, width, height);
574 577
         }
575 578
     }
576 579
 

+ 225
- 161
src/geometry/camera-model.ts Vedi File

@@ -29,13 +29,13 @@ import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
29 29
 import { Nullable, Utils } from '../utils/utils';
30 30
 import { Settings } from '../core/settings';
31 31
 import { PoseFilter } from './pose-filter';
32
-import { IllegalOperationError, IllegalArgumentError } from '../utils/errors';
32
+import { NumericalError } from '../utils/errors';
33 33
 
34 34
 /** A guess of the horizontal field-of-view of a typical camera, in degrees */
35 35
 const HFOV_GUESS = 60; // https://developer.apple.com/library/archive/documentation/DeviceInformation/Reference/iOSDeviceCompatibility/Cameras/Cameras.html
36 36
 
37
-/** Number of iterations used to refine the estimated pose */
38
-const POSE_ITERATIONS = 30;
37
+/** The default scale of the image plane. The scale affects the focal length */
38
+const DEFAULT_SCALE = 2; // the length of the [-1,+1] interval
39 39
 
40 40
 /** Convert degrees to radians */
41 41
 const DEG2RAD = 0.017453292519943295; // pi / 180
@@ -47,17 +47,28 @@ const RAD2DEG = 57.29577951308232; // 180 / pi
47 47
 const EPSILON = 1e-6;
48 48
 
49 49
 /** Index of the horizontal focal length in the camera intrinsics matrix (column-major format) */
50
-export const FX = 0;
50
+const FX = 0;
51 51
 
52 52
 /** Index of the vertical focal length in the camera intrinsics matrix */
53
-export const FY = 4;
53
+const FY = 4;
54 54
 
55 55
 /** Index of the horizontal position of the principal point in the camera intrinsics matrix */
56
-export const U0 = 6;
56
+const U0 = 6;
57 57
 
58 58
 /** Index of the vertical position of the principal point in the camera intrinsics matrix */
59
-export const V0 = 7;
59
+const V0 = 7;
60
+
61
+/** Number of iterations used to refine the estimated pose */
62
+const POSE_REFINEMENT_ITERATIONS = 30;
60 63
 
64
+/** Maximum number of iterations used when refining the translation vector */
65
+const TRANSLATION_REFINEMENT_ITERATIONS = 15;
66
+
67
+/** Tolerance used to exit early when refining the translation vector */
68
+const TRANSLATION_REFINEMENT_TOLERANCE = DEFAULT_SCALE * 0.01;
69
+
70
+/** Size of the grid used to refine the translation vector */
71
+const TRANSLATION_REFINEMENT_GRIDSIZE = 5; //3;
61 72
 
62 73
 
63 74
 
@@ -66,16 +77,19 @@ export const V0 = 7;
66 77
  */
67 78
 export class CameraModel
68 79
 {
69
-    /** size of the image sensor, in pixels */
70
-    private _screenSize: SpeedySize;
80
+    /** size of the image plane */
81
+    private _imageSize: SpeedySize;
71 82
 
72 83
     /** 3x4 camera matrix */
73 84
     private _matrix: SpeedyMatrix;
74 85
 
75
-    /** intrinsics matrix, in column-major format */
86
+    /** a helper to switch the handedness of a coordinate system */
87
+    private _flipZ: SpeedyMatrix;
88
+
89
+    /** entries of the intrinsics matrix in column-major format */
76 90
     private _intrinsics: number[];
77 91
 
78
-    /** extrinsics matrix, in column-major format */
92
+    /** entries of the extrinsics matrix in column-major format */
79 93
     private _extrinsics: number[];
80 94
 
81 95
     /** smoothing filter */
@@ -88,32 +102,43 @@ export class CameraModel
88 102
      */
89 103
     constructor()
90 104
     {
91
-        this._screenSize = Speedy.Size(0, 0);
105
+        this._imageSize = Speedy.Size(0, 0);
92 106
         this._matrix = Speedy.Matrix.Eye(3, 4);
93 107
         this._intrinsics = [1,0,0,0,1,0,0,0,1]; // 3x3 identity matrix
94 108
         this._extrinsics = [1,0,0,0,1,0,0,0,1,0,0,0]; // 3x4 matrix [ R | t ] = [ I | 0 ] no rotation & no translation
95 109
         this._filter = new PoseFilter();
110
+        this._flipZ = Speedy.Matrix(4, 4, [
111
+            1, 0, 0, 0,
112
+            0, 1, 0, 0,
113
+            0, 0,-1, 0,
114
+            0, 0, 0, 1
115
+        ]);
96 116
     }
97 117
 
98 118
     /**
99 119
      * Initialize the model
100
-     * @param screenSize
120
+     * @param aspectRatio aspect ratio of the image plane
121
+     * @param scale optional scale factor of the image plane
101 122
      */
102
-    init(screenSize: SpeedySize): void
123
+    init(aspectRatio: number, scale: number = DEFAULT_SCALE): void
103 124
     {
104
-        // validate
105
-        if(screenSize.area() == 0)
106
-            throw new IllegalArgumentError(`Can't initialize the camera model with screenSize = ${screenSize.toString()}`);
125
+        // log
126
+        Utils.log(`Initializing the camera model...`);
127
+        Utils.assert(aspectRatio > 0 && scale > 1e-5);
107 128
 
108
-        // set the screen size
109
-        this._screenSize.width = screenSize.width;
110
-        this._screenSize.height = screenSize.height;
129
+        // set the size of the image plane
130
+        // this rule is conceived so that min(w,h) = s and w/h = a
131
+        if(aspectRatio >= 1) {
132
+            this._imageSize.width = aspectRatio * scale;
133
+            this._imageSize.height = scale;
134
+        }
135
+        else {
136
+            this._imageSize.width = scale;
137
+            this._imageSize.height = scale / aspectRatio;
138
+        }
111 139
 
112 140
         // reset the model
113 141
         this.reset();
114
-
115
-        // log
116
-        Utils.log(`Initializing the camera model...`);
117 142
     }
118 143
 
119 144
     /**
@@ -127,31 +152,15 @@ export class CameraModel
127 152
 
128 153
     /**
129 154
      * Update the camera model
130
-     * @param homography 3x3 perspective transform
131
-     * @param screenSize may change over time (e.g., when going from portrait to landscape or vice-versa)
132
-     * @returns promise that resolves to a camera matrix
155
+     * @param homographyNDC 3x3 perspective transform
156
+     * @returns a promise that resolves to a camera matrix
133 157
      */
134
-    update(homography: SpeedyMatrix, screenSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
158
+    update(homographyNDC: SpeedyMatrix): SpeedyPromise<SpeedyMatrix>
135 159
     {
136
-        // validate the shape of the homography
137
-        if(homography.rows != 3 || homography.columns != 3)
138
-            throw new IllegalArgumentError(`Camera model: provide a homography matrix`);
160
+        Utils.assert(homographyNDC.rows == 3 && homographyNDC.columns == 3);
139 161
 
140
-        // validate screenSize
141
-        if(screenSize.area() == 0)
142
-            throw new IllegalArgumentError(`Camera model: invalid screenSize = ${screenSize.toString()}`);
143
-
144
-        // changed screen size?
145
-        if(!this._screenSize.equals(screenSize)) {
146
-            Utils.log(`Camera model: detected a change in screen size...`);
147
-
148
-            // update the screen size
149
-            this._screenSize.width = screenSize.width;
150
-            this._screenSize.height = screenSize.height;
151
-
152
-            // reset camera
153
-            this.reset();
154
-        }
162
+        // convert to image space
163
+        const homography = this._convertToImageSpace(homographyNDC);
155 164
 
156 165
         // read the entries of the homography
157 166
         const h = homography.read();
@@ -161,10 +170,8 @@ export class CameraModel
161 170
 
162 171
         // validate the homography (homography matrices aren't singular)
163 172
         const det = h13 * (h21 * h32 - h22 * h31) - h23 * (h11 * h32 - h12 * h31) + h33 * (h11 * h22 - h12 * h21);
164
-        if(Math.abs(det) < EPSILON) {
165
-            Utils.warning(`Can't update the camera model using an invalid homography matrix`);
166
-            return Speedy.Promise.resolve(this._matrix);
167
-        }
173
+        if(Math.abs(det) < EPSILON || Number.isNaN(det))
174
+            return Speedy.Promise.reject(new NumericalError(`Can't update the camera model using an invalid homography matrix`));
168 175
 
169 176
         // estimate the pose
170 177
         const pose = this._estimatePose(homography);
@@ -172,12 +179,22 @@ export class CameraModel
172 179
             this._extrinsics = this._filter.output().read();
173 180
 
174 181
         // compute the camera matrix
175
-        const C = this.denormalizer();
182
+        const Z = this._flipZ; // switch to a right handed system
176 183
         const K = Speedy.Matrix(3, 3, this._intrinsics);
177 184
         const E = Speedy.Matrix(3, 4, this._extrinsics);
178
-        this._matrix.setToSync(K.times(E).times(C));
179
-        //console.log("intrinsics -----------", K.toString());
180
-        //console.log("matrix ----------------",this._matrix.toString());
185
+        this._matrix.setToSync(K.times(E).times(Z));
186
+
187
+        /*
188
+        // test
189
+        console.log("homography ------------", homography.toString());
190
+        console.log("intrinsics ------------", K.toString());
191
+        console.log("extrinsics ------------", E.toString());
192
+        console.log("extrinsicsINV ---------", Speedy.Matrix(this.computeViewMatrix().inverse()).toString());
193
+        console.log("matrix ----------------", this._matrix.toString());
194
+        console.log("projectionMatrix ----- ", this.computeProjectionMatrix(0.1,100).toString());
195
+        */
196
+
197
+        // done!
181 198
         return Speedy.Promise.resolve(this._matrix);
182 199
     }
183 200
 
@@ -191,9 +208,7 @@ export class CameraModel
191 208
     }
192 209
 
193 210
     /**
194
-     * The camera matrix that maps the 3D normalized space [-1,1]^3 to the
195
-     * 2D AR screen space (measured in pixels)
196
-     * @returns 3x4 camera matrix
211
+     * The 3x4 camera matrix
197 212
      */
198 213
     get matrix(): SpeedyMatrix
199 214
     {
@@ -201,106 +216,99 @@ export class CameraModel
201 216
     }
202 217
 
203 218
     /**
204
-     * Camera intrinsics matrix
205
-     * @returns 3x3 intrinsics matrix in column-major format
219
+     * The size of the image plane
206 220
      */
207
-    get intrinsics(): number[]
221
+    get imageSize(): SpeedySize
208 222
     {
209
-        return this._intrinsics;
223
+        return this._imageSize;
210 224
     }
211 225
 
212 226
     /**
213
-     * Camera extrinsics matrix
214
-     * @returns 3x4 extrinsics matrix [ R | t ] in column-major format
227
+     * The aspect ratio of the image
215 228
      */
216
-    get extrinsics(): number[]
229
+    get aspectRatio(): number
217 230
     {
218
-        return this._extrinsics;
231
+        return this._imageSize.width / this._imageSize.height;
219 232
     }
220 233
 
221 234
     /**
222
-     * Convert coordinates from normalized space [-1,1]^3 to a
223
-     * "3D pixel space" based on the dimensions of the AR screen.
224
-     *
225
-     * We perform a 180-degrees rotation around the x-axis so that
226
-     * it looks nicer (the y-axis grows downwards in image space).
227
-     *
228
-     * The final camera matrix is P = K * [ R | t ] * C, where
229
-     * C is this conversion matrix. The intent behind this is to
230
-     * make tracking independent of target and screen sizes.
231
-     *
232
-     * Reminder: we use a right-handed coordinate system in 3D!
233
-     * In 2D image space the coordinate system is left-handed.
234
-     *
235
-     * @returns 4x4 conversion matrix C
235
+     * Focal length in "pixels" (projection distance in the pinhole camera model)
236
+     * same as (focal length in mm) * (number of "pixels" per world unit in "pixels"/mm)
237
+     * "pixels" means image plane units
236 238
      */
237
-    denormalizer(): SpeedyMatrix
239
+    get focalLength(): number
238 240
     {
239
-        const w = this._screenSize.width / 2; // half width, in pixels
240
-        const h = this._screenSize.height / 2; // half height, in pixels
241
-        const d = Math.min(w, h); // virtual unit length, in pixels
242
-
243
-        /*
244
-        return Speedy.Matrix(4, 4, [
245
-            1, 0, 0, 0,
246
-            0,-1, 0, 0,
247
-            0, 0,-1, 0,
248
-            w/d, h/d, 0, 1/d
249
-        ]);
250
-        */
251
-
252
-        return Speedy.Matrix(4, 4, [
253
-            d, 0, 0, 0,
254
-            0,-d, 0, 0,
255
-            0, 0,-d, 0,
256
-            w, h, 0, 1,
257
-        ]);
241
+        return this._intrinsics[FX]; // fx == fy
258 242
     }
259 243
 
260 244
     /**
261
-     * Size of the AR screen space, in pixels
262
-     * @returns size in pixels
245
+     * Horizontal field-of-view, given in radians
263 246
      */
264
-    get screenSize(): SpeedySize
247
+    get fovx(): number
265 248
     {
266
-        return this._screenSize;
249
+        const halfWidth = this._imageSize.width / 2;
250
+        return 2 * Math.atan(halfWidth / this._intrinsics[FX]);
267 251
     }
268 252
 
269 253
     /**
270
-     * Focal length in pixel units (projection distance in the pinhole camera model)
271
-     * same as (focal length in mm) * (number of pixels per world unit in pixels/mm)
272
-     * @returns focal length
254
+     * Vertical field-of-view, given in radians
273 255
      */
274
-    get focalLength(): number
256
+    get fovy(): number
275 257
     {
276
-        return this._intrinsics[FY]; // fx == fy
258
+        const halfHeight = this._imageSize.height / 2;
259
+        return 2 * Math.atan(halfHeight / this._intrinsics[FY]);
277 260
     }
278 261
 
279 262
     /**
280
-     * Horizontal field-of-view, given in radians
281
-     * @returns vertical field-of-view
263
+     * Compute the view matrix. This 4x4 matrix moves 3D points from
264
+     * world space to view space. We want the camera looking in the
265
+     * direction of the negative z-axis (WebGL-friendly)
266
+     * @returns a view matrix
282 267
      */
283
-    get fovx(): number
268
+    computeViewMatrix(): SpeedyMatrix
284 269
     {
285
-        return 2 * Math.atan(this._intrinsics[U0] / this._intrinsics[FX]);
286
-    }
270
+        const E = this._extrinsics;
287 271
 
288
-    /**
289
-     * Vertical field-of-view, given in radians
290
-     * @returns vertical field-of-view
291
-     */
292
-    get fovy(): number
293
-    {
294
-        return 2 * Math.atan(this._intrinsics[V0] / this._intrinsics[FY]);
272
+        // We augment the 3x4 extrinsics matrix E with the [ 0  0  0  1 ] row
273
+        // and get E+. Let Z be 4x4 flipZ, the identity matrix with the third
274
+        // column negated. The following matrix is View = Z * E+ * Z. We get
275
+        // the camera looking in the direction of the negative z-axis in a
276
+        // right handed system!
277
+        return Speedy.Matrix(4, 4, [
278
+            E[0], E[1],-E[2], 0, // r1
279
+            E[3], E[4],-E[5], 0, // r2
280
+           -E[6],-E[7],+E[8], 0, // r3
281
+            E[9], E[10],-E[11], 1 // t
282
+        ]);
295 283
     }
296 284
 
297 285
     /**
298
-     * Principal point
299
-     * @returns principal point, in pixel coordinates
286
+     * Compute a perspective projection matrix for WebGL
287
+     * @param near distance of the near plane
288
+     * @param far distance of the far plane
300 289
      */
301
-    principalPoint(): SpeedyPoint2
290
+    computeProjectionMatrix(near: number, far: number): SpeedyMatrix
302 291
     {
303
-        return Speedy.Point2(this._intrinsics[U0], this._intrinsics[V0]);
292
+        const fx = this._intrinsics[FX];
293
+        const fy = this._intrinsics[FY];
294
+        const halfWidth = this._imageSize.width / 2;
295
+        const halfHeight = this._imageSize.height / 2;
296
+
297
+        // we assume that the principal point is at the center of the image plane
298
+        const right = near * (halfWidth / fx);
299
+        const top = near * (halfHeight / fy);
300
+        //const top = right * (halfHeight / halfWidth); // same thing
301
+        const bottom = -top, left = -right; // symmetric frustum
302
+
303
+        // a derivation of this projection matrix can be found at
304
+        // https://www.songho.ca/opengl/gl_projectionmatrix.html
305
+        // http://learnwebgl.brown37.net/08_projections/projections_perspective.html
306
+        return Speedy.Matrix(4, 4, [
307
+            2 * near / (right - left), 0, 0, 0,
308
+            0, 2 * near / (top - bottom), 0, 0,
309
+            (right + left) / (right - left), (top + bottom) / (top - bottom), -(far + near) / (far - near), -1,
310
+            0, 0, -2 * far * near / (far - near), 0
311
+        ]);
304 312
     }
305 313
 
306 314
     /**
@@ -321,10 +329,10 @@ export class CameraModel
321 329
      */
322 330
     private _resetIntrinsics(): void
323 331
     {
324
-        const cameraWidth = Math.max(this._screenSize.width, this._screenSize.height); // portrait or landscape?
332
+        const cameraWidth = Math.max(this._imageSize.width, this._imageSize.height); // portrait or landscape?
325 333
 
326
-        const u0 = this._screenSize.width / 2;
327
-        const v0 = this._screenSize.height / 2;
334
+        const u0 = 0; // principal point at the center of the image plane
335
+        const v0 = 0;
328 336
         const fx = (cameraWidth / 2) / Math.tan(DEG2RAD * HFOV_GUESS / 2);
329 337
         const fy = fx;
330 338
 
@@ -335,6 +343,34 @@ export class CameraModel
335 343
     }
336 344
 
337 345
     /**
346
+     * Convert a homography from NDC to image space
347
+     * @param homographyNDC
348
+     * @returns a new homography
349
+     */
350
+    private _convertToImageSpace(homographyNDC: SpeedyMatrix): SpeedyMatrix
351
+    {
352
+        const w = this._imageSize.width / 2;
353
+        const h = this._imageSize.height / 2;
354
+
355
+        // fromNDC converts points from NDC to image space
356
+        const fromNDC = Speedy.Matrix(3, 3, [
357
+            w, 0, 0,
358
+            0, h, 0,
359
+            0, 0, 1
360
+        ]);
361
+
362
+        /*
363
+        // make h33 = 1 (wanted?)
364
+        const data = homographyNDC.read();
365
+        const h33 = data[8];
366
+        const hom = homographyNDC.times(1/h33);
367
+        */
368
+
369
+        // convert homography
370
+        return Speedy.Matrix(fromNDC.times(homographyNDC));
371
+    }
372
+
373
+    /**
338 374
      * Compute a normalized homography H^ = K^(-1) * H for an
339 375
      * ideal pinhole with f = 1 and principal point = (0,0)
340 376
      * @param homography homography H to be normalized
@@ -342,7 +378,6 @@ export class CameraModel
342 378
      */
343 379
     private _normalizeHomography(homography: SpeedyMatrix): SpeedyMatrix
344 380
     {
345
-        const h = homography.read();
346 381
         const u0 = this._intrinsics[U0];
347 382
         const v0 = this._intrinsics[V0];
348 383
         const fx = this._intrinsics[FX];
@@ -350,6 +385,7 @@ export class CameraModel
350 385
         const u0fx = u0 / fx;
351 386
         const v0fy = v0 / fy;
352 387
 
388
+        const h = homography.read();
353 389
         const h11 = h[0] / fx - u0fx * h[2], h12 = h[3] / fx - u0fx * h[5], h13 = h[6] / fx - u0fx * h[8];
354 390
         const h21 = h[1] / fy - v0fy * h[2], h22 = h[4] / fy - v0fy * h[5], h23 = h[7] / fy - v0fy * h[8];
355 391
         const h31 = h[2], h32 = h[5], h33 = h[8];
@@ -401,10 +437,10 @@ export class CameraModel
401 437
 
402 438
         // sanity check
403 439
         if(Number.isNaN(scale))
404
-            return Speedy.Matrix(3, 3, (new Array(9)).fill(Number.NaN));
440
+            return Speedy.Matrix(3, 3, (new Array<number>(9)).fill(Number.NaN));
405 441
 
406 442
         // recover the rotation
407
-        let r = new Array(6) as number[];
443
+        let r = new Array<number>(6);
408 444
         r[0] = scale * h11;
409 445
         r[1] = scale * h21;
410 446
         r[2] = scale * h31;
@@ -412,8 +448,8 @@ export class CameraModel
412 448
         r[4] = scale * h22;
413 449
         r[5] = scale * h32;
414 450
 
415
-        // refine the rotation
416
-        r = this._refineRotation(r); // r is initially noisy
451
+        // refine the rotation (r is initially noisy)
452
+        r = this._refineRotation(r);
417 453
 
418 454
         /*
419 455
 
@@ -438,7 +474,7 @@ export class CameraModel
438 474
         scale /= h1norm2 + h2norm2;
439 475
 
440 476
         // recover the translation
441
-        let t = new Array(3) as number[];
477
+        let t = new Array<number>(3);
442 478
         t[0] = scale * h13;
443 479
         t[1] = scale * h23;
444 480
         t[2] = scale * h33;
@@ -539,6 +575,8 @@ export class CameraModel
539 575
         // compute the Cholesky decomposition LL' of the diagonal matrix D
540 576
         // whose entries are the two eigenvalues of R'R and then invert L
541 577
         const s1 = Math.sqrt(eigval1), s2 = Math.sqrt(eigval2); // singular values of R (pick s1 >= s2)
578
+
579
+        /*
542 580
         const Linv = Speedy.Matrix(2, 2, [1/s1, 0, 0, 1/s2]); // L inverse
543 581
 
544 582
         // compute the correction matrix C = Q * Linv * Q', where Q = [q1|q2]
@@ -550,6 +588,25 @@ export class CameraModel
550 588
         // correct the rotation vectors r1 and r2 using C
551 589
         const R = Speedy.Matrix(3, 2, [r11, r21, r31, r12, r22, r32]);
552 590
         return Speedy.Matrix(R.times(C)).read();
591
+        */
592
+
593
+        // find C = Q * Linv * Q' manually
594
+        // [ a  b ] is symmetric
595
+        // [ b  c ]
596
+        const a = x1*x1/s1 + x2*x2/s2;
597
+        const b = x1*y1/s1 + x2*y2/s2;
598
+        const c = y1*y1/s1 + y2*y2/s2;
599
+
600
+        // find RC manually
601
+        return [
602
+            a*r11 + b*r12,
603
+            a*r21 + b*r22,
604
+            a*r31 + b*r32,
605
+
606
+            b*r11 + c*r12,
607
+            b*r21 + c*r22,
608
+            b*r31 + c*r32
609
+        ];
553 610
     }
554 611
 
555 612
     /**
@@ -587,25 +644,33 @@ export class CameraModel
587 644
         const r21 = rot[1], r22 = rot[4];
588 645
         const r31 = rot[2], r32 = rot[5];
589 646
 
590
-        // sample points [ xi  yi ]' in AR screen space
591
-        //const x = [ 0.5, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5, 0.0 ];
592
-        //const y = [ 0.5, 0.0, 0.0, 1.0, 1.0, 0.0, 0.5, 1.0, 0.5 ];
593
-        const x = [ 0.5, 0.0, 1.0, 1.0, 0.0 ];
594
-        const y = [ 0.5, 0.0, 0.0, 1.0, 1.0 ];
595
-        const n = x.length;
596
-        const n3 = 3*n;
597
-
598
-        const width = this._screenSize.width;
599
-        const height = this._screenSize.height;
600
-        for(let i = 0; i < n; i++) {
601
-            x[i] *= width;
602
-            y[i] *= height;
647
+        // generate a grid of sample points [ xi  yi ]' in the image
648
+        //const x = [ 0, -1, +1, +1, -1 ];
649
+        //const y = [ 0, -1, -1, +1, +1 ];
650
+        const g = TRANSLATION_REFINEMENT_GRIDSIZE;
651
+        const x = new Array<number>(g*g);
652
+        const y = new Array<number>(g*g);
653
+        const halfWidth = this._imageSize.width / 2;
654
+        const halfHeight = this._imageSize.height / 2;
655
+
656
+        for(let k = 0, i = 0; i < g; i++) {
657
+            for(let j = 0; j < g; j++, k++) {
658
+                // in [-1,+1]
659
+                x[k] = (i/(g-1)) * 2 - 1;
660
+                y[k] = (j/(g-1)) * 2 - 1;
661
+
662
+                // in [-s/2,+s/2], where s = w,h
663
+                x[k] *= halfWidth;
664
+                y[k] *= halfHeight;
665
+            }
603 666
         }
667
+        //console.log(x.toString(), y.toString());
604 668
 
605 669
         // set auxiliary values: ai = H [ xi  yi  1 ]'
606
-        const a1 = new Array(n) as number[];
607
-        const a2 = new Array(n) as number[];
608
-        const a3 = new Array(n) as number[];
670
+        const n = x.length;
671
+        const a1 = new Array<number>(n);
672
+        const a2 = new Array<number>(n);
673
+        const a3 = new Array<number>(n);
609 674
         for(let i = 0; i < n; i++) {
610 675
             a1[i] = x[i] * h11 + y[i] * h12 + h13;
611 676
             a2[i] = x[i] * h21 + y[i] * h22 + h23;
@@ -614,8 +679,9 @@ export class CameraModel
614 679
 
615 680
         // we'll solve M t = v for t with linear least squares
616 681
         // M: 3n x 3, v: 3n x 1, t: 3 x 1
617
-        const m = new Array(3*n * 3) as number[];
618
-        const v = new Array(3*n) as number[];
682
+        const n3 = 3*n;
683
+        const m = new Array<number>(n3 * 3);
684
+        const v = new Array<number>(n3);
619 685
         for(let i = 0, k = 0; k < n; i += 3, k++) {
620 686
             m[i] = m[i+n3+1] = m[i+n3+n3+2] = 0;
621 687
             m[i+n3] = -(m[i+1] = a3[k]);
@@ -676,20 +742,18 @@ export class CameraModel
676 742
         */
677 743
 
678 744
         // gradient descent: super lightweight implementation
679
-        const r = new Array(3*n) as number[];
680
-        const c = new Array(3) as number[];
681
-        const Mc = new Array(3*n) as number[];
745
+        const r = new Array<number>(3*n);
746
+        const c = new Array<number>(3);
747
+        const Mc = new Array<number>(3*n);
682 748
 
683 749
         // initial guess
684
-        const t = new Array(3) as number[];
750
+        const t = new Array<number>(3);
685 751
         t[0] = t0[0];
686 752
         t[1] = t0[1];
687 753
         t[2] = t0[2];
688 754
 
689 755
         // iterate
690
-        const MAX_ITERATIONS = 15;
691
-        const TOLERANCE = 1;
692
-        for(let it = 0; it < MAX_ITERATIONS; it++) {
756
+        for(let it = 0; it < TRANSLATION_REFINEMENT_ITERATIONS; it++) {
693 757
             //console.log("it",it+1);
694 758
 
695 759
             // compute residual r = Mt - v
@@ -718,8 +782,8 @@ export class CameraModel
718 782
             let num = 0;
719 783
             for(let i = 0; i < 3; i++)
720 784
                 num += c[i] * c[i];
721
-            //console.log("c'c=",num);
722
-            if(num < TOLERANCE)
785
+            //console.log("c'c=",num," at #",it+1);
786
+            if(num < TRANSLATION_REFINEMENT_TOLERANCE)
723 787
                 break;
724 788
 
725 789
             // compute (Mc)'(Mc)
@@ -778,7 +842,7 @@ export class CameraModel
778 842
     }
779 843
 
780 844
     /**
781
-     * Estimate the pose [ R | t ] given a homography in AR screen space
845
+     * Estimate the pose [ R | t ] given a homography in sensor space
782 846
      * @param homography must be valid
783 847
      * @returns 3x4 matrix
784 848
      */
@@ -793,7 +857,7 @@ export class CameraModel
793 857
         // it won't be a perfect equality due to noise in the homography.
794 858
         // remark: composition of homographies
795 859
         const residual = Speedy.Matrix(normalizedHomography);
796
-        for(let k = 0; k < POSE_ITERATIONS; k++) {
860
+        for(let k = 0; k < POSE_REFINEMENT_ITERATIONS; k++) {
797 861
             // incrementally improve the partial pose
798 862
             const rt = this._estimatePartialPose(residual); // rt should converge to the identity matrix
799 863
             partialPose.setToSync(rt.times(partialPose));

+ 36
- 70
src/geometry/view.ts Vedi File

@@ -23,16 +23,15 @@
23 23
 
24 24
 import Speedy from 'speedy-vision';
25 25
 import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
26
-import { CameraModel, FX, FY, U0, V0 } from './camera-model';
26
+import { CameraModel } from './camera-model';
27 27
 import { IllegalArgumentError } from '../utils/errors';
28 28
 import { Nullable } from '../utils/utils';
29 29
 
30
-/** Default distance in pixels of the near plane to the optical center of the camera */
31
-const DEFAULT_NEAR = 1;
32
-
33
-/** Default distance in pixels of the far plane to the optical center of the camera */
34
-const DEFAULT_FAR = 20000;
30
+/** Default distance of the near plane to the optical center of the camera */
31
+const DEFAULT_NEAR = 0.1;
35 32
 
33
+/** Default distance of the far plane to the optical center of the camera */
34
+const DEFAULT_FAR = 10000 * DEFAULT_NEAR;
36 35
 
37 36
 
38 37
 /**
@@ -48,33 +47,27 @@ export interface View
48 47
     readonly _projectionMatrixInverse: SpeedyMatrix;
49 48
 }
50 49
 
50
+
51 51
 /**
52 52
  * A PerspectiveView is a View defining a symmetric frustum around the z-axis
53 53
  * (perspective projection)
54 54
  */
55 55
 export class PerspectiveView implements View
56 56
 {
57
-    /** A 4x4 matrix that projects the viewer space into the clip space, i.e., [-1,1]^3 */
58
-    private readonly _projectionMatrix: SpeedyMatrix;
59
-
60
-    /** The inverse of the projection matrix, computed lazily */
61
-    private _inverseProjection: Nullable<SpeedyMatrix>;
62
-
63
-    /** Tangent of the half of the horizontal field-of-view */
64
-    private readonly _tanOfHalfFovx: number;
65
-
66
-    /** Tangent of the half of the vertical field-of-view */
67
-    private readonly _tanOfHalfFovy: number;
57
+    /** Camera model */
58
+    private readonly _camera: CameraModel;
68 59
 
69
-    /** Aspect ratio of the frustum */
70
-    private readonly _aspect: number;
71
-
72
-    /** Distance of the near plane to the Z = 0 plane in viewer space */
60
+    /** Distance of the near plane to the optical center of the camera */
73 61
     private readonly _near: number;
74 62
 
75
-    /** Distance of the far plane to the Z = 0 plane in viewer space */
63
+    /** Distance of the far plane to the optical center of the camera */
76 64
     private readonly _far: number;
77 65
 
66
+    /** A 4x4 matrix that projects viewer space into clip space, i.e., [-1,1]^3 */
67
+    private readonly _projectionMatrix: SpeedyMatrix;
68
+
69
+    /** The inverse of the projection matrix, computed lazily */
70
+    private _inverseProjection: Nullable<SpeedyMatrix>;
78 71
 
79 72
 
80 73
 
@@ -86,19 +79,16 @@ export class PerspectiveView implements View
86 79
      */
87 80
     constructor(camera: CameraModel, near: number = DEFAULT_NEAR, far: number = DEFAULT_FAR)
88 81
     {
89
-        const intrinsics = camera.intrinsics;
90
-        const screenSize = camera.screenSize;
91
-
92
-        this._near = Math.max(0, +near);
93
-        this._far = Math.max(0, +far);
82
+        this._near = +near;
83
+        this._far = +far;
94 84
 
95 85
         if(this._near >= this._far)
96 86
             throw new IllegalArgumentError(`View expects near < far (found near = ${this._near} and far = ${this._far})`);
87
+        else if(this._near <= 0)
88
+            throw new IllegalArgumentError(`View expects a positive near (found ${this._near})`);
97 89
 
98
-        this._aspect = screenSize.width / screenSize.height;
99
-        this._tanOfHalfFovx = intrinsics[U0] / intrinsics[FX];
100
-        this._tanOfHalfFovy = intrinsics[V0] / intrinsics[FY];
101
-        this._projectionMatrix = PerspectiveView._computeProjectionMatrix(intrinsics, this._near, this._far);
90
+        this._camera = camera;
91
+        this._projectionMatrix = camera.computeProjectionMatrix(this._near, this._far);
102 92
         this._inverseProjection = null;
103 93
     }
104 94
 
@@ -111,11 +101,23 @@ export class PerspectiveView implements View
111 101
     }
112 102
 
113 103
     /**
104
+     * The inverse of the projection matrix
105
+     * @internal
106
+     */
107
+    get _projectionMatrixInverse(): SpeedyMatrix
108
+    {
109
+        if(this._inverseProjection === null)
110
+            this._inverseProjection = Speedy.Matrix(this._projectionMatrix.inverse());
111
+
112
+        return this._inverseProjection;
113
+    }
114
+
115
+    /**
114 116
      * Aspect ratio of the frustum
115 117
      */
116 118
     get aspect(): number
117 119
     {
118
-        return this._aspect;
120
+        return this._camera.aspectRatio;
119 121
     }
120 122
 
121 123
     /**
@@ -123,7 +125,7 @@ export class PerspectiveView implements View
123 125
      */
124 126
     get fovx(): number
125 127
     {
126
-        return 2 * Math.atan(this._tanOfHalfFovx);
128
+        return this._camera.fovx;
127 129
     }
128 130
 
129 131
     /**
@@ -131,7 +133,7 @@ export class PerspectiveView implements View
131 133
      */
132 134
     get fovy(): number
133 135
     {
134
-        return 2 * Math.atan(this._tanOfHalfFovy);
136
+        return this._camera.fovy;
135 137
     }
136 138
 
137 139
     /**
@@ -149,40 +151,4 @@ export class PerspectiveView implements View
149 151
     {
150 152
         return this._far;
151 153
     }
152
-
153
-    /**
154
-     * The inverse of the projection matrix
155
-     * @internal
156
-     */
157
-    get _projectionMatrixInverse(): SpeedyMatrix
158
-    {
159
-        if(this._inverseProjection === null)
160
-            this._inverseProjection = Speedy.Matrix(this._projectionMatrix.inverse());
161
-
162
-        return this._inverseProjection;
163
-    }
164
-
165
-    /**
166
-     * Compute a perspective projection matrix for WebGL
167
-     * @param K camera intrinsics
168
-     * @param near distance of the near plane
169
-     * @param far distance of the far plane
170
-     */
171
-    private static _computeProjectionMatrix(K: number[], near: number, far: number): SpeedyMatrix
172
-    {
173
-        // we assume that the principal point is at the center of the image
174
-        const top = near * (K[V0] / K[FY]);
175
-        const right = near * (K[U0] / K[FX]);
176
-        const bottom = -top, left = -right; // symmetric frustum
177
-
178
-        // a derivation of this projection matrix can be found at
179
-        // https://www.songho.ca/opengl/gl_projectionmatrix.html
180
-        // http://learnwebgl.brown37.net/08_projections/projections_perspective.html
181
-        return Speedy.Matrix(4, 4, [
182
-            2 * near / (right - left), 0, 0, 0,
183
-            0, 2 * near / (top - bottom), 0, 0,
184
-            (right + left) / (right - left), (top + bottom) / (top - bottom), -(far + near) / (far - near), -1,
185
-            0, 0, -2 * far * near / (far - near), 0
186
-        ]);
187
-    }
188 154
 }

+ 7
- 49
src/geometry/viewer-pose.ts Vedi File

@@ -44,64 +44,22 @@ export class ViewerPose extends Pose
44 44
      */
45 45
     constructor(camera: CameraModel)
46 46
     {
47
-        // compute the view matrix and its inverse in AR screen space
48
-        const viewMatrix = ViewerPose._computeViewMatrix(camera);
49
-        const inverseTransform = new Transform(viewMatrix); // from world space to view space
50
-        const transform = inverseTransform.inverse; // from view space to world space
47
+        const viewMatrix = camera.computeViewMatrix();
48
+        const modelMatrix = Speedy.Matrix(viewMatrix.inverse());
51 49
 
50
+        const transform = new Transform(modelMatrix);
52 51
         super(transform);
52
+
53 53
         this._viewMatrix = viewMatrix;
54 54
     }
55 55
 
56 56
     /**
57
-     * This 4x4 matrix moves 3D points from world space to view space. We
58
-     * assume that the camera is looking in the direction of the negative
59
-     * z-axis (WebGL-friendly)
57
+     * This 4x4 matrix moves 3D points from world space to view space.
58
+     * We assume that the camera is looking in the direction of the
59
+     * negative z-axis (WebGL-friendly)
60 60
      */
61 61
     get viewMatrix(): SpeedyMatrix
62 62
     {
63 63
         return this._viewMatrix;
64 64
     }
65
-
66
-    /**
67
-     * Compute the view matrix in AR screen space, measured in pixels
68
-     * @param camera
69
-     * @returns a 4x4 matrix describing a rotation and a translation
70
-     */
71
-    private static _computeViewMatrix(camera: CameraModel): SpeedyMatrix
72
-    {
73
-        /*
74
-
75
-        // this is the view matrix in AR screen space, measured in pixels
76
-        // we augment the extrinsics matrix, making it 4x4 by adding a
77
-        // [ 0  0  0  1 ] row. Below, E is a 3x4 extrinsics matrix
78
-        const V = Speedy.Matrix(4, 4, [
79
-            E[0], E[1], E[2], 0,
80
-            E[3], E[4], E[5], 0,
81
-            E[6], E[7], E[8], 0,
82
-            E[9], E[10], E[11], 1
83
-        ]);
84
-
85
-        // we premultiply V by F, which performs a rotation around the
86
-        // x-axis by 180 degrees, so that we get the 3D objects in front
87
-        // of the camera pointing in the direction of the negative z-axis
88
-        const F = Speedy.Matrix(4, 4, [
89
-            1, 0, 0, 0,
90
-            0,-1, 0, 0,
91
-            0, 0,-1, 0,
92
-            0, 0, 0, 1
93
-        ]);
94
-
95
-        Matrix F * V is matrix V with the second and third rows negated
96
-
97
-        */
98
-
99
-        const E = camera.extrinsics;
100
-        return Speedy.Matrix(4, 4, [
101
-            E[0],-E[1],-E[2], 0,
102
-            E[3],-E[4],-E[5], 0,
103
-            E[6],-E[7],-E[8], 0,
104
-            E[9],-E[10],-E[11], 1
105
-        ]);
106
-    }
107 65
 }

+ 456
- 0
src/trackers/image-tracker/image-tracker-utils.ts Vedi File

@@ -0,0 +1,456 @@
1
+/*
2
+ * encantar.js
3
+ * GPU-accelerated Augmented Reality for the web
4
+ * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
5
+ *
6
+ * This program is free software: you can redistribute it and/or modify
7
+ * it under the terms of the GNU Lesser General Public License as published
8
+ * by the Free Software Foundation, either version 3 of the License, or
9
+ * (at your option) any later version.
10
+ *
11
+ * This program is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ * GNU Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public License
17
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
18
+ *
19
+ * image-tracker-utils.ts
20
+ * Image Tracker: Utilities
21
+ */
22
+
23
+import Speedy from 'speedy-vision';
24
+import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
25
+import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
26
+import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
27
+import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
28
+import { SpeedyPoint2 } from 'speedy-vision/types/core/speedy-point';
29
+import { SpeedyVector2 } from 'speedy-vision/types/core/speedy-vector';
30
+import { SpeedyKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
31
+import { ReferenceImageWithMedia } from './reference-image';
32
+import { Utils } from '../../utils/utils';
33
+import { IllegalOperationError, IllegalArgumentError, NumericalError } from '../../utils/errors';
34
+import { NIS_SIZE, TRACK_GRID_GRANULARITY } from './settings';
35
+
36
+/*
37
+
38
+Definitions:
39
+------------
40
+
41
+1. Raster space:
42
+   an image space whose top-left coordinate is (0,0) and whose bottom-right
43
+   coordinate is (w-1,h-1), where (w,h) is its size. The y-axis points down.
44
+
45
+2. AR screen size:
46
+   size in pixels used for image processing operations. It's determined by the
47
+   resolution of the tracker and by the aspect ratio of the input media.
48
+
49
+3. AR screen space (screen):
50
+   a raster space whose size is the AR screen size.
51
+
52
+4. Normalized Image Space (NIS):
53
+   a raster space whose size is N x N, where N = NIS_SIZE.
54
+
55
+5. Normalized Device Coordinates (NDC):
56
+   the normalized 2D space [-1,1]x[-1,1]. The origin is at the center and the
57
+   y-axis points up.
58
+
59
+*/
60
+
61
+/** An ordered pair [src, dest] of keypoints */
62
+export type ImageTrackerKeypointPair = [ Readonly<SpeedyKeypoint>, Readonly<SpeedyKeypoint> ];
63
+
64
+/**
65
+ * Utilities for the Image Tracker
66
+ */
67
+export class ImageTrackerUtils
68
+{
69
+    /**
70
+     * Find a transformation that converts a raster space to NIS
71
+     * @param size size of the raster space
72
+     * @returns a 3x3 matrix
73
+     */
74
+    static rasterToNIS(size: SpeedySize): SpeedyMatrix
75
+    {
76
+        const sx = NIS_SIZE / size.width;
77
+        const sy = NIS_SIZE / size.height;
78
+
79
+        return Speedy.Matrix(3, 3, [
80
+            sx, 0,  0,
81
+            0,  sy, 0,
82
+            0,  0,  1
83
+        ]);
84
+    }
85
+
86
+    /**
87
+     * Find a transformation that converts a raster space to NDC
88
+     * @param size size of the raster space
89
+     * @returns a 3x3 matrix
90
+     */
91
+    static rasterToNDC(size: SpeedySize): SpeedyMatrix
92
+    {
93
+        const w = size.width, h = size.height;
94
+
95
+        return Speedy.Matrix(3, 3, [
96
+            2/w, 0,   0,
97
+            0,  -2/h, 0,
98
+           -1,   1,   1
99
+        ]);
100
+    }
101
+
102
+    /**
103
+     * Find a transformation that converts NDC to a raster space
104
+     * @param size size of the raster space
105
+     * @returns a 3x3 matrix
106
+     */
107
+    static NDCToRaster(size: SpeedySize): SpeedyMatrix
108
+    {
109
+        const w = size.width, h = size.height;
110
+
111
+        return Speedy.Matrix(3, 3, [
112
+            w/2, 0,   0,
113
+            0,  -h/2, 0,
114
+            w/2, h/2, 1
115
+        ]);
116
+    }
117
+
118
+    /**
119
+     * Find a transformation that scales points in NDC
120
+     * @param sx horizontal scale factor
121
+     * @param sy vertical scale factor
122
+     * @returns a 3x3 matrix
123
+     */
124
+    static scaleNDC(sx: number, sy: number = sx): SpeedyMatrix
125
+    {
126
+        // In NDC, the origin is at the center of the space!
127
+        return Speedy.Matrix(3, 3, [
128
+            sx, 0,  0,
129
+            0,  sy, 0,
130
+            0,  0,  1
131
+        ]);
132
+    }
133
+
134
+    /**
135
+     * Find a scale transformation in NDC such that the output has a desired aspect ratio
136
+     * @param aspectRatio desired aspect ratio
137
+     * @param scale optional scale factor in both axes
138
+     * @returns a 3x3 matrix
139
+     */
140
+    static bestFitScaleNDC(aspectRatio: number, scale: number = 1): SpeedyMatrix
141
+    {
142
+        if(aspectRatio >= 1)
143
+            return this.scaleNDC(scale, scale / aspectRatio); // s/(s/a) = a, sx >= sy
144
+        else
145
+            return this.scaleNDC(scale * aspectRatio, scale); // (s*a)/s = a, sx < sy
146
+    }
147
+
148
+    /**
149
+     * Find the inverse matrix of bestFitScaleNDC()
150
+     * @param aspectRatio as given to bestFitScaleNDC()
151
+     * @param scale optional, as given to bestFitScaleNDC()
152
+     * @returns a 3x3 matrix
153
+     */
154
+    static inverseBestFitScaleNDC(aspectRatio: number, scale: number = 1): SpeedyMatrix
155
+    {
156
+        if(aspectRatio >= 1)
157
+            return this.scaleNDC(1 / scale, aspectRatio / scale);
158
+        else
159
+            return this.scaleNDC(1 / (scale * aspectRatio), 1 / scale);
160
+    }
161
+
162
+    /**
163
+     * Find the best-fit aspect ratio for the rectification of the reference image in NDC
164
+     * @param screenSize
165
+     * @param referenceImage
166
+     * @returns a best-fit aspect ratio
167
+     */
168
+    static bestFitAspectRatioNDC(screenSize: SpeedySize, referenceImage: ReferenceImageWithMedia): number
169
+    {
170
+        /*
171
+        
172
+        The best-fit aspectRatio (a) is constructed as follows:
173
+
174
+        1) a fully stretched(*) and distorted reference image in NDC:
175
+           a = 1
176
+
177
+        2) a square in NDC:
178
+           a = 1 / screenAspectRatio
179
+
180
+        3) an image with the aspect ratio of the reference image in NDC:
181
+           a = referenceImageAspectRatio * (1 / screenAspectRatio)
182
+
183
+        (*) AR screen space
184
+
185
+        By transforming the reference image twice, first by converting it to AR
186
+        screen space, and then by rectifying it, we lose a little bit of quality.
187
+        Nothing to be too concerned about, though?
188
+
189
+        */
190
+
191
+        const screenAspectRatio = screenSize.width / screenSize.height;
192
+        return referenceImage.aspectRatio / screenAspectRatio;
193
+    }
194
+
195
+    /**
196
+     * Given n > 0 pairs (src_i, dest_i) of keypoints in NIS,
197
+     * convert them to NDC and output a 2 x 2n matrix of the form:
198
+     * [ src_0.x  src_1.x  ... | dest_0.x  dest_1.x  ... ]
199
+     * [ src_0.y  src_1.y  ... | dest_0.y  dest_1.y  ... ]
200
+     * @param pairs pairs of keypoints in NIS
201
+     * @returns 2 x 2n matrix with two 2 x n blocks: [ src | dest ]
202
+     * @throws IllegalArgumentError if no pairs are given (n == 0)
203
+     */
204
+    static compilePairsOfKeypointsNDC(pairs: ImageTrackerKeypointPair[]): SpeedyMatrix
205
+    {
206
+        const n = pairs.length;
207
+
208
+        if(n == 0)
209
+            throw new IllegalArgumentError();
210
+
211
+        const scale = 2 / NIS_SIZE;
212
+        const data = new Array<number>(2 * 2*n);
213
+
214
+        for(let i = 0, j = 0, k = 2*n; i < n; i++, j += 2, k += 2) {
215
+            const src = pairs[i][0];
216
+            const dest = pairs[i][1];
217
+
218
+            data[j] = src.x * scale - 1; // convert from NIS to NDC
219
+            data[j+1] = 1 - src.y * scale; // flip y-axis
220
+
221
+            data[k] = dest.x * scale - 1;
222
+            data[k+1] = 1 - dest.y * scale;
223
+        }
224
+
225
+        return Speedy.Matrix(2, 2*n, data);
226
+    }
227
+
228
+    /**
229
+     * Given n >= 4 pairs of keypoints in NDC as a 2 x 2n [ src | dest ] matrix,
230
+     * find a perspective warp (homography) from src to dest in NDC
231
+     * @param points compiled pairs of keypoints in NDC
232
+     * @param options to be passed to speedy-vision
233
+     * @returns a pair [ 3x3 transformation matrix, quality score ]
234
+     */
235
+    static findPerspectiveWarpNDC(points: SpeedyMatrix, options: object): SpeedyPromise<[SpeedyMatrix,number]>
236
+    {
237
+        // too few data points?
238
+        const n = points.columns / 2;
239
+        if(n < 4) {
240
+            return Speedy.Promise.reject(
241
+                new IllegalArgumentError(`Too few data points to compute a perspective warp`)
242
+            );
243
+        }
244
+
245
+        // compute a homography
246
+        const src = points.block(0, 1, 0, n-1);
247
+        const dest = points.block(0, 1, n, 2*n-1);
248
+        const mask = Speedy.Matrix.Zeros(1, n);
249
+
250
+        return Speedy.Matrix.findHomography(
251
+            Speedy.Matrix.Zeros(3),
252
+            src,
253
+            dest,
254
+            Object.assign({ mask }, options)
255
+        ).then(homography => {
256
+
257
+            // check if this is a valid warp
258
+            const a00 = homography.at(0,0);
259
+            if(Number.isNaN(a00))
260
+                throw new NumericalError(`Can't compute a perspective warp: bad keypoints`);
261
+
262
+            // count the number of inliers
263
+            const inliers = mask.read();
264
+            let inlierCount = 0;
265
+            for(let i = inliers.length - 1; i >= 0; i--)
266
+                inlierCount += inliers[i];
267
+            const score = inlierCount / inliers.length;
268
+
269
+            // done!
270
+            return [ homography, score ];
271
+
272
+        });
273
+    }
274
+
275
+    /**
276
+     * Given n >= 3 pairs of keypoints in NDC as a 2 x 2n [ src | dest ] matrix,
277
+     * find an affine warp from src to dest in NDC. The affine warp is given as
278
+     * a 3x3 matrix whose last row is [0 0 1]
279
+     * @param points compiled pairs of keypoints in NDC
280
+     * @param options to be passed to speedy-vision
281
+     * @returns a pair [ 3x3 transformation matrix, quality score ]
282
+     */
283
+    static findAffineWarpNDC(points: SpeedyMatrix, options: object): SpeedyPromise<[SpeedyMatrix,number]>
284
+    {
285
+        // too few data points?
286
+        const n = points.columns / 2;
287
+        if(n < 3) {
288
+            return Speedy.Promise.reject(
289
+                new IllegalArgumentError(`Too few data points to compute an affine warp`)
290
+            );
291
+        }
292
+
293
+        // compute an affine transformation
294
+        const model = Speedy.Matrix.Eye(3);
295
+        const src = points.block(0, 1, 0, n-1);
296
+        const dest = points.block(0, 1, n, 2*n-1);
297
+        const mask = Speedy.Matrix.Zeros(1, n);
298
+
299
+        return Speedy.Matrix.findAffineTransform(
300
+            model.block(0, 1, 0, 2), // 2x3 submatrix
301
+            src,
302
+            dest,
303
+            Object.assign({ mask }, options)
304
+        ).then(_ => {
305
+
306
+            // check if this is a valid warp
307
+            const a00 = model.at(0,0);
308
+            if(Number.isNaN(a00))
309
+                throw new NumericalError(`Can't compute an affine warp: bad keypoints`);
310
+
311
+            // count the number of inliers
312
+            const inliers = mask.read();
313
+            let inlierCount = 0;
314
+            for(let i = inliers.length - 1; i >= 0; i--)
315
+                inlierCount += inliers[i];
316
+            const score = inlierCount / inliers.length;
317
+
318
+            // done!
319
+            return [ model, score ];
320
+
321
+        });
322
+    }
323
+
324
+    /**
325
+     * Find a polyline in Normalized Device Coordinates (NDC)
326
+     * @param homography maps the corners of NDC to a quadrilateral in NDC
327
+     * @returns 4 points in NDC
328
+     */
329
+    static findPolylineNDC(homography: SpeedyMatrix): SpeedyPoint2[]
330
+    {
331
+        const h = homography.read();
332
+        const uv = [ -1, +1,    -1, -1,    +1, -1,    +1, +1 ]; // the corners of a reference image in NDC
333
+        const polyline = new Array<SpeedyPoint2>(4);
334
+
335
+        for(let i = 0, j = 0; i < 4; i++, j += 2) {
336
+            const u = uv[j], v = uv[j+1];
337
+
338
+            const x = h[0]*u + h[3]*v + h[6];
339
+            const y = h[1]*u + h[4]*v + h[7];
340
+            const w = h[2]*u + h[5]*v + h[8];
341
+
342
+            polyline[i] = Speedy.Point2(x/w, y/w);
343
+        }
344
+
345
+        return polyline;
346
+    }
347
+
348
+    /**
349
+     * Find a better spatial distribution of the input matches
350
+     * @param pairs in the [src, dest] format
351
+     * @returns refined pairs of quality matches
352
+     */
353
+    static refineMatchingPairs(pairs: ImageTrackerKeypointPair[]): ImageTrackerKeypointPair[]
354
+    {
355
+        // collect all keypoints obtained in this frame
356
+        const m = pairs.length;
357
+        const destKeypoints = new Array<SpeedyKeypoint>(m);
358
+
359
+        for(let j = 0; j < m; j++)
360
+            destKeypoints[j] = pairs[j][1];
361
+
362
+        // find a better spatial distribution of the keypoints
363
+        const indices = this._distributeKeypoints(destKeypoints);
364
+
365
+        // assemble output
366
+        const n = indices.length; // number of refined matches
367
+        const result = new Array<ImageTrackerKeypointPair>(n);
368
+
369
+        for(let i = 0; i < n; i++)
370
+            result[i] = pairs[indices[i]];
371
+
372
+        // done!
373
+        return result;
374
+    }
375
+
376
+    /**
377
+     * Spatially distribute keypoints over a grid
378
+     * @param keypoints keypoints to be distributed
379
+     * @returns a list of indices of keypoints[]
380
+     */
381
+    private static _distributeKeypoints(keypoints: SpeedyKeypoint[]): number[]
382
+    {
383
+        // create a grid
384
+        const gridCells = TRACK_GRID_GRANULARITY; // number of grid elements in each axis
385
+        const numberOfCells = gridCells * gridCells;
386
+        const n = keypoints.length;
387
+
388
+        // get the coordinates of the keypoints
389
+        const points: number[] = new Array(2 * n);
390
+        for(let i = 0, j = 0; i < n; i++, j += 2) {
391
+            points[j] = keypoints[i].x;
392
+            points[j+1] = keypoints[i].y;
393
+        }
394
+
395
+        // normalize the coordinates to [0,1) x [0,1)
396
+        this._normalizePoints(points);
397
+
398
+        // distribute the keypoints over the grid
399
+        const grid = new Array<number>(numberOfCells).fill(-1);
400
+        for(let i = 0, j = 0; i < n; i++, j += 2) {
401
+            // find the grid location of the i-th point
402
+            const xg = Math.floor(points[j] * gridCells); // 0 <= xg,yg < gridCells
403
+            const yg = Math.floor(points[j+1] * gridCells);
404
+
405
+            // store the index of the i-th point in the grid
406
+            const k = yg * gridCells + xg;
407
+            if(grid[k] < 0)
408
+                grid[k] = i;
409
+        }
410
+
411
+        // retrieve points of the grid
412
+        let m = 0;
413
+        const indices = new Array<number>(numberOfCells);
414
+        for(let g = 0; g < numberOfCells; g++) {
415
+            if(grid[g] >= 0)
416
+                indices[m++] = grid[g];
417
+        }
418
+        indices.length = m;
419
+
420
+        // done!
421
+        return indices;
422
+    }
423
+
424
+    /**
425
+     * Normalize points to [0,1)^2
426
+     * @param points 2 x n matrix of points in column-major format
427
+     * @returns points
428
+     */
429
+    private static _normalizePoints(points: number[]): number[]
430
+    {
431
+        Utils.assert(points.length % 2 == 0);
432
+
433
+        const n = points.length / 2;
434
+        if(n == 0)
435
+            return points;
436
+
437
+        let xmin = Number.POSITIVE_INFINITY, xmax = Number.NEGATIVE_INFINITY;
438
+        let ymin = Number.POSITIVE_INFINITY, ymax = Number.NEGATIVE_INFINITY;
439
+        for(let i = 0, j = 0; i < n; i++, j += 2) {
440
+            const x = points[j], y = points[j+1];
441
+            xmin = x < xmin ? x : xmin;
442
+            ymin = y < ymin ? y : ymin;
443
+            xmax = x > xmax ? x : xmax;
444
+            ymax = y > ymax ? y : ymax;
445
+        }
446
+
447
+        const xlen = xmax - xmin + 1; // +1 is a correction factor, so that 0 <= x,y < 1
448
+        const ylen = ymax - ymin + 1;
449
+        for(let i = 0, j = 0; i < n; i++, j += 2) {
450
+            points[j] = (points[j] - xmin) / xlen;
451
+            points[j+1] = (points[j+1] - ymin) / ylen;
452
+        }
453
+
454
+        return points;
455
+    }
456
+}

+ 30
- 31
src/trackers/image-tracker/image-tracker.ts Vedi File

@@ -37,13 +37,14 @@ import { Tracker, TrackerOutput, TrackerResult, Trackable } from '../tracker';
37 37
 import { Session } from '../../core/session';
38 38
 import { IllegalOperationError, IllegalArgumentError } from '../../utils/errors';
39 39
 import { Resolution } from '../../utils/resolution';
40
-import { ReferenceImage } from './reference-image';
40
+import { ReferenceImage, ReferenceImageWithMedia } from './reference-image';
41 41
 import { ReferenceImageDatabase } from './reference-image-database';
42 42
 import { ImageTrackerState } from './states/state';
43 43
 import { ImageTrackerInitialState } from './states/initial';
44 44
 import { ImageTrackerTrainingState } from './states/training';
45 45
 import { ImageTrackerScanningState } from './states/scanning';
46
-import { ImageTrackerPreTrackingState } from './states/pre-tracking';
46
+import { ImageTrackerPreTrackingAState } from './states/pre-tracking-a';
47
+import { ImageTrackerPreTrackingBState } from './states/pre-tracking-b';
47 48
 import { ImageTrackerTrackingState } from './states/tracking';
48 49
 import { Nullable, Utils } from '../../utils/utils';
49 50
 import { AREventTarget } from '../../utils/ar-events';
@@ -52,20 +53,7 @@ import { ImageTrackerEvent, ImageTrackerEventType } from './image-tracker-event'
52 53
 import { SpeedyPoint2 } from 'speedy-vision/types/core/speedy-point';
53 54
 import { Viewer } from '../../geometry/viewer';
54 55
 import { Pose } from '../../geometry/pose';
55
-
56
-/*
57
-
58
-A few definitions:
59
-
60
-1. Viewport size:
61
-    size of the drawing buffer of the background canvas = size of the input
62
-    media, in pixels
63
-
64
-2. AR screen size:
65
-    size for image processing operations, determined by the resolution of the
66
-    tracker and by the aspect ratio of the input media
67
-
68
-*/
56
+import { CameraModel } from '../../geometry/camera-model';
69 57
 
70 58
 /** A trackable target */
71 59
 export interface TrackableImage extends Trackable
@@ -96,24 +84,21 @@ export interface ImageTrackerOutput extends TrackerOutput
96 84
     /** tracker result to be consumed by the user */
97 85
     readonly exports?: ImageTrackerResult;
98 86
 
99
-    /** size of the AR screen space, in pixels */
100
-    readonly screenSize?: SpeedySize;
101
-
102
-    /** optional keypoints */
87
+    /** keypoints found in this framestep */
103 88
     readonly keypoints?: SpeedyKeypoint[];
104 89
 
105
-    /** optional polyline for testing */
106
-    readonly polyline?: SpeedyPoint2[];
90
+    /** optional keypoints for visualizing & testing */
91
+    readonly keypointsNIS?: SpeedyKeypoint[];
107 92
 
108
-    /** optional 3x4 camera matrix in AR screen space */
109
-    readonly cameraMatrix?: SpeedyMatrix;
93
+    /** optional polyline for visualizing & testing */
94
+    readonly polylineNDC?: SpeedyPoint2[];
110 95
 
111
-    /** 3x3 homography in AR screen space */
112
-    homography?: SpeedyMatrix;
96
+    /** optional camera model for visualizing & testing */
97
+    readonly camera?: CameraModel;
113 98
 }
114 99
 
115 100
 /** All possible states of an Image Tracker */
116
-export type ImageTrackerStateName = 'initial' | 'training' | 'scanning' | 'pre-tracking' | 'tracking';
101
+export type ImageTrackerStateName = 'initial' | 'training' | 'scanning' | 'pre-tracking-a' | 'pre-tracking-b' | 'tracking';
117 102
 
118 103
 /** A helper */
119 104
 const formatSize = (size: SpeedySize) => `${size.width}x${size.height}`;
@@ -163,7 +148,8 @@ export class ImageTracker extends AREventTarget<ImageTrackerEventType> implement
163 148
             'initial': new ImageTrackerInitialState(this),
164 149
             'training': new ImageTrackerTrainingState(this),
165 150
             'scanning': new ImageTrackerScanningState(this),
166
-            'pre-tracking': new ImageTrackerPreTrackingState(this),
151
+            'pre-tracking-a': new ImageTrackerPreTrackingAState(this),
152
+            'pre-tracking-b': new ImageTrackerPreTrackingBState(this),
167 153
             'tracking': new ImageTrackerTrackingState(this),
168 154
         };
169 155
 
@@ -311,8 +297,7 @@ export class ImageTracker extends AREventTarget<ImageTrackerEventType> implement
311 297
         // compute the screen size for image processing purposes
312 298
         // note: this may change over time...!
313 299
         const media = this._source!._internalMedia;
314
-        const aspectRatio = media.width / media.height;
315
-        const screenSize = Utils.resolution(this._resolution, aspectRatio);
300
+        const screenSize = this._computeScreenSize();
316 301
 
317 302
         // run the active state
318 303
         const activeState = this._state[this._activeStateName];
@@ -335,7 +320,7 @@ export class ImageTracker extends AREventTarget<ImageTrackerEventType> implement
335 320
      * @returns reference image
336 321
      * @internal
337 322
      */
338
-    _referenceImageOfKeypoint(keypointIndex: number): Nullable<ReferenceImage>
323
+    _referenceImageOfKeypoint(keypointIndex: number): Nullable<ReferenceImageWithMedia>
339 324
     {
340 325
         const training = this._state.training as ImageTrackerTrainingState;
341 326
         return training.referenceImageOfKeypoint(keypointIndex);
@@ -364,4 +349,18 @@ export class ImageTracker extends AREventTarget<ImageTrackerEventType> implement
364 349
         const training = this._state.training as ImageTrackerTrainingState;
365 350
         return training.referenceKeypoint(keypointIndex);
366 351
     }
352
+
353
+    /**
354
+     * Compute the current size of the AR screen space
355
+     * Note that this may change over time
356
+     * @returns size
357
+     */
358
+    private _computeScreenSize(): SpeedySize
359
+    {
360
+        const media = this._source!._internalMedia;
361
+        const aspectRatio = media.width / media.height;
362
+        const screenSize = Utils.resolution(this._resolution, aspectRatio);
363
+
364
+        return screenSize;
365
+    }
367 366
 }

+ 71
- 71
src/trackers/image-tracker/reference-image-database.ts Vedi File

@@ -23,7 +23,7 @@
23 23
 import Speedy from 'speedy-vision';
24 24
 import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
25 25
 import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
26
-import { ReferenceImage } from './reference-image';
26
+import { ReferenceImage, ReferenceImageWithMedia } from './reference-image';
27 27
 import { Utils } from '../../utils/utils';
28 28
 import { IllegalArgumentError, IllegalOperationError } from '../../utils/errors';
29 29
 
@@ -33,28 +33,13 @@ const DEFAULT_CAPACITY = 100; // this number should exceed normal usage
33 33
                               // further testing is needed to verify the appropriateness of this number;
34 34
                               // it depends on the images, on the keypoint descriptors, and even on the target devices
35 35
 
36
-/** Generate a unique name for a reference image */
37
-const generateUniqueName = () => 'target-' + Math.random().toString(16).substr(2);
38
-
39
-/**
40
- * An entry of a Reference Image Database
41
- */
42
-interface ReferenceImageDatabaseEntry
43
-{
44
-    /** reference image */
45
-    readonly referenceImage: ReferenceImage;
46
-
47
-    /** previously loaded media */
48
-    readonly media: SpeedyMedia;
49
-}
50
-
51 36
 /**
52 37
  * A collection of Reference Images
53 38
  */
54 39
 export class ReferenceImageDatabase implements Iterable<ReferenceImage>
55 40
 {
56
-    /** Image database */
57
-    private _database: ReferenceImageDatabaseEntry[];
41
+    /** Entries */
42
+    private _entries: Map<string, ReferenceImageWithMedia>;
58 43
 
59 44
     /** Maximum number of entries */
60 45
     private _capacity: number;
@@ -62,8 +47,6 @@ export class ReferenceImageDatabase implements Iterable<ReferenceImage>
62 47
     /** Is the database locked? */
63 48
     private _locked: boolean;
64 49
 
65
-    /** Are we busy loading an image? */
66
-    private _busy: boolean;
67 50
 
68 51
 
69 52
 
@@ -73,9 +56,8 @@ export class ReferenceImageDatabase implements Iterable<ReferenceImage>
73 56
     constructor()
74 57
     {
75 58
         this._capacity = DEFAULT_CAPACITY;
76
-        this._database = [];
59
+        this._entries = new Map();
77 60
         this._locked = false;
78
-        this._busy = false;
79 61
     }
80 62
 
81 63
     /**
@@ -83,7 +65,7 @@ export class ReferenceImageDatabase implements Iterable<ReferenceImage>
83 65
      */
84 66
     get count(): number
85 67
     {
86
-        return this._database.length;
68
+        return this._entries.size;
87 69
     }
88 70
 
89 71
     /**
@@ -111,10 +93,9 @@ export class ReferenceImageDatabase implements Iterable<ReferenceImage>
111 93
     /**
112 94
      * Iterates over the collection
113 95
      */
114
-    *[Symbol.iterator](): Iterator<ReferenceImage>
96
+    [Symbol.iterator](): Iterator<ReferenceImageWithMedia>
115 97
     {
116
-        const ref = this._database.map(entry => entry.referenceImage);
117
-        yield* ref;
98
+        return this._entries.values();
118 99
     }
119 100
 
120 101
     /**
@@ -127,51 +108,44 @@ export class ReferenceImageDatabase implements Iterable<ReferenceImage>
127 108
      */
128 109
     add(referenceImages: ReferenceImage[]): SpeedyPromise<void>
129 110
     {
130
-        // handle no input
131
-        if(referenceImages.length == 0)
132
-            return Speedy.Promise.resolve();
133
-
134
-        // handle multiple images as input
135
-        if(referenceImages.length > 1) {
136
-            const promises = referenceImages.map(image => this.add([ image ]));
137
-            return Utils.runInSequence(promises);
138
-        }
111
+        return this._preloadMany(referenceImages).then(referenceImagesWithMedia => {
112
+            referenceImagesWithMedia.forEach(referenceImageWithMedia => {
113
+                this._addOne(referenceImageWithMedia);
114
+            });
115
+        });
116
+    }
139 117
 
140
-        // handle a single image as input
141
-        const referenceImage = referenceImages[0];
118
+    /**
119
+     * Add a single preloaded reference image to the database
120
+     * @param referenceImage
121
+     */
122
+    _addOne(referenceImage: ReferenceImageWithMedia): void
123
+    {
124
+        const name = referenceImage.name;
142 125
 
143 126
         // locked database?
144 127
         if(this._locked)
145
-            throw new IllegalOperationError(`Can't add reference image "${referenceImage.name}" to the database: it's locked`);
146
-
147
-        // busy loading another image?
148
-        if(this._busy)
149
-            return Utils.wait(4).then(() => this.add(referenceImages)); // try again later
128
+            throw new IllegalOperationError(`Can't add reference image "${name}" to the database: it's locked`);
150 129
 
151 130
         // reached full capacity?
152 131
         if(this.count >= this.capacity)
153
-            throw new IllegalOperationError(`Can't add reference image "${referenceImage.name}" to the database: the capacity of ${this.capacity} images has been exceeded.`);
132
+            throw new IllegalOperationError(`Can't add reference image "${name}" to the database: the capacity of ${this.capacity} images has been exceeded.`);
154 133
 
155 134
         // check if the image is valid
156
-        if(!(referenceImage.image instanceof HTMLImageElement) && !(referenceImage.image instanceof HTMLCanvasElement) && !(referenceImage.image instanceof ImageBitmap))
157
-            throw new IllegalArgumentError(`Can't add reference image "${referenceImage.name}" to the database: invalid image`);
135
+        if(
136
+            !(referenceImage.image instanceof HTMLImageElement) &&
137
+            !(referenceImage.image instanceof ImageBitmap) &&
138
+            !(referenceImage.image instanceof ImageData)
139
+        )
140
+            throw new IllegalArgumentError(`Can't add reference image "${name}" to the database: invalid image`);
158 141
 
159 142
         // check for duplicate names
160
-        if(this._database.find(entry => entry.referenceImage.name === referenceImage.name) !== undefined)
161
-            throw new IllegalArgumentError(`Can't add reference image "${referenceImage.name}" to the database: found duplicated name`);
143
+        if(this._entries.has(name))
144
+            throw new IllegalArgumentError(`Can't add reference image "${name}" to the database: found duplicated name`);
162 145
 
163
-        // load the media and add the reference image to the database
164
-        this._busy = true;
165
-        return Speedy.load(referenceImage.image).then(media => {
166
-            this._busy = false;
167
-            this._database.push({
168
-                referenceImage: Object.freeze({
169
-                    ...referenceImage,
170
-                    name: referenceImage.name || generateUniqueName()
171
-                }),
172
-                media: media
173
-            });
174
-        });
146
+        // add the reference image to the database
147
+        Utils.log(`Adding reference image "${name}" to the database...`);
148
+        this._entries.set(name, referenceImage);
175 149
     }
176 150
 
177 151
     /**
@@ -180,25 +154,51 @@ export class ReferenceImageDatabase implements Iterable<ReferenceImage>
180 154
      */
181 155
     _lock(): void
182 156
     {
183
-        if(this._busy)
184
-            throw new IllegalOperationError(`Can't lock the reference image database: we're busy loading an image`);
185
-
186 157
         this._locked = true;
187 158
     }
188 159
 
189 160
     /**
190
-     * Get the media object associated to a reference image
191
-     * @param name reference image name
192
-     * @returns media
161
+     * Get reference image by name
162
+     * @param name
163
+     * @returns the reference image with the given name, or null if there isn't any
193 164
      * @internal
194 165
      */
195
-    _findMedia(name: string): SpeedyMedia
166
+    _find(name: string): ReferenceImageWithMedia | null
167
+    {
168
+        return this._entries.get(name) || null;
169
+    }
170
+
171
+    /**
172
+     * Load a reference image
173
+     * @param referenceImage
174
+     * @returns a promise that resolves to a corresponding ReferenceImageWithMedia
175
+     */
176
+    private _preloadOne(referenceImage: ReferenceImage): SpeedyPromise<ReferenceImageWithMedia>
177
+    {
178
+        if(referenceImage.name !== undefined)
179
+            Utils.log(`Loading reference image \"${referenceImage.name}\"...`);
180
+        else
181
+            Utils.log(`Loading reference image...`);
182
+
183
+        if(!referenceImage.image)
184
+            return Speedy.Promise.reject(new IllegalArgumentError('The reference image was not provided!'));
185
+
186
+        return Speedy.load(referenceImage.image).then(media => {
187
+            return new ReferenceImageWithMedia(referenceImage, media);
188
+        });
189
+    }
190
+
191
+    /**
192
+     * Load multiple reference images
193
+     * @param referenceImages
194
+     * @returns a promise that resolves to corresponding ReferenceImageWithMedia objects
195
+     */
196
+    private _preloadMany(referenceImages: ReferenceImage[]): SpeedyPromise<ReferenceImageWithMedia[]>
196 197
     {
197
-        for(let i = 0; i < this._database.length; i++) {
198
-            if(this._database[i].referenceImage.name === name)
199
-                return this._database[i].media;
200
-        }
198
+        const n = referenceImages.length;
199
+        Utils.log(`Loading ${n} reference image${n != 1 ? 's' : ''}...`);
201 200
 
202
-        throw new IllegalArgumentError(`Can't find reference image "${name}"`);
201
+        const promises = referenceImages.map(referenceImage => this._preloadOne(referenceImage));
202
+        return Speedy.Promise.all<ReferenceImageWithMedia>(promises);
203 203
     }
204 204
 }

+ 92
- 2
src/trackers/image-tracker/reference-image.ts Vedi File

@@ -20,14 +20,104 @@
20 20
  * Reference Image for tracking
21 21
  */
22 22
 
23
+import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
24
+
25
+type ReferenceImageType = HTMLImageElement | ImageBitmap | ImageData;
26
+
27
+
28
+
23 29
 /**
24 30
  * Reference Image for tracking
25 31
  */
26 32
 export interface ReferenceImage
27 33
 {
28 34
     /** Reference Images should have unique names given by the user */
29
-    readonly name: string;
35
+    name?: string;
30 36
 
31 37
     /** Image data */
32
-    readonly image: HTMLImageElement | HTMLCanvasElement | ImageBitmap;
38
+    readonly image: ReferenceImageType;
39
+}
40
+
41
+/**
42
+ * A ReferenceImage decorated with a SpeedyMedia
43
+ */
44
+export class ReferenceImageWithMedia implements ReferenceImage
45
+{
46
+    /** The decorated reference image */
47
+    private readonly _referenceImage: ReferenceImage;
48
+
49
+    /** A SpeedyMedia corresponding to the reference image */
50
+    private readonly _media: SpeedyMedia;
51
+
52
+    /** The aspect ratio of the reference image */
53
+    private readonly _aspectRatio: number;
54
+
55
+
56
+
57
+    /**
58
+     * Constructor
59
+     * @param referenceImage
60
+     * @param media
61
+     */
62
+    constructor(referenceImage: ReferenceImage, media: SpeedyMedia)
63
+    {
64
+        this._referenceImage = Object.assign({}, referenceImage);
65
+        this._media = media;
66
+
67
+        // generate a unique name if none is given
68
+        if(this._referenceImage.name === undefined)
69
+            this._referenceImage.name = this._generateUniqueName();
70
+
71
+        // store the aspect ratio
72
+        this._aspectRatio = media.width / media.height;
73
+    }
74
+
75
+    /**
76
+     * Getter of the name of the reference image
77
+     */
78
+    get name(): string
79
+    {
80
+        return this._referenceImage.name!;
81
+    }
82
+
83
+    /**
84
+     * Setter of the name of the reference image
85
+     */
86
+    set name(name: string)
87
+    {
88
+        this._referenceImage.name = name;
89
+    }
90
+
91
+    /**
92
+     * Image data
93
+     */
94
+    get image(): ReferenceImageType
95
+    {
96
+        return this._referenceImage.image;
97
+    }
98
+
99
+    /**
100
+     * A SpeedyMedia corresponding to the reference media
101
+     */
102
+    get media(): SpeedyMedia
103
+    {
104
+        return this._media;
105
+    }
106
+
107
+    /**
108
+     * The aspect ratio of the reference image
109
+     */
110
+    get aspectRatio(): number
111
+    {
112
+        return this._aspectRatio;
113
+    }
114
+
115
+    /**
116
+     * Generate a unique name for a reference image
117
+     * @returns a unique name
118
+     */
119
+    private _generateUniqueName(): string
120
+    {
121
+        return 'target-' + Math.random().toString(16).substr(2);
122
+    }
33 123
 }

+ 19
- 10
src/trackers/image-tracker/settings.ts Vedi File

@@ -29,8 +29,8 @@ export const TRAIN_MAX_KEYPOINTS = 1024; //512;
29 29
 /** Percentage relative to the screen size adjusted to the aspect ratio of the reference image */
30 30
 export const TRAIN_IMAGE_SCALE = 0.8; // ORB is not scale invariant
31 31
 
32
-/** Normalized width & height of an image target, in pixels */
33
-export const TRAIN_TARGET_NORMALIZED_SIZE = 1024; // keypoint positions are stored as fixed point
32
+/** Width and height of the Normalized Image Space (NIS) */
33
+export const NIS_SIZE = 1024; // keypoint positions are stored as fixed point
34 34
 
35 35
 /** Used to identify the best matches */
36 36
 export const SCAN_MATCH_RATIO = 0.7; // usually a value in [0.6, 0.8]
@@ -47,14 +47,17 @@ export const SCAN_PYRAMID_SCALEFACTOR = 1.19; // 2 ^ 0.25
47 47
 /** Threshold of the FAST corner detector used in the scanning/training states */
48 48
 export const SCAN_FAST_THRESHOLD = 60;
49 49
 
50
-/** Minimum number of accepted matches for us to move out from the scanning state */
50
+/** Minimum number of accepted matches for us to move out of the scanning state */
51 51
 export const SCAN_MIN_MATCHES = 20; //30;
52 52
 
53 53
 /** When in the scanning state, we require the image to be matched during a few consecutive frames before accepting it */
54 54
 export const SCAN_CONSECUTIVE_FRAMES = 30;//15;//45;
55 55
 
56
-/** Reprojection error, in pixels, used when estimating a motion model (scanning state) */
57
-export const SCAN_RANSAC_REPROJECTIONERROR = 5;
56
+/** Reprojection error, in NIS pixels, used when estimating a motion model (scanning state) */
57
+export const SCAN_RANSAC_REPROJECTIONERROR_NIS = (NIS_SIZE * 0.02) | 0;
58
+
59
+/** Reprojection error, in NDC, used when estimating a motion model (scanning state) */
60
+export const SCAN_RANSAC_REPROJECTIONERROR_NDC = SCAN_RANSAC_REPROJECTIONERROR_NIS / (NIS_SIZE / 2);
58 61
 
59 62
 /** Number of tables used in the LSH-based keypoint matching */
60 63
 export const SCAN_LSH_TABLES = 8; // up to 32
@@ -92,6 +95,9 @@ export const SUBPIXEL_GAUSSIAN_SIGMA = 1.0;
92 95
 /** Subpixel refinement method */
93 96
 export const SUBPIXEL_METHOD = 'bilinear-upsample'; // 'quadratic1d';
94 97
 
98
+/** Minimum acceptable number of matched keypoints when in a pre-tracking state */
99
+export const PRE_TRACK_MIN_MATCHES = 4;
100
+
95 101
 /** Minimum acceptable number of matched keypoints when in the tracking state */
96 102
 export const TRACK_MIN_MATCHES = 4;//10; //20;
97 103
 
@@ -113,11 +119,14 @@ export const TRACK_RECTIFIED_BORDER = 0.15; //0.20;
113 119
 /** Relative size (%) used to clip keypoints from the borders of the rectified image */
114 120
 export const TRACK_CLIPPING_BORDER = TRACK_RECTIFIED_BORDER * 1.20; //1.25; //1.15;
115 121
 
116
-/** Number of iterations used to refine the target image before tracking */
117
-export const TRACK_REFINEMENT_ITERATIONS = 3;
122
+/** Scale of the rectified image in NDC, without taking the aspect ratio into consideration */
123
+export const TRACK_RECTIFIED_SCALE = 1 - 2 * TRACK_RECTIFIED_BORDER;
124
+
125
+/** Reprojection error, in NIS pixels, used when estimating a motion model (tracking state) */
126
+export const TRACK_RANSAC_REPROJECTIONERROR_NIS = (NIS_SIZE * 0.0125) | 0;
118 127
 
119
-/** Reprojection error, in pixels, used when estimating a motion model (tracking state) */
120
-export const TRACK_RANSAC_REPROJECTIONERROR = 3; //2.5;
128
+/** Reprojection error, in NDC, used when estimating a motion model (tracking state) */
129
+export const TRACK_RANSAC_REPROJECTIONERROR_NDC = TRACK_RANSAC_REPROJECTIONERROR_NIS / (NIS_SIZE / 2);
121 130
 
122 131
 /** We use a N x N grid to spatially distribute the keypoints in order to compute a better homography */
123 132
 export const TRACK_GRID_GRANULARITY = 10; //20; // the value of N
@@ -126,4 +135,4 @@ export const TRACK_GRID_GRANULARITY = 10; //20; // the value of N
126 135
 export const TRACK_MATCH_RATIO = 0.75; // usually a value in [0.6, 0.8] - low values => strict tracking
127 136
 
128 137
 /** Number of consecutive frames in which we tolerate a "target lost" situation */
129
-export const TRACK_LOST_TOLERANCE = 10;
138
+export const TRACK_LOST_TOLERANCE = 15;

+ 10
- 0
src/trackers/image-tracker/states/initial.ts Vedi File

@@ -78,6 +78,16 @@ export class ImageTrackerInitialState extends ImageTrackerState
78 78
     }
79 79
 
80 80
     /**
81
+     * Called when leaving the state, after update()
82
+     */
83
+    onLeaveState(): void
84
+    {
85
+        // we don't return to this state, so we can release the pipeline early
86
+        this._pipeline.release();
87
+        this._pipelineReleased = true;
88
+    }
89
+
90
+    /**
81 91
      * Create & setup the pipeline
82 92
      * @returns pipeline
83 93
      */

+ 273
- 0
src/trackers/image-tracker/states/pre-tracking-a.ts Vedi File

@@ -0,0 +1,273 @@
1
+/*
2
+ * encantar.js
3
+ * GPU-accelerated Augmented Reality for the web
4
+ * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
5
+ *
6
+ * This program is free software: you can redistribute it and/or modify
7
+ * it under the terms of the GNU Lesser General Public License as published
8
+ * by the Free Software Foundation, either version 3 of the License, or
9
+ * (at your option) any later version.
10
+ *
11
+ * This program is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ * GNU Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public License
17
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
18
+ *
19
+ * pre-tracking-a.ts
20
+ * Image tracker: Pre-Tracking A state
21
+ */
22
+
23
+import Speedy from 'speedy-vision';
24
+import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
25
+import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
26
+import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
27
+import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
28
+import { SpeedyPipeline, SpeedyPipelineOutput } from 'speedy-vision/types/core/pipeline/pipeline';
29
+import { SpeedyPipelineNodeImageSource } from 'speedy-vision/types/core/pipeline/nodes/images/source';
30
+import { SpeedyPipelineNodeImageMultiplexer } from 'speedy-vision/types/core/pipeline/nodes/images/multiplexer';
31
+import { SpeedyPipelineNodeImagePortalSource, SpeedyPipelineNodeImagePortalSink } from 'speedy-vision/types/core/pipeline/nodes/images/portal';
32
+import { SpeedyPipelineNodeKeypointPortalSource, SpeedyPipelineNodeKeypointPortalSink } from 'speedy-vision/types/core/pipeline/nodes/keypoints/portal';
33
+import { SpeedyPipelineNodeResize } from 'speedy-vision/types/core/pipeline/nodes/transforms/resize';
34
+import { SpeedyPipelineNodePerspectiveWarp } from 'speedy-vision/types/core/pipeline/nodes/transforms/perspective-warp';
35
+import { SpeedyPipelineNodeKeypointBorderClipper } from 'speedy-vision/types/core/pipeline/nodes/keypoints/border-clipper';
36
+import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/transformer';
37
+import { SpeedyKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
38
+import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
39
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
40
+import { ImageTrackerState, ImageTrackerStateOutput } from './state';
41
+import { ReferenceImage, ReferenceImageWithMedia } from '../reference-image';
42
+import { Nullable, Utils } from '../../../utils/utils';
43
+import { TrackingError } from '../../../utils/errors';
44
+import {
45
+    TRACK_RECTIFIED_SCALE, TRACK_CLIPPING_BORDER,
46
+    NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY, TRACK_WITH_NIGHTVISION,
47
+    ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_SIGMA,
48
+    TRACK_HARRIS_QUALITY, TRACK_DETECTOR_CAPACITY, TRACK_MAX_KEYPOINTS,
49
+    SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_SIGMA,
50
+    PRE_TRACK_MIN_MATCHES,
51
+    NIGHTVISION_QUALITY,
52
+    SUBPIXEL_METHOD,
53
+} from '../settings';
54
+
55
+
56
+
57
+/**
58
+ * Pre-Tracking A is a new training phase. The reference image that was found
59
+ * in the scanning state is transported to AR screen space, and a new training
60
+ * takes place there, with new keypoints and in a suitable warp.
61
+ */
62
+export class ImageTrackerPreTrackingAState extends ImageTrackerState
63
+{
64
+    /** reference image */
65
+    private _referenceImage: Nullable<ReferenceImageWithMedia>;
66
+
67
+    /** a snapshot of the video from the scanning state and corresponding to the initial homography */
68
+    private _snapshot: Nullable<SpeedyPipelineNodeImagePortalSink>;
69
+
70
+    /** initial homography, from reference image to scanned image, NDC */
71
+    private _homography: SpeedyMatrix;
72
+
73
+
74
+
75
+    /**
76
+     * Constructor
77
+     * @param imageTracker
78
+     */
79
+    constructor(imageTracker: ImageTracker)
80
+    {
81
+        super('pre-tracking-a', imageTracker);
82
+
83
+        this._homography = Speedy.Matrix.Eye(3);
84
+        this._referenceImage = null;
85
+        this._snapshot = null;
86
+    }
87
+
88
+    /**
89
+     * Called as soon as this becomes the active state, just before update() runs for the first time
90
+     * @param settings
91
+     */
92
+    onEnterState(settings: Record<string,any>)
93
+    {
94
+        const homography = settings.homography as SpeedyMatrix;
95
+        const referenceImage = settings.referenceImage as ReferenceImageWithMedia;
96
+        const snapshot = settings.snapshot as SpeedyPipelineNodeImagePortalSink;
97
+
98
+        // set attributes
99
+        this._homography = homography;
100
+        this._referenceImage = referenceImage;
101
+        this._snapshot = snapshot;
102
+    }
103
+
104
+    /**
105
+     * Called just before the GPU processing
106
+     * @returns promise
107
+     */
108
+    protected _beforeUpdate(): SpeedyPromise<void>
109
+    {
110
+        const screenSize = this.screenSize;
111
+        const source = this._pipeline.node('source') as SpeedyPipelineNodeImageSource;
112
+        const imageRectifier = this._pipeline.node('imageRectifier') as SpeedyPipelineNodePerspectiveWarp;
113
+        const keypointScaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
114
+        const borderClipper = this._pipeline.node('borderClipper') as SpeedyPipelineNodeKeypointBorderClipper;
115
+
116
+        // set the reference image as the source image
117
+        source.media = this._referenceImage!.media;
118
+
119
+        // clip keypoints from the borders of the target image
120
+        borderClipper.imageSize = screenSize;
121
+        borderClipper.borderSize = Speedy.Vector2(
122
+            screenSize.width * TRACK_CLIPPING_BORDER,
123
+            screenSize.height * TRACK_CLIPPING_BORDER
124
+        );
125
+
126
+        // convert keypoints to NIS
127
+        keypointScaler.transform = ImageTrackerUtils.rasterToNIS(screenSize);
128
+
129
+        // rectify the image
130
+        const scale = TRACK_RECTIFIED_SCALE;
131
+        const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(screenSize, this._referenceImage!);
132
+        const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, scale);
133
+        const toScreen = ImageTrackerUtils.NDCToRaster(screenSize);
134
+        const toNDC = ImageTrackerUtils.rasterToNDC(screenSize);
135
+
136
+        return imageRectifier.transform.setTo(
137
+            toScreen.times(shrink).times(toNDC)
138
+        ).then(() => void 0);
139
+    }
140
+
141
+    /**
142
+     * Post processing that takes place just after the GPU processing
143
+     * @param result pipeline results
144
+     * @returns state output
145
+     */
146
+    protected _afterUpdate(result: SpeedyPipelineOutput): SpeedyPromise<ImageTrackerStateOutput>
147
+    {
148
+        const referenceImage = this._referenceImage!;
149
+        const keypointPortalSink = this._pipeline.node('keypointPortalSink') as SpeedyPipelineNodeKeypointPortalSink;
150
+        const keypoints = result.keypoints as SpeedyKeypoint[];
151
+        const image = result.image as SpeedyMedia | undefined;
152
+
153
+        // tracker output
154
+        const trackerOutput: ImageTrackerOutput = {
155
+            keypointsNIS: image !== undefined ? keypoints : undefined, // debug only
156
+            image: image,
157
+        };
158
+
159
+        // not enough keypoints? something went wrong!
160
+        if(keypoints.length < PRE_TRACK_MIN_MATCHES) {
161
+            Utils.warning(`Can't pre-track "${referenceImage.name}" in ${this.name}!`);
162
+            return Speedy.Promise.resolve({
163
+                nextState: 'scanning',
164
+                trackerOutput: trackerOutput,
165
+            });
166
+        }
167
+
168
+        // done!
169
+        return Speedy.Promise.resolve({
170
+            nextState: 'pre-tracking-b',
171
+            trackerOutput: trackerOutput,
172
+            nextStateSettings: {
173
+                referenceKeypointPortalSink: keypointPortalSink,
174
+                referenceImage: this._referenceImage,
175
+                snapshot: this._snapshot,
176
+                homography: this._homography,
177
+            }
178
+        });
179
+    }
180
+
181
+    /**
182
+     * Create & setup the pipeline
183
+     * @returns pipeline
184
+     */
185
+    protected _createPipeline(): SpeedyPipeline
186
+    {
187
+        const pipeline = Speedy.Pipeline();
188
+
189
+        const source = Speedy.Image.Source('source');
190
+        const screen = Speedy.Transform.Resize('screen');
191
+        const greyscale = Speedy.Filter.Greyscale();
192
+        const imageRectifier = Speedy.Transform.PerspectiveWarp('imageRectifier');
193
+        const nightvision = Speedy.Filter.Nightvision();
194
+        const nightvisionMux = Speedy.Image.Multiplexer();
195
+        const detector = Speedy.Keypoint.Detector.Harris();
196
+        const descriptor = Speedy.Keypoint.Descriptor.ORB();
197
+        const blur = Speedy.Filter.GaussianBlur();
198
+        const clipper = Speedy.Keypoint.Clipper();
199
+        const borderClipper = Speedy.Keypoint.BorderClipper('borderClipper');
200
+        const denoiser = Speedy.Filter.GaussianBlur();
201
+        const subpixel = Speedy.Keypoint.SubpixelRefiner();
202
+        const keypointScaler = Speedy.Keypoint.Transformer('keypointScaler');
203
+        const keypointPortalSink = Speedy.Keypoint.Portal.Sink('keypointPortalSink');
204
+        const keypointSink = Speedy.Keypoint.Sink('keypoints');
205
+        //const imageSink = Speedy.Image.Sink('image');
206
+
207
+        source.media = null;
208
+        imageRectifier.transform = Speedy.Matrix.Eye(3);
209
+        screen.size = Speedy.Size(0,0);
210
+        nightvision.gain = NIGHTVISION_GAIN;
211
+        nightvision.offset = NIGHTVISION_OFFSET;
212
+        nightvision.decay = NIGHTVISION_DECAY;
213
+        nightvision.quality = NIGHTVISION_QUALITY;
214
+        nightvisionMux.port = TRACK_WITH_NIGHTVISION ? 1 : 0; // 1 = enable nightvision
215
+        blur.kernelSize = Speedy.Size(ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_KSIZE);
216
+        blur.sigma = Speedy.Vector2(ORB_GAUSSIAN_SIGMA, ORB_GAUSSIAN_SIGMA);
217
+        denoiser.kernelSize = Speedy.Size(SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_KSIZE);
218
+        denoiser.sigma = Speedy.Vector2(SUBPIXEL_GAUSSIAN_SIGMA, SUBPIXEL_GAUSSIAN_SIGMA);
219
+        detector.quality = TRACK_HARRIS_QUALITY;
220
+        detector.capacity = TRACK_DETECTOR_CAPACITY;
221
+        subpixel.method = SUBPIXEL_METHOD;
222
+        clipper.size = TRACK_MAX_KEYPOINTS;
223
+        borderClipper.imageSize = screen.size;
224
+        borderClipper.borderSize = Speedy.Vector2(0,0);
225
+        keypointScaler.transform = Speedy.Matrix.Eye(3);
226
+        keypointSink.turbo = false;
227
+
228
+        // prepare input
229
+        source.output().connectTo(screen.input());
230
+        screen.output().connectTo(greyscale.input());
231
+
232
+        // preprocess images
233
+        greyscale.output().connectTo(imageRectifier.input());
234
+        imageRectifier.output().connectTo(nightvisionMux.input('in0'));
235
+        imageRectifier.output().connectTo(nightvision.input());
236
+        nightvision.output().connectTo(nightvisionMux.input('in1'));
237
+
238
+        // keypoint detection & clipping
239
+        nightvisionMux.output().connectTo(detector.input());
240
+        detector.output().connectTo(borderClipper.input());
241
+        borderClipper.output().connectTo(clipper.input());
242
+
243
+        // keypoint refinement
244
+        imageRectifier.output().connectTo(denoiser.input());
245
+        denoiser.output().connectTo(subpixel.input('image'));
246
+        clipper.output().connectTo(subpixel.input('keypoints'));
247
+
248
+        // keypoint description
249
+        nightvisionMux.output().connectTo(blur.input());
250
+        blur.output().connectTo(descriptor.input('image'));
251
+        subpixel.output().connectTo(descriptor.input('keypoints'));
252
+
253
+        // prepare output
254
+        descriptor.output().connectTo(keypointScaler.input());
255
+        keypointScaler.output().connectTo(keypointSink.input());
256
+        keypointScaler.output().connectTo(keypointPortalSink.input());
257
+        //imageRectifier.output().connectTo(imageSink.input());
258
+
259
+        // done!
260
+        pipeline.init(
261
+            source, screen,
262
+            greyscale, imageRectifier,
263
+            nightvision, nightvisionMux,
264
+            detector, borderClipper, clipper,
265
+            denoiser, subpixel,
266
+            blur, descriptor,
267
+            keypointScaler, keypointSink, keypointPortalSink,
268
+            //imageSink
269
+        );
270
+
271
+        return pipeline;
272
+    }
273
+}

+ 391
- 0
src/trackers/image-tracker/states/pre-tracking-b.ts Vedi File

@@ -0,0 +1,391 @@
1
+/*
2
+ * encantar.js
3
+ * GPU-accelerated Augmented Reality for the web
4
+ * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
5
+ *
6
+ * This program is free software: you can redistribute it and/or modify
7
+ * it under the terms of the GNU Lesser General Public License as published
8
+ * by the Free Software Foundation, either version 3 of the License, or
9
+ * (at your option) any later version.
10
+ *
11
+ * This program is distributed in the hope that it will be useful,
12
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
+ * GNU Lesser General Public License for more details.
15
+ *
16
+ * You should have received a copy of the GNU Lesser General Public License
17
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
18
+ *
19
+ * pre-tracking-b.ts
20
+ * Image tracker: Pre-Tracking B state
21
+ */
22
+
23
+import Speedy from 'speedy-vision';
24
+import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
25
+import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
26
+import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
27
+import { SpeedyMatrixExpr } from 'speedy-vision/types/core/speedy-matrix-expr';
28
+import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
29
+import { SpeedyPipeline, SpeedyPipelineOutput } from 'speedy-vision/types/core/pipeline/pipeline';
30
+import { SpeedyPipelineNodeImageSource } from 'speedy-vision/types/core/pipeline/nodes/images/source';
31
+import { SpeedyPipelineNodeImageMultiplexer } from 'speedy-vision/types/core/pipeline/nodes/images/multiplexer';
32
+import { SpeedyPipelineNodeImagePortalSource, SpeedyPipelineNodeImagePortalSink } from 'speedy-vision/types/core/pipeline/nodes/images/portal';
33
+import { SpeedyPipelineNodeKeypointPortalSource, SpeedyPipelineNodeKeypointPortalSink } from 'speedy-vision/types/core/pipeline/nodes/keypoints/portal';
34
+import { SpeedyPipelineNodeResize } from 'speedy-vision/types/core/pipeline/nodes/transforms/resize';
35
+import { SpeedyPipelineNodePerspectiveWarp } from 'speedy-vision/types/core/pipeline/nodes/transforms/perspective-warp';
36
+import { SpeedyPipelineNodeKeypointBorderClipper } from 'speedy-vision/types/core/pipeline/nodes/keypoints/border-clipper';
37
+import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/transformer';
38
+import { SpeedyKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
39
+import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
40
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
41
+import { ImageTrackerState, ImageTrackerStateOutput } from './state';
42
+import { ReferenceImageWithMedia } from '../reference-image';
43
+import { Nullable, Utils } from '../../../utils/utils';
44
+import { TrackingError } from '../../../utils/errors';
45
+import {
46
+    TRACK_RECTIFIED_SCALE, TRACK_CLIPPING_BORDER,
47
+    NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY, TRACK_WITH_NIGHTVISION,
48
+    ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_SIGMA,
49
+    TRACK_HARRIS_QUALITY, TRACK_DETECTOR_CAPACITY, TRACK_MAX_KEYPOINTS,
50
+    SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_SIGMA,
51
+    PRE_TRACK_MIN_MATCHES, TRACK_MATCH_RATIO, TRACK_RANSAC_REPROJECTIONERROR_NDC,
52
+    NIGHTVISION_QUALITY,
53
+    SUBPIXEL_METHOD,
54
+} from '../settings';
55
+
56
+
57
+
58
+
59
/**
 * Pre-Tracking B refines the homography found at the scanning state.
 * It looks for a transformation that warps the snapshot taken at the scanning
 * state into an image that closely resembles the output of Pre-Tracking A.
 */
export class ImageTrackerPreTrackingBState extends ImageTrackerState
{
    /** the reference image */
    private _referenceImage: Nullable<ReferenceImageWithMedia>;

    /** snapshot of the video taken at the scanning state; it corresponds to the initial homography */
    private _snapshot: Nullable<SpeedyPipelineNodeImagePortalSink>;

    /** initial homography in NDC, from the reference image to the scanned image */
    private _homography: SpeedyMatrix;

    /** portal holding the keypoints of Pre-Tracking A */
    private _referenceKeypointPortalSink: Nullable<SpeedyPipelineNodeKeypointPortalSink>;

    /**
     * Constructor
     * @param imageTracker
     */
    constructor(imageTracker: ImageTracker)
    {
        super('pre-tracking-b', imageTracker);

        // nothing is known until onEnterState() is called
        this._referenceImage = null;
        this._snapshot = null;
        this._referenceKeypointPortalSink = null;
        this._homography = Speedy.Matrix.Eye(3);
    }

    /**
     * Called as soon as this becomes the active state, just before update() runs for the first time
     * @param settings the fields homography, referenceImage, snapshot and referenceKeypointPortalSink are read
     */
    onEnterState(settings: Record<string,any>)
    {
        // store what was handed over by the previous state
        this._homography = settings.homography as SpeedyMatrix;
        this._referenceImage = settings.referenceImage as ReferenceImageWithMedia;
        this._snapshot = settings.snapshot as SpeedyPipelineNodeImagePortalSink;
        this._referenceKeypointPortalSink = settings.referenceKeypointPortalSink as SpeedyPipelineNodeKeypointPortalSink;
    }

    /**
     * Called just before the GPU processing
     * @returns a promise that resolves once the rectification matrix has been set
     */
    protected _beforeUpdate(): SpeedyPromise<void>
    {
        const size = this.screenSize;
        const rectifier = this._pipeline.node('imageRectifier') as SpeedyPipelineNodePerspectiveWarp;
        const scaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
        const edgeClipper = this._pipeline.node('borderClipper') as SpeedyPipelineNodeKeypointBorderClipper;
        const snapshotSource = this._pipeline.node('imagePortalSource') as SpeedyPipelineNodeImagePortalSource;
        const referenceSource = this._pipeline.node('referenceKeypointPortalSource') as SpeedyPipelineNodeKeypointPortalSource;

        // read the snapshot of the scanning state
        snapshotSource.source = this._snapshot!;

        // read the reference keypoints of Pre-Tracking A
        referenceSource.source = this._referenceKeypointPortalSink!;

        // discard the keypoints that are too close to the borders of the image
        const borderWidth = size.width * TRACK_CLIPPING_BORDER;
        const borderHeight = size.height * TRACK_CLIPPING_BORDER;
        edgeClipper.imageSize = size;
        edgeClipper.borderSize = Speedy.Vector2(borderWidth, borderHeight);

        // the keypoints will be output in NIS
        scaler.transform = ImageTrackerUtils.rasterToNIS(size);

        // image rectification: raster -> NDC -> undo the homography -> shrink -> raster
        const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(size, this._referenceImage!);
        const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, TRACK_RECTIFIED_SCALE);
        const undistort = this._homography.inverse();
        const toScreen = ImageTrackerUtils.NDCToRaster(size);
        const toNDC = ImageTrackerUtils.rasterToNDC(size);
        const rectification = toScreen.times(shrink.times(undistort)).times(toNDC);

        return rectifier.transform.setTo(rectification).then(() => undefined);
    }

    /**
     * Post processing that takes place just after the GPU processing
     * @param result pipeline results
     * @returns state output: moves on to 'tracking' on success, goes back to 'scanning' on failure
     */
    protected _afterUpdate(result: SpeedyPipelineOutput): SpeedyPromise<ImageTrackerStateOutput>
    {
        const referenceImage = this._referenceImage!;
        const referenceKeypoints = result.referenceKeypoints as SpeedyKeypoint[]; // computed in Pre-Tracking A
        const keypoints = result.keypoints as SpeedyMatchedKeypoint[]; // computed in this state
        const image = result.image as SpeedyMedia | undefined;
        const portalSink = this._pipeline.node('keypointPortalSink') as SpeedyPipelineNodeKeypointPortalSink;

        // the keypoints are only exported along with an image (debug only)
        const trackerOutput: ImageTrackerOutput = {
            keypointsNIS: image === undefined ? undefined : keypoints,
            image: image,
        };

        return Speedy.Promise.resolve()
        .then(() => {

            // pair up the keypoints of both states
            const matchingPairs = this._findMatchingPairs(referenceKeypoints, keypoints);
            if(matchingPairs.length < PRE_TRACK_MIN_MATCHES)
                throw new TrackingError('Not enough data points');

            // estimate a warp from the pairs
            return this._findAffineMotionNDC(
                ImageTrackerUtils.compilePairsOfKeypointsNDC(matchingPairs)
            );

        })
        .then(warp => this._homography.setTo(warp.times(this._homography))) // refined homography
        .then(() => ({
            nextState: 'tracking',
            trackerOutput: trackerOutput,
            nextStateSettings: {
                // the exported keypoints are the ones of Pre-Tracking B, not of A:
                // lighting conditions match, but what if the snapshot is too blurry?
                templateKeypoints: keypoints,
                templateKeypointPortalSink: portalSink,
                referenceImage: this._referenceImage,
                homography: this._homography,
                initialScreenSize: this.screenSize,
            }
        }))
        .catch(err => {
            // any failure above sends the tracker back to the scanning state
            Utils.warning(`Can't pre-track "${referenceImage.name}" in ${this.name}! ${err.toString()}`);
            return {
                nextState: 'scanning',
                trackerOutput: trackerOutput,
            };
        });
    }

    /**
     * Find an affine motion model in NDC between pairs of keypoints in NDC
     * given as a 2 x 2n [ src | dest ] matrix
     * @param points compiled pairs of keypoints in NDC
     * @returns a promise that resolves to a 3x3 warp in NDC that maps source to destination
     */
    private _findAffineMotionNDC(points: SpeedyMatrix): SpeedyPromise<SpeedyMatrixExpr>
    {
        const options = {
            method: 'pransac',
            reprojectionError: TRACK_RANSAC_REPROJECTIONERROR_NDC,
            numberOfHypotheses: 4 * 512,
            bundleSize: 128,
            mask: undefined // no mask is requested
        };

        // the score that comes with the warp is not used
        return ImageTrackerUtils.findAffineWarpNDC(points, options).then(([ warp ]) => {

            // undo the scaling that was applied when rectifying the image
            const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(this.screenSize, this._referenceImage!);
            const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, TRACK_RECTIFIED_SCALE);
            const grow = ImageTrackerUtils.inverseBestFitScaleNDC(aspectRatio, TRACK_RECTIFIED_SCALE);
            const unscaledWarp = grow.times(warp).times(shrink);

            // conjugate the warp by the current homography
            const homography = this._homography;
            const inverseHomography = homography.inverse();

            return homography.times(unscaledWarp).times(inverseHomography);

        });
    }

    /**
     * Find matching pairs of two sets of keypoints matched via brute force
     * @param srcKeypoints source (database)
     * @param destKeypoints destination
     * @returns an array of matching pairs [src, dest]
     */
    private _findMatchingPairs(srcKeypoints: SpeedyKeypoint[], destKeypoints: SpeedyMatchedKeypoint[]): ImageTrackerKeypointPair[]
    {
        const matchingPairs: ImageTrackerKeypointPair[] = [];

        for(const destKeypoint of destKeypoints) {
            const best = destKeypoint.matches[0];
            const secondBest = destKeypoint.matches[1];

            // both matches must point to an entry of the database
            if(!(best.index >= 0 && secondBest.index >= 0))
                continue;

            // ratio test: keep the best match only if it is "much better"
            // than the second best, i.e., if they are "distinct enough"
            const isDistinctive = best.distance <= TRACK_MATCH_RATIO * secondBest.distance;
            if(!isDistinctive)
                continue;

            matchingPairs.push([srcKeypoints[best.index], destKeypoint]);
        }

        return matchingPairs;
    }

    /**
     * Build the pipeline of this state and configure its nodes
     * @returns the initialized pipeline
     */
    protected _createPipeline(): SpeedyPipeline
    {
        const pipeline = Speedy.Pipeline();

        // instantiate the nodes
        const sourceNode = Speedy.Image.Source('source');
        const snapshotSource = Speedy.Image.Portal.Source('imagePortalSource');
        const referenceSource = Speedy.Keypoint.Portal.Source('referenceKeypointPortalSource');
        const resizer = Speedy.Transform.Resize('screen');
        const grey = Speedy.Filter.Greyscale();
        const rectifier = Speedy.Transform.PerspectiveWarp('imageRectifier');
        const nightvisionFilter = Speedy.Filter.Nightvision();
        const nightvisionSwitch = Speedy.Image.Multiplexer();
        const harris = Speedy.Keypoint.Detector.Harris();
        const orb = Speedy.Keypoint.Descriptor.ORB();
        const orbBlur = Speedy.Filter.GaussianBlur();
        const keypointClipper = Speedy.Keypoint.Clipper();
        const edgeClipper = Speedy.Keypoint.BorderClipper('borderClipper');
        const subpixelBlur = Speedy.Filter.GaussianBlur();
        const refiner = Speedy.Keypoint.SubpixelRefiner();
        const knnMatcher = Speedy.Keypoint.Matcher.BFKNN();
        const scaler = Speedy.Keypoint.Transformer('keypointScaler');
        const matchedSink = Speedy.Keypoint.SinkOfMatchedKeypoints('keypoints');
        const portalSink = Speedy.Keypoint.Portal.Sink('keypointPortalSink');
        const referenceSink = Speedy.Keypoint.Sink('referenceKeypoints');
        // note: an image sink named 'image' is intentionally left out of this pipeline

        // placeholder values for the inputs, the warp and the screen size
        sourceNode.media = null;
        snapshotSource.source = null;
        referenceSource.source = null;
        rectifier.transform = Speedy.Matrix.Eye(3);
        resizer.size = Speedy.Size(0, 0);

        // nightvision filter; port 1 of the multiplexer picks the filtered image
        nightvisionFilter.gain = NIGHTVISION_GAIN;
        nightvisionFilter.offset = NIGHTVISION_OFFSET;
        nightvisionFilter.decay = NIGHTVISION_DECAY;
        nightvisionFilter.quality = NIGHTVISION_QUALITY;
        nightvisionSwitch.port = TRACK_WITH_NIGHTVISION ? 1 : 0;

        // gaussian filters: one feeds the descriptor, the other the subpixel refiner
        orbBlur.kernelSize = Speedy.Size(ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_KSIZE);
        orbBlur.sigma = Speedy.Vector2(ORB_GAUSSIAN_SIGMA, ORB_GAUSSIAN_SIGMA);
        subpixelBlur.kernelSize = Speedy.Size(SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_KSIZE);
        subpixelBlur.sigma = Speedy.Vector2(SUBPIXEL_GAUSSIAN_SIGMA, SUBPIXEL_GAUSSIAN_SIGMA);

        // detection, refinement and clipping of keypoints
        harris.quality = TRACK_HARRIS_QUALITY;
        harris.capacity = TRACK_DETECTOR_CAPACITY;
        refiner.method = SUBPIXEL_METHOD;
        keypointClipper.size = TRACK_MAX_KEYPOINTS;
        edgeClipper.imageSize = resizer.size;
        edgeClipper.borderSize = Speedy.Vector2(0, 0);

        // matching (best and second best) & output
        knnMatcher.k = 2;
        scaler.transform = Speedy.Matrix.Eye(3);
        matchedSink.turbo = false;

        // input: the snapshot is used; the 'source' node is kept in the pipeline, but left unconnected
        snapshotSource.output().connectTo(resizer.input());
        resizer.output().connectTo(grey.input());

        // image preprocessing: warp, then optionally apply the nightvision filter
        grey.output().connectTo(rectifier.input());
        rectifier.output().connectTo(nightvisionSwitch.input('in0'));
        rectifier.output().connectTo(nightvisionFilter.input());
        nightvisionFilter.output().connectTo(nightvisionSwitch.input('in1'));

        // detect keypoints and clip them
        nightvisionSwitch.output().connectTo(harris.input());
        harris.output().connectTo(edgeClipper.input());
        edgeClipper.output().connectTo(keypointClipper.input());

        // refine the keypoints on a denoised version of the warped image
        rectifier.output().connectTo(subpixelBlur.input());
        subpixelBlur.output().connectTo(refiner.input('image'));
        keypointClipper.output().connectTo(refiner.input('keypoints'));

        // describe the refined keypoints on a blurred image
        nightvisionSwitch.output().connectTo(orbBlur.input());
        orbBlur.output().connectTo(orb.input('image'));
        refiner.output().connectTo(orb.input('keypoints'));

        // match the described keypoints against the reference keypoints
        orb.output().connectTo(knnMatcher.input('keypoints'));
        referenceSource.output().connectTo(knnMatcher.input('database'));

        // output: transformed keypoints, their matches and the reference keypoints
        orb.output().connectTo(scaler.input());
        scaler.output().connectTo(portalSink.input());
        scaler.output().connectTo(matchedSink.input());
        knnMatcher.output().connectTo(matchedSink.input('matches'));
        referenceSource.output().connectTo(referenceSink.input());

        // register the nodes
        pipeline.init(
            sourceNode, resizer, snapshotSource,
            referenceSource,
            grey, rectifier,
            nightvisionFilter, nightvisionSwitch,
            harris, edgeClipper, keypointClipper,
            subpixelBlur, refiner,
            orbBlur, orb,
            knnMatcher,
            scaler, matchedSink, portalSink, referenceSink
        );

        return pipeline;
    }
}

+ 0
- 517
src/trackers/image-tracker/states/pre-tracking.ts Vedi File

@@ -1,517 +0,0 @@
1
-/*
2
- * encantar.js
3
- * GPU-accelerated Augmented Reality for the web
4
- * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
5
- *
6
- * This program is free software: you can redistribute it and/or modify
7
- * it under the terms of the GNU Lesser General Public License as published
8
- * by the Free Software Foundation, either version 3 of the License, or
9
- * (at your option) any later version.
10
- *
11
- * This program is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
- * GNU Lesser General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU Lesser General Public License
17
- * along with this program.  If not, see <https://www.gnu.org/licenses/>.
18
- *
19
- * pre-tracking.ts
20
- * Pre-tracking state of the Image Tracker
21
- */
22
-
23
-import Speedy from 'speedy-vision';
24
-import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
25
-import { SpeedyPoint2 } from 'speedy-vision/types/core/speedy-point';
26
-import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
27
-import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
28
-import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
29
-import { SpeedyPipeline, SpeedyPipelineOutput } from 'speedy-vision/types/core/pipeline/pipeline';
30
-import { SpeedyPipelineNodeImageSource } from 'speedy-vision/types/core/pipeline/nodes/images/source';
31
-import { SpeedyPipelineNodeImageMultiplexer } from 'speedy-vision/types/core/pipeline/nodes/images/multiplexer';
32
-import { SpeedyPipelineNodeImagePortalSource, SpeedyPipelineNodeImagePortalSink } from 'speedy-vision/types/core/pipeline/nodes/images/portal';
33
-import { SpeedyPipelineNodeKeypointPortalSource, SpeedyPipelineNodeKeypointPortalSink } from 'speedy-vision/types/core/pipeline/nodes/keypoints/portal';
34
-import { SpeedyPipelineNodeResize } from 'speedy-vision/types/core/pipeline/nodes/transforms/resize';
35
-import { SpeedyPipelineNodePerspectiveWarp } from 'speedy-vision/types/core/pipeline/nodes/transforms/perspective-warp';
36
-import { SpeedyPipelineNodeKeypointBorderClipper } from 'speedy-vision/types/core/pipeline/nodes/keypoints/border-clipper';
37
-import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/transformer';
38
-import { SpeedyPipelineNodeKeypointMultiplexer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/multiplexer';
39
-import { SpeedyPipelineNodeKeypointBuffer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/buffer';
40
-import { SpeedyPipelineNodeStaticLSHTables } from 'speedy-vision/types/core/pipeline/nodes/keypoints/matchers/lsh-static-tables';
41
-import { SpeedyKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
42
-import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
43
-import { ImageTrackerTrackingState } from './tracking';
44
-import { ImageTrackerState, ImageTrackerStateOutput } from './state';
45
-import { Nullable, Utils } from '../../../utils/utils';
46
-import { IllegalOperationError, TrackingError } from '../../../utils/errors';
47
-import { ReferenceImage } from '../reference-image';
48
-import {
49
-    TRACK_RECTIFIED_BORDER, TRACK_CLIPPING_BORDER, TRACK_REFINEMENT_ITERATIONS,
50
-    NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY, TRACK_WITH_NIGHTVISION,
51
-    ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_SIGMA,
52
-    TRACK_HARRIS_QUALITY, TRACK_DETECTOR_CAPACITY, TRACK_MAX_KEYPOINTS,
53
-    SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_SIGMA,
54
-    TRACK_RANSAC_REPROJECTIONERROR,
55
-    TRAIN_TARGET_NORMALIZED_SIZE,
56
-    TRACK_MATCH_RATIO,
57
-    NIGHTVISION_QUALITY,
58
-    SUBPIXEL_METHOD,
59
-} from '../settings';
60
-
61
-
62
/** Steps of the pre-tracking, which are followed in a fixed sequence */
type PreTrackingStep =
    'read-reference-image' |
    'warp-camera-image' |
    'train-camera-image';

/** Size of the target space that is used by default when training */
const DEFAULT_TARGET_SPACE_SIZE = Speedy.Size(TRAIN_TARGET_NORMALIZED_SIZE, TRAIN_TARGET_NORMALIZED_SIZE);

/** Port that makes the reference image the input of the pipeline */
const PORT_REFERENCE_IMAGE = 0;

/** Port that makes the camera stream the input of the pipeline */
const PORT_CAMERA_IMAGE = 1;
73
-
74
-
75
-
76
-/**
77
- * The pre-tracking state of the Image Tracker is a new training
78
- * phase for the particular, actual target we'll be tracking
79
- */
80
-export class ImageTrackerPreTrackingState extends ImageTrackerState
81
-{
82
-    /** reference image */
83
-    private _referenceImage: Nullable<ReferenceImage>;
84
-
85
-    /** initial homography mapping the target image space to the AR screen space */
86
-    private _homography: SpeedyMatrix;
87
-
88
-    /** current step */
89
-    private _step: PreTrackingStep;
90
-
91
-    /** stored keypoints of the reference image */
92
-    private _referenceKeypoints: SpeedyKeypoint[];
93
-
94
-    /** current number of iterations for warp refinement */
95
-    private _iterations: number;
96
-
97
-
98
-
99
-    /**
100
-     * Constructor
101
-     * @param imageTracker
102
-     */
103
-    constructor(imageTracker: ImageTracker)
104
-    {
105
-        super('pre-tracking', imageTracker);
106
-
107
-        this._homography = Speedy.Matrix.Eye(3);
108
-        this._referenceImage = null;
109
-        this._step = 'read-reference-image';
110
-        this._referenceKeypoints = [];
111
-        this._iterations = 0;
112
-    }
113
-
114
-    /**
115
-     * Called as soon as this becomes the active state, just before update() runs for the first time
116
-     * @param settings
117
-     */
118
-    onEnterState(settings: Record<string,any>)
119
-    {
120
-        const imagePortalSource = this._pipeline.node('imagePortalSource') as SpeedyPipelineNodeImagePortalSource;
121
-        const muxOfReferenceKeypoints = this._pipeline.node('muxOfReferenceKeypoints') as SpeedyPipelineNodeKeypointMultiplexer;
122
-        const muxOfBufferOfReferenceKeypoints = this._pipeline.node('muxOfBufferOfReferenceKeypoints') as SpeedyPipelineNodeKeypointMultiplexer;
123
-        const bufferOfReferenceKeypoints = this._pipeline.node('bufferOfReferenceKeypoints') as SpeedyPipelineNodeKeypointBuffer;
124
-        const homography = settings.homography as SpeedyMatrix;
125
-        const referenceImage = settings.referenceImage as Nullable<ReferenceImage>;
126
-        const snapshot = settings.snapshot as SpeedyPipelineNodeImagePortalSink;
127
-
128
-        // this shouldn't happen
129
-        if(!referenceImage)
130
-            throw new TrackingError(`Can't track a null reference image`);
131
-
132
-        // set attributes
133
-        this._homography = homography;
134
-        this._referenceImage = referenceImage;
135
-        this._step = 'read-reference-image';
136
-        this._referenceKeypoints = [];
137
-        this._iterations = 0;
138
-
139
-        // setup the pipeline
140
-        imagePortalSource.source = snapshot;
141
-        muxOfReferenceKeypoints.port = 0;
142
-        muxOfBufferOfReferenceKeypoints.port = 0;
143
-        bufferOfReferenceKeypoints.frozen = false;
144
-    }
145
-
146
-    /**
147
-     * Called just before the GPU processing
148
-     * @returns promise
149
-     */
150
-    protected _beforeUpdate(): SpeedyPromise<void>
151
-    {
152
-        const referenceImage = this._referenceImage as ReferenceImage;
153
-        const source = this._pipeline.node('source') as SpeedyPipelineNodeImageSource;
154
-        const sourceMux = this._pipeline.node('sourceMux') as SpeedyPipelineNodeImageMultiplexer;
155
-        const imageRectifier = this._pipeline.node('imageRectifier') as SpeedyPipelineNodePerspectiveWarp;
156
-        const keypointRectifier = this._pipeline.node('keypointRectifier') as SpeedyPipelineNodeKeypointTransformer;
157
-        const borderClipper = this._pipeline.node('borderClipper') as SpeedyPipelineNodeKeypointBorderClipper;
158
-        const screenSize = this.screenSize;
159
-
160
-        // set the source media to the reference image we're going to track
161
-        const targetMedia = this._imageTracker.database._findMedia(referenceImage.name);
162
-        source.media = targetMedia;
163
-
164
-        // setup the source multiplexer
165
-        if(this._step == 'read-reference-image')
166
-            sourceMux.port = PORT_REFERENCE_IMAGE;
167
-        else
168
-            sourceMux.port = PORT_CAMERA_IMAGE;
169
-
170
-        // clip keypoints from the borders of the target image
171
-        borderClipper.imageSize = screenSize;
172
-        borderClipper.borderSize = Speedy.Vector2(
173
-            screenSize.width * TRACK_CLIPPING_BORDER,
174
-            screenSize.height * TRACK_CLIPPING_BORDER
175
-        );
176
-
177
-        // rectify the image
178
-        const rectify = (this._step == 'read-reference-image') ?
179
-            this._findRectificationMatrixOfFullscreenImage(targetMedia, screenSize) :
180
-            this._findRectificationMatrixOfCameraImage(this._homography, DEFAULT_TARGET_SPACE_SIZE, targetMedia, screenSize);
181
-
182
-        return rectify.then(rectificationMatrix => {
183
-            imageRectifier.transform = rectificationMatrix;
184
-        });
185
-    }
186
-
187
-    /**
188
-     * Post processing that takes place just after the GPU processing
189
-     * @param result pipeline results
190
-     * @returns state output
191
-     */
192
-    protected _afterUpdate(result: SpeedyPipelineOutput): SpeedyPromise<ImageTrackerStateOutput>
193
-    {
194
-        const referenceImage = this._referenceImage as ReferenceImage;
195
-        const imagePortalSink = this._pipeline.node('imagePortal') as SpeedyPipelineNodeImagePortalSink;
196
-        const keypointPortalSink = this._pipeline.node('keypointPortalSink') as SpeedyPipelineNodeKeypointPortalSink;
197
-        const muxOfReferenceKeypoints = this._pipeline.node('muxOfReferenceKeypoints') as SpeedyPipelineNodeKeypointMultiplexer;
198
-        const muxOfBufferOfReferenceKeypoints = this._pipeline.node('muxOfBufferOfReferenceKeypoints') as SpeedyPipelineNodeKeypointMultiplexer;
199
-        const bufferOfReferenceKeypoints = this._pipeline.node('bufferOfReferenceKeypoints') as SpeedyPipelineNodeKeypointBuffer;
200
-        const keypoints = result.keypoints as SpeedyMatchedKeypoint[];
201
-        const image = result.image as SpeedyMedia | undefined;
202
-
203
-        // tracker output
204
-        const trackerOutput: ImageTrackerOutput = {
205
-            keypoints: image !== undefined ? keypoints : undefined, // debug only
206
-            image: image,
207
-            screenSize: this.screenSize,
208
-        };
209
-
210
-        // decide what to do next
211
-        switch(this._step) {
212
-            case 'read-reference-image': {
213
-                // enable matching
214
-                muxOfReferenceKeypoints.port = 1;
215
-
216
-                // store reference keypoints
217
-                this._referenceKeypoints = keypoints;
218
-
219
-                // next step
220
-                this._step = 'warp-camera-image';
221
-                return Speedy.Promise.resolve({
222
-                    nextState: 'pre-tracking',
223
-                    trackerOutput: trackerOutput,
224
-                });
225
-            }
226
-
227
-            case 'warp-camera-image': {
228
-                // freeze reference keypoints
229
-                bufferOfReferenceKeypoints.frozen = true;
230
-                muxOfBufferOfReferenceKeypoints.port = 1;
231
-
232
-                // refine warp?
233
-                if(++this._iterations < TRACK_REFINEMENT_ITERATIONS)
234
-                    this._step = 'warp-camera-image';
235
-                else
236
-                    this._step = 'train-camera-image';
237
-
238
-                // warp image & go to next step
239
-                return this._findWarp(keypoints, this._referenceKeypoints).then(warp =>
240
-                    this._homography.setTo(this._homography.times(warp))
241
-                ).then(_ => ({
242
-                    nextState: 'pre-tracking',
243
-                    trackerOutput: trackerOutput,
244
-                })).catch(err => {
245
-                    Utils.warning(`Can't pre-track target image "${referenceImage.name}". ${err.toString()}`);
246
-                    return {
247
-                        nextState: 'scanning',
248
-                        trackerOutput: trackerOutput,
249
-                    };
250
-                });
251
-            }
252
-
253
-            case 'train-camera-image': {
254
-                // log
255
-                Utils.log(`Took a snapshot of target image "${referenceImage.name}". Found ${keypoints.length} keypoints.`);
256
-
257
-                // change the coordinates
258
-                return this._changeSpace(this._homography, this.screenSize).then(homography => {
259
-
260
-                    // we're ready to track the target!
261
-                    return Speedy.Promise.resolve({
262
-                        //nextState: 'pre-tracking',
263
-                        nextState: 'tracking',
264
-                        trackerOutput: trackerOutput,
265
-                        nextStateSettings: {
266
-                            homography: homography,
267
-                            referenceImage: referenceImage,
268
-                            templateKeypoints: keypoints,
269
-                            keypointPortalSink: keypointPortalSink,
270
-                            imagePortalSink: imagePortalSink,
271
-                            screenSize: this.screenSize,
272
-                        },
273
-                    });
274
-
275
-                });
276
-            }
277
-        }
278
-    }
279
-
280
-    /**
281
-     * Find an adjustment warp between the camera image and the reference image
282
-     * @param dstKeypoints destination
283
-     * @param srcKeypoints source
284
-     * @returns a promise that resolves to a 3x3 homography
285
-     */
286
-    private _findWarp(dstKeypoints: SpeedyMatchedKeypoint[], srcKeypoints: SpeedyKeypoint[]): SpeedyPromise<SpeedyMatrix>
287
-    {
288
-        //return Speedy.Promise.resolve(Speedy.Matrix.Eye(3));
289
-        const srcCoords: number[] = [];
290
-        const dstCoords: number[] = [];
291
-
292
-        // find matching coordinates of the keypoints
293
-        for(let i = 0; i < dstKeypoints.length; i++) {
294
-            const dstKeypoint = dstKeypoints[i];
295
-            if(dstKeypoint.matches[0].index >= 0 && dstKeypoint.matches[1].index >= 0) {
296
-                const d1 = dstKeypoint.matches[0].distance, d2 = dstKeypoint.matches[1].distance;
297
-
298
-                // the best match should be "much better" than the second best match,
299
-                // which means that they are "distinct enough"
300
-                if(d1 <= TRACK_MATCH_RATIO * d2) {
301
-                    const srcKeypoint = srcKeypoints[dstKeypoint.matches[0].index];
302
-                    srcCoords.push(srcKeypoint.x);
303
-                    srcCoords.push(srcKeypoint.y);
304
-                    dstCoords.push(dstKeypoint.x);
305
-                    dstCoords.push(dstKeypoint.y);
306
-                }
307
-            }
308
-        }
309
-
310
-        // too few points?
311
-        const n = srcCoords.length / 2;
312
-        if(n < 4) {
313
-            return Speedy.Promise.reject(
314
-                new TrackingError('Too few points to compute a warp')
315
-            );
316
-        }
317
-
318
-        // compute warp
319
-        const model = Speedy.Matrix.Eye(3);
320
-        return this._findKeypointWarp().then(transform =>
321
-
322
-            // rectify keypoints
323
-            Speedy.Matrix.applyAffineTransform(
324
-                Speedy.Matrix.Zeros(2, 2*n),
325
-                Speedy.Matrix(2, 2*n, srcCoords.concat(dstCoords)),
326
-                transform.block(0,1,0,2)
327
-            )
328
-
329
-        ).then(points =>
330
-
331
-            // find warp
332
-            Speedy.Matrix.findAffineTransform(
333
-                model.block(0,1,0,2),
334
-                points.block(0,1,0,n-1),
335
-                points.block(0,1,n,2*n-1), {
336
-                method: 'pransac',
337
-                reprojectionError: TRACK_RANSAC_REPROJECTIONERROR,
338
-                numberOfHypotheses: 512*4,
339
-                bundleSize: 128,
340
-            })
341
-
342
-        ).then(_ => {
343
-
344
-            // validate the model
345
-            const a00 = model.at(0,0);
346
-            if(Number.isNaN(a00))
347
-                throw new TrackingError(`Can't compute warp: bad keypoints`);
348
-
349
-            // done!
350
-            return model;
351
-
352
-        });
353
-    }
354
-
355
-    /**
356
-     * Find a warp to be applied to the keypoints
357
-     * @returns affine transform
358
-     */
359
-    private _findKeypointWarp(): SpeedyPromise<SpeedyMatrix>
360
-    {
361
-        const referenceImage = this._referenceImage as ReferenceImage;
362
-        const media = this._imageTracker.database._findMedia(referenceImage.name);
363
-        const screenSize = this.screenSize;
364
-
365
-        // no rotation is needed
366
-        if(!this._mustRotateWarpedImage(media, screenSize))
367
-            return Speedy.Promise.resolve(Speedy.Matrix.Eye(3));
368
-
369
-        // rotate by 90 degrees clockwise around the pivot
370
-        const px = screenSize.width / 2, py = screenSize.height / 2; // pivot
371
-        return Speedy.Promise.resolve(Speedy.Matrix(3, 3, [
372
-            0, 1, 0,
373
-            -1, 0, 0,
374
-            py+px, py-px, 1,
375
-        ]));
376
-    }
377
-
378
-    /**
379
-     * Change the space of the homography in order to improve tracking quality
380
-     * @param homography mapping coordinates from normalized target space to AR screen space
381
-     * @param screenSize AR screen size
382
-     * @returns homography mapping coordinates from AR screen space to AR screen space
383
-     */
384
-    private _changeSpace(homography: SpeedyMatrix, screenSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
385
-    {
386
-        const sw = screenSize.width, sh = screenSize.height;
387
-        const screen = Speedy.Matrix(2, 4, [ 0, 0, sw, 0, sw, sh, 0, sh ]);
388
-
389
-        const mat = Speedy.Matrix.Zeros(3);
390
-        return this._findPolylineCoordinates(homography, DEFAULT_TARGET_SPACE_SIZE).then(polyline =>
391
-            Speedy.Matrix.perspective(mat, screen, polyline)
392
-        );
393
-    }
394
-
395
-    /**
396
-     * Create & setup the pipeline
397
-     * @returns pipeline
398
-     */
399
-    protected _createPipeline(): SpeedyPipeline
400
-    {
401
-        const pipeline = Speedy.Pipeline();
402
-
403
-        const source = Speedy.Image.Source('source');
404
-        const imagePortalSource = Speedy.Image.Portal.Source('imagePortalSource');
405
-        const sourceMux = Speedy.Image.Multiplexer('sourceMux');
406
-        const screen = Speedy.Transform.Resize('screen');
407
-        const greyscale = Speedy.Filter.Greyscale();
408
-        const imageRectifier = Speedy.Transform.PerspectiveWarp('imageRectifier');
409
-        const nightvision = Speedy.Filter.Nightvision();
410
-        const nightvisionMux = Speedy.Image.Multiplexer();
411
-        const detector = Speedy.Keypoint.Detector.Harris();
412
-        const descriptor = Speedy.Keypoint.Descriptor.ORB();
413
-        const blur = Speedy.Filter.GaussianBlur();
414
-        const clipper = Speedy.Keypoint.Clipper();
415
-        const borderClipper = Speedy.Keypoint.BorderClipper('borderClipper');
416
-        const denoiser = Speedy.Filter.GaussianBlur();
417
-        const subpixel = Speedy.Keypoint.SubpixelRefiner();
418
-        const matcher = Speedy.Keypoint.Matcher.BFKNN();
419
-        const keypointRectifier = Speedy.Keypoint.Transformer('keypointRectifier');
420
-        const keypointPortalSink = Speedy.Keypoint.Portal.Sink('keypointPortalSink');
421
-        const keypointPortalSource = Speedy.Keypoint.Portal.Source('keypointPortalSource');
422
-        const muxOfReferenceKeypoints = Speedy.Keypoint.Multiplexer('muxOfReferenceKeypoints');
423
-        const bufferOfReferenceKeypoints = Speedy.Keypoint.Buffer('bufferOfReferenceKeypoints');
424
-        const muxOfBufferOfReferenceKeypoints = Speedy.Keypoint.Multiplexer('muxOfBufferOfReferenceKeypoints');
425
-        const keypointSink = Speedy.Keypoint.SinkOfMatchedKeypoints('keypoints');
426
-        const imageSink = Speedy.Image.Sink('image');
427
-
428
-        source.media = null;
429
-        screen.size = Speedy.Size(0,0);
430
-        imagePortalSource.source = null;
431
-        imageRectifier.transform = Speedy.Matrix.Eye(3);
432
-        sourceMux.port = PORT_REFERENCE_IMAGE;
433
-        nightvision.gain = NIGHTVISION_GAIN;
434
-        nightvision.offset = NIGHTVISION_OFFSET;
435
-        nightvision.decay = NIGHTVISION_DECAY;
436
-        nightvision.quality = NIGHTVISION_QUALITY;
437
-        nightvisionMux.port = TRACK_WITH_NIGHTVISION ? 1 : 0; // 1 = enable nightvision
438
-        blur.kernelSize = Speedy.Size(ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_KSIZE);
439
-        blur.sigma = Speedy.Vector2(ORB_GAUSSIAN_SIGMA, ORB_GAUSSIAN_SIGMA);
440
-        denoiser.kernelSize = Speedy.Size(SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_KSIZE);
441
-        denoiser.sigma = Speedy.Vector2(SUBPIXEL_GAUSSIAN_SIGMA, SUBPIXEL_GAUSSIAN_SIGMA);
442
-        detector.quality = TRACK_HARRIS_QUALITY;
443
-        detector.capacity = TRACK_DETECTOR_CAPACITY;
444
-        subpixel.method = SUBPIXEL_METHOD;
445
-        clipper.size = TRACK_MAX_KEYPOINTS;
446
-        borderClipper.imageSize = screen.size;
447
-        borderClipper.borderSize = Speedy.Vector2(0,0);
448
-        matcher.k = 2;
449
-        keypointRectifier.transform = Speedy.Matrix.Eye(3);
450
-        keypointPortalSource.source = keypointPortalSink;
451
-        muxOfReferenceKeypoints.port = 0;
452
-        muxOfBufferOfReferenceKeypoints.port = 0;
453
-        bufferOfReferenceKeypoints.frozen = false;
454
-        keypointSink.turbo = false;
455
-
456
-        // prepare input
457
-        source.output().connectTo(sourceMux.input('in0')); // port 0: reference image
458
-        imagePortalSource.output().connectTo(sourceMux.input('in1')); // port 1: camera image (via portal)
459
-        sourceMux.output().connectTo(screen.input());
460
-        screen.output().connectTo(greyscale.input());
461
-
462
-        // preprocess images
463
-        greyscale.output().connectTo(imageRectifier.input());
464
-        imageRectifier.output().connectTo(nightvisionMux.input('in0'));
465
-        imageRectifier.output().connectTo(nightvision.input());
466
-        nightvision.output().connectTo(nightvisionMux.input('in1'));
467
-        nightvisionMux.output().connectTo(blur.input());
468
-
469
-        // keypoint detection & clipping
470
-        nightvisionMux.output().connectTo(detector.input());
471
-        detector.output().connectTo(borderClipper.input());
472
-        borderClipper.output().connectTo(clipper.input());
473
-
474
-        // keypoint refinement
475
-        imageRectifier.output().connectTo(denoiser.input());
476
-        denoiser.output().connectTo(subpixel.input('image'));
477
-        clipper.output().connectTo(subpixel.input('keypoints'));
478
-
479
-        // keypoint description
480
-        blur.output().connectTo(descriptor.input('image'));
481
-        subpixel.output().connectTo(descriptor.input('keypoints'));
482
-
483
-        // keypoint matching
484
-        descriptor.output().connectTo(muxOfReferenceKeypoints.input('in0'));
485
-        muxOfBufferOfReferenceKeypoints.output().connectTo(muxOfReferenceKeypoints.input('in1'));
486
-        muxOfReferenceKeypoints.output().connectTo(matcher.input('database'));
487
-        descriptor.output().connectTo(matcher.input('keypoints'));
488
-
489
-        // store reference keypoints
490
-        keypointPortalSource.output().connectTo(muxOfBufferOfReferenceKeypoints.input('in0'));
491
-        bufferOfReferenceKeypoints.output().connectTo(muxOfBufferOfReferenceKeypoints.input('in1'));
492
-        keypointPortalSource.output().connectTo(bufferOfReferenceKeypoints.input());
493
-
494
-        // portals
495
-        descriptor.output().connectTo(keypointPortalSink.input());
496
-
497
-        // prepare output
498
-        descriptor.output().connectTo(keypointRectifier.input());
499
-        keypointRectifier.output().connectTo(keypointSink.input());
500
-        matcher.output().connectTo(keypointSink.input('matches'));
501
-        //imageRectifier.output().connectTo(imageSink.input());
502
-
503
-        // done!
504
-        pipeline.init(
505
-            source, imagePortalSource, sourceMux, screen,
506
-            greyscale, imageRectifier, nightvision, nightvisionMux, blur,
507
-            detector, subpixel, clipper, borderClipper, denoiser, descriptor,
508
-            keypointPortalSource, muxOfReferenceKeypoints, matcher,
509
-            bufferOfReferenceKeypoints, muxOfBufferOfReferenceKeypoints,
510
-            keypointRectifier, keypointSink,
511
-            keypointPortalSink,
512
-            //imageSink
513
-        );
514
-
515
-        return pipeline;
516
-    }
517
-}

+ 137
- 145
src/trackers/image-tracker/states/scanning.ts Vedi File

@@ -37,25 +37,21 @@ import { SpeedyPipelineNodeImagePortalSource, SpeedyPipelineNodeImagePortalSink
37 37
 import { SpeedyPipelineNodeStaticLSHTables } from 'speedy-vision/types/core/pipeline/nodes/keypoints/matchers/lsh-static-tables';
38 38
 import { SpeedyKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
39 39
 import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
40
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
40 41
 import { ImageTrackerState, ImageTrackerStateOutput } from './state';
41
-import { ImageTrackerPreTrackingState } from './pre-tracking';
42 42
 import { Nullable, Utils } from '../../../utils/utils';
43
-import { IllegalOperationError, IllegalArgumentError, DetectionError } from '../../../utils/errors';
43
+import { DetectionError } from '../../../utils/errors';
44 44
 import { 
45 45
     SCAN_MATCH_RATIO, SCAN_MIN_MATCHES, SCAN_CONSECUTIVE_FRAMES,
46 46
     ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_SIGMA,
47 47
     NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY,
48 48
     SCAN_WITH_NIGHTVISION, SCAN_PYRAMID_LEVELS, SCAN_PYRAMID_SCALEFACTOR,
49 49
     SCAN_FAST_THRESHOLD, SCAN_MAX_KEYPOINTS, SCAN_LSH_TABLES, SCAN_LSH_HASHSIZE,
50
-    SCAN_RANSAC_REPROJECTIONERROR,
51
-    TRAIN_TARGET_NORMALIZED_SIZE,
50
+    SCAN_RANSAC_REPROJECTIONERROR_NDC,
52 51
     NIGHTVISION_QUALITY,
53 52
 } from '../settings';
54 53
 
55 54
 
56
-/** Default target space size (used when training) */
57
-const DEFAULT_TARGET_SPACE_SIZE = Speedy.Size(TRAIN_TARGET_NORMALIZED_SIZE, TRAIN_TARGET_NORMALIZED_SIZE);
58
-
59 55
 /** Port of the portal multiplexer: get new data from the camera */
60 56
 const PORT_CAMERA = 0;
61 57
 
@@ -65,7 +61,7 @@ const PORT_MEMORY = 1;
65 61
 
66 62
 
67 63
 /**
68
- * Scanning state of the Image Tracker
64
+ * In the scanning state we look for a reference image in the video
69 65
  */
70 66
 export class ImageTrackerScanningState extends ImageTrackerState
71 67
 {
@@ -101,7 +97,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
101 97
     {
102 98
         const imagePortalMux = this._pipeline.node('imagePortalMux') as SpeedyPipelineNodeImageMultiplexer;
103 99
         const lshTables = this._pipeline.node('lshTables') as SpeedyPipelineNodeStaticLSHTables;
104
-        const keypoints = settings.keypoints as SpeedyKeypoint[] | undefined;
100
+        const database = settings.database as SpeedyKeypoint[] | undefined;
105 101
 
106 102
         // set attributes
107 103
         this._counter = 0;
@@ -111,8 +107,24 @@ export class ImageTrackerScanningState extends ImageTrackerState
111 107
         imagePortalMux.port = PORT_CAMERA;
112 108
 
113 109
         // prepare the keypoint matcher
114
-        if(keypoints !== undefined)
115
-            lshTables.keypoints = keypoints;
110
+        if(database !== undefined)
111
+            lshTables.keypoints = database;
112
+    }
113
+
114
+    /**
115
+     * Called just before the GPU processing
116
+     * @returns promise
117
+     */
118
+    protected _beforeUpdate(): SpeedyPromise<void>
119
+    {
120
+        const keypointScaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
121
+        const screenSize = this.screenSize;
122
+
123
+        // convert keypoints to NIS
124
+        keypointScaler.transform = ImageTrackerUtils.rasterToNIS(screenSize);
125
+
126
+        // done!
127
+        return Speedy.Promise.resolve();
116 128
     }
117 129
 
118 130
     /**
@@ -124,116 +136,123 @@ export class ImageTrackerScanningState extends ImageTrackerState
124 136
     {
125 137
         const imagePortalMux = this._pipeline.node('imagePortalMux') as SpeedyPipelineNodeImageMultiplexer;
126 138
         const keypoints = result.keypoints as SpeedyMatchedKeypoint[];
127
-        const matchedKeypoints = this._goodMatches(keypoints);
139
+        const image = result.image as SpeedyMedia | undefined;
128 140
 
129 141
         // tracker output
130 142
         const trackerOutput: ImageTrackerOutput = {
131
-            keypoints: keypoints,
132
-            screenSize: this.screenSize
143
+            keypointsNIS: keypoints,
144
+            polylineNDC: [],
145
+            image: image,
133 146
         };
134 147
 
135 148
         // keep the last memorized image
136 149
         imagePortalMux.port = PORT_MEMORY;
137 150
 
138
-        // have we found enough matches...?
139
-        if(matchedKeypoints.length >= SCAN_MIN_MATCHES) {
140
-            return this._findHomography(matchedKeypoints).then(([homography, score]) => {
151
+        // find high quality matches
152
+        const matchedKeypoints = this._selectGoodMatches(keypoints);
153
+        if(matchedKeypoints.length < SCAN_MIN_MATCHES) {
154
+
155
+            // not enough high quality matches?
156
+            // we'll continue to scan the scene
157
+            this._counter = 0;
158
+            this._bestScore = 0;
141 159
 
142
-                // have we found the best homography so far?
143
-                if(score >= this._bestScore) {
144
-                    // store it only if we'll be running the pipeline again
145
-                    if(this._counter < SCAN_CONSECUTIVE_FRAMES - 1) {
146
-                        this._bestScore = score;
147
-                        this._bestHomography = homography;
160
+            return Speedy.Promise.resolve({
161
+                nextState: 'scanning',
162
+                trackerOutput: trackerOutput,
163
+            });
148 164
 
149
-                        // memorize the last image, corresponding to the best homography(*)
150
-                        imagePortalMux.port = PORT_CAMERA;
165
+        }
151 166
 
152
-                        /*
167
+        // we have enough high quality matches!
168
+        const pairs = this._findMatchingPairs(matchedKeypoints);
169
+        const points = ImageTrackerUtils.compilePairsOfKeypointsNDC(pairs);
153 170
 
154
-                        (*) technically speaking, this is not exactly the case. Since we're
155
-                            using turbo to download the keypoints, there's a slight difference
156
-                            between the data used to compute the homography and the last image.
157
-                            Still, assuming continuity of the video stream, this logic is
158
-                            good enough.
171
+        // find a homography
172
+        return this._findHomographyNDC(points).then(([homography, score]) => {
159 173
 
160
-                        */
161
-                    }
162
-                }
174
+            // have we found the best homography so far?
175
+            if(score >= this._bestScore) {
163 176
 
164
-                // find a polyline surrounding the target
165
-                return this._findPolyline(homography, DEFAULT_TARGET_SPACE_SIZE);
177
+                // store it only if we'll be running the pipeline again
178
+                if(this._counter < SCAN_CONSECUTIVE_FRAMES - 1) {
179
+                    this._bestScore = score;
180
+                    this._bestHomography = homography;
166 181
 
167
-            }).then(polyline => {
182
+                    // memorize the last image, corresponding to the best homography(*)
183
+                    imagePortalMux.port = PORT_CAMERA;
168 184
 
169
-                // continue a little longer in the scanning state
170
-                if(++this._counter < SCAN_CONSECUTIVE_FRAMES) {
171
-                    return {
172
-                        nextState: this.name,
173
-                        trackerOutput: {
174
-                            polyline: polyline,
175
-                            ...trackerOutput,
176
-                        },
177
-                    };
178
-                }
185
+                    /*
179 186
 
180
-                // this image should correspond to the best homography
181
-                const snapshot = this._pipeline.node('imagePortalSink') as SpeedyPipelineNodeImagePortalSink;
187
+                    (*) technically speaking, this is not exactly the case. Since we're
188
+                        using turbo to download the keypoints, there's a slight difference
189
+                        between the data used to compute the homography and the last image.
190
+                        Still, assuming continuity of the video stream, this logic is
191
+                        good enough.
182 192
 
183
-                // the reference image that we'll track
184
-                const referenceImage = this._imageTracker._referenceImageOfKeypoint(
185
-                    matchedKeypoints[0].matches[0].index
186
-                );
193
+                    */
194
+                }
187 195
 
188
-                // let's track the target!
189
-                return {
190
-                    nextState: 'pre-tracking',
191
-                    nextStateSettings: {
192
-                        homography: this._bestHomography,
193
-                        snapshot: snapshot,
194
-                        referenceImage: referenceImage,
195
-                    },
196
-                    trackerOutput: {
197
-                        polyline: polyline,
198
-                        ...trackerOutput,
199
-                    },
200
-                };
196
+            }
201 197
 
202
-            }).catch(() => {
198
+            // find a polyline surrounding the target
199
+            const polylineNDC = ImageTrackerUtils.findPolylineNDC(homography);
200
+            trackerOutput.polylineNDC!.push(...polylineNDC);
203 201
 
204
-                // continue in the scanning state
202
+            // continue a little longer in the scanning state
203
+            if(++this._counter < SCAN_CONSECUTIVE_FRAMES) {
205 204
                 return {
206
-                    nextState: this.name,
207
-                    trackerOutput: trackerOutput,
205
+                    nextState: 'scanning',
206
+                    trackerOutput: trackerOutput
208 207
                 };
208
+            }
209 209
 
210
-            });
211
-        }
212
-        else {
210
+            // this image should correspond to the best homography
211
+            const snapshot = this._pipeline.node('imagePortalSink') as SpeedyPipelineNodeImagePortalSink;
213 212
 
214
-            // not enough matches...!
215
-            this._counter = 0;
216
-            this._bestScore = 0;
213
+            // the reference image that we'll track
214
+            const referenceImage = this._imageTracker._referenceImageOfKeypoint(
215
+                matchedKeypoints[0].matches[0].index
216
+            );
217 217
 
218
-        }
218
+            // this shouldn't happen
219
+            if(!referenceImage)
220
+                throw new DetectionError(`Can't track an unknown reference image`);
221
+
222
+            // let's track the target!
223
+            return {
224
+                nextState: 'pre-tracking-a',
225
+                nextStateSettings: {
226
+                    homography: this._bestHomography,
227
+                    snapshot: snapshot,
228
+                    referenceImage: referenceImage,
229
+                },
230
+                trackerOutput: trackerOutput
231
+            };
232
+
233
+        })
234
+        .catch(err => {
235
+
236
+            // continue in the scanning state
237
+            Utils.warning(`Error when scanning: ${err.toString()}`)
238
+            return {
239
+                nextState: 'scanning',
240
+                trackerOutput: trackerOutput,
241
+            };
219 242
 
220
-        // we'll continue to scan the scene
221
-        return Speedy.Promise.resolve({
222
-            nextState: this.name,
223
-            trackerOutput: trackerOutput,
224 243
         });
225 244
     }
226 245
 
227 246
     /**
228
-     * Find "high quality" matches of a single reference image
229
-     * @param keypoints
230
-     * @returns high quality matches
247
+     * Select high quality matches of a single reference image
248
+     * @param keypoints matched keypoints of any quality, to any reference image
249
+     * @returns high quality matches of a single reference image
231 250
      */
232
-    private _goodMatches(keypoints: SpeedyMatchedKeypoint[]): SpeedyMatchedKeypoint[]
251
+    private _selectGoodMatches(keypoints: SpeedyMatchedKeypoint[]): SpeedyMatchedKeypoint[]
233 252
     {
234 253
         const matchedKeypointsPerImageIndex: Record<number,SpeedyMatchedKeypoint[]> = Object.create(null);
235 254
 
236
-        // filter "good matches"
255
+        // find high quality matches, regardless of reference image
237 256
         for(let j = keypoints.length - 1; j >= 0; j--) {
238 257
             const keypoint = keypoints[j];
239 258
             if(keypoint.matches[0].index >= 0 && keypoint.matches[1].index >= 0) {
@@ -255,7 +274,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
255 274
             }
256 275
         }
257 276
 
258
-        // find the image with the most matches
277
+        // find the reference image with the most high quality matches
259 278
         let matchedKeypoints: SpeedyMatchedKeypoint[] = [];
260 279
         for(const imageIndex in matchedKeypointsPerImageIndex) {
261 280
             if(matchedKeypointsPerImageIndex[imageIndex].length > matchedKeypoints.length)
@@ -267,71 +286,41 @@ export class ImageTrackerScanningState extends ImageTrackerState
267 286
     }
268 287
 
269 288
     /**
270
-     * Find a homography matrix using matched keypoints
271
-     * @param matchedKeypoints "good" matches only
272
-     * @returns homography from reference image space to AR screen space & homography "quality" score
289
+     * Find a homography matrix using matched keypoints in NDC
290
+     * @param points compiled pairs of keypoints in NDC
291
+     * @returns homography (from reference to matched, NDC) & "quality" score
273 292
      */
274
-    private _findHomography(matchedKeypoints: SpeedyMatchedKeypoint[]): SpeedyPromise<[SpeedyMatrix,number]>
293
+    private _findHomographyNDC(points: SpeedyMatrix): SpeedyPromise<[SpeedyMatrix,number]>
275 294
     {
276
-        const srcCoords: number[] = [];
277
-        const dstCoords: number[] = [];
278
-
279
-        // find matching coordinates of the keypoints
280
-        for(let i = matchedKeypoints.length - 1; i >= 0; i--) {
281
-            const matchedKeypoint = matchedKeypoints[i];
282
-            const referenceKeypoint = this._imageTracker._referenceKeypoint(matchedKeypoint.matches[0].index);
283
-            if(referenceKeypoint != null) {
284
-                srcCoords.push(referenceKeypoint.x);
285
-                srcCoords.push(referenceKeypoint.y);
286
-                dstCoords.push(matchedKeypoint.x);
287
-                dstCoords.push(matchedKeypoint.y);
288
-            }
289
-            else {
290
-                // this shouldn't happen
291
-                return Speedy.Promise.reject(
292
-                    new DetectionError(`Invalid keypoint match index: ${matchedKeypoint.matches[0].index} from ${matchedKeypoint.toString()}`)
293
-                );
294
-            }
295
-        }
296
-
297
-        // too few points?
298
-        const n = srcCoords.length / 2;
299
-        if(n < 4) {
300
-            return Speedy.Promise.reject(
301
-                new DetectionError(`Too few points to compute a homography`)
302
-            );
303
-        }
304
-
305
-        // compute a homography
306
-        const src = Speedy.Matrix(2, n, srcCoords);
307
-        const dst = Speedy.Matrix(2, n, dstCoords);
308
-        const mask = Speedy.Matrix.Zeros(1, n);
309
-
310
-        const homography = Speedy.Matrix.Zeros(3);
311
-        return Speedy.Matrix.findHomography(homography, src, dst, {
295
+        return ImageTrackerUtils.findPerspectiveWarpNDC(points, {
312 296
             method: 'pransac',
313
-            reprojectionError: SCAN_RANSAC_REPROJECTIONERROR,
297
+            reprojectionError: SCAN_RANSAC_REPROJECTIONERROR_NDC,
314 298
             numberOfHypotheses: 512,
315 299
             bundleSize: 128,
316
-            mask: mask,
317
-        }).then(homography => {
300
+        });
301
+    }
318 302
 
319
-            // check if this is a valid homography
320
-            const a00 = homography.at(0,0);
321
-            if(Number.isNaN(a00))
322
-                throw new DetectionError(`Can't compute homography`);
303
+    /**
304
+     * Find matching pairs of keypoints from reference image (src) to matched image (dest)
305
+     * @param matchedKeypoints
306
+     * @returns an array of matching pairs [src, dest]
307
+     */
308
+    private _findMatchingPairs(matchedKeypoints: SpeedyMatchedKeypoint[]): ImageTrackerKeypointPair[]
309
+    {
310
+        const pairs = new Array<ImageTrackerKeypointPair>(matchedKeypoints.length);
323 311
 
324
-            // count the number of inliers
325
-            const inliers = mask.read();
326
-            let inlierCount = 0;
327
-            for(let i = inliers.length - 1; i >= 0; i--)
328
-                inlierCount += inliers[i];
329
-            const score = inlierCount / inliers.length;
312
+        for(let i = matchedKeypoints.length - 1; i >= 0; i--) {
313
+            const matchedKeypoint = matchedKeypoints[i];
314
+            const referenceKeypoint = this._imageTracker._referenceKeypoint(matchedKeypoint.matches[0].index);
330 315
 
331
-            // done!
332
-            return [ homography, score ];
316
+            // this shouldn't happen
317
+            if(referenceKeypoint == null)
318
+                throw new DetectionError(`Invalid keypoint match index: ${matchedKeypoint.matches[0].index} from ${matchedKeypoint.toString()}`);
333 319
 
334
-        });
320
+            pairs[i] = [ referenceKeypoint, matchedKeypoint ];
321
+        }
322
+
323
+        return pairs;
335 324
     }
336 325
 
337 326
     /**
@@ -354,6 +343,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
354 343
         const clipper = Speedy.Keypoint.Clipper();
355 344
         const lshTables = Speedy.Keypoint.Matcher.StaticLSHTables('lshTables');
356 345
         const knn = Speedy.Keypoint.Matcher.LSHKNN();
346
+        const keypointScaler = Speedy.Keypoint.Transformer('keypointScaler');
357 347
         const keypointSink = Speedy.Keypoint.SinkOfMatchedKeypoints('keypoints');
358 348
         const imagePortalSink = Speedy.Image.Portal.Sink('imagePortalSink');
359 349
         const imagePortalSource = Speedy.Image.Portal.Source('imagePortalSource');
@@ -386,6 +376,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
386 376
         imagePortalMux.port = PORT_CAMERA; // 0 = camera stream; 1 = lock image
387 377
         imagePortalCopy.size = Speedy.Size(0,0);
388 378
         imagePortalCopy.scale = Speedy.Vector2(1,1);
379
+        keypointScaler.transform = Speedy.Matrix.Eye(3);
389 380
         keypointSink.turbo = true;
390 381
 
391 382
         // prepare input
@@ -412,7 +403,8 @@ export class ImageTrackerScanningState extends ImageTrackerState
412 403
         lshTables.output().connectTo(knn.input('lsh'));
413 404
 
414 405
         // prepare output
415
-        clipper.output().connectTo(keypointSink.input());
406
+        clipper.output().connectTo(keypointScaler.input());
407
+        keypointScaler.output().connectTo(keypointSink.input());
416 408
         knn.output().connectTo(keypointSink.input('matches'));
417 409
         //pyramid.output().connectTo(imageSink.input());
418 410
 
@@ -429,7 +421,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
429 421
             greyscale, blur, nightvision, nightvisionMux, pyramid,
430 422
             detector, descriptor, clipper,
431 423
             lshTables, knn,
432
-            keypointSink,
424
+            keypointScaler, keypointSink,
433 425
             imagePortalSink, imagePortalSource,
434 426
             imagePortalMux, imagePortalBuffer, imagePortalCopy,
435 427
             //, imageSink

+ 14
- 155
src/trackers/image-tracker/states/state.ts Vedi File

@@ -33,10 +33,10 @@ import { SpeedyPipelineNodeResize } from 'speedy-vision/types/core/pipeline/node
33 33
 import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/transformer';
34 34
 import { SpeedyKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
35 35
 import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
36
+import { ReferenceImage } from '../reference-image';
36 37
 import { TrackerOutput } from '../../tracker';
37 38
 import { Nullable } from '../../../utils/utils';
38
-import { IllegalOperationError } from '../../../utils/errors';
39
-import { TRACK_RECTIFIED_BORDER } from '../settings';
39
+import { IllegalOperationError, IllegalArgumentError } from '../../../utils/errors';
40 40
 
41 41
 /** State output */
42 42
 export interface ImageTrackerStateOutput
@@ -46,7 +46,6 @@ export interface ImageTrackerStateOutput
46 46
     readonly nextStateSettings?: Record<string,any>;
47 47
 }
48 48
 
49
-
50 49
 /**
51 50
  * Abstract state of the Image Tracker
52 51
  */
@@ -61,6 +60,9 @@ export abstract class ImageTrackerState
61 60
     /** pipeline */
62 61
     protected _pipeline: SpeedyPipeline;
63 62
 
63
+    /** a flag telling whether or not the pipeline has been released */
64
+    protected _pipelineReleased: boolean;
65
+
64 66
 
65 67
     /**
66 68
      * Constructor
@@ -72,6 +74,7 @@ export abstract class ImageTrackerState
72 74
         this._name = name;
73 75
         this._imageTracker = imageTracker;
74 76
         this._pipeline = this._createPipeline();
77
+        this._pipelineReleased = false;
75 78
     }
76 79
 
77 80
     /**
@@ -84,6 +87,7 @@ export abstract class ImageTrackerState
84 87
 
85 88
     /**
86 89
      * AR screen size
90
+     * It may change over time, as when flipping a phone
87 91
      */
88 92
     get screenSize(): SpeedySize
89 93
     {
@@ -107,7 +111,12 @@ export abstract class ImageTrackerState
107 111
      */
108 112
     release(): null
109 113
     {
110
-        return this._pipeline.release();
114
+        if(!this._pipelineReleased) {
115
+            this._pipeline.release();
116
+            this._pipelineReleased = true;
117
+        }
118
+
119
+        return null;
111 120
     }
112 121
 
113 122
     /**
@@ -183,154 +192,4 @@ export abstract class ImageTrackerState
183 192
      * @returns pipeline
184 193
      */
185 194
     protected abstract _createPipeline(): SpeedyPipeline;
186
-
187
-
188
-
189
-    //
190
-    // Some utility methods common to various states
191
-    //
192
-
193
-    /**
194
-     * Find the coordinates of a polyline surrounding the target image
195
-     * @param homography maps the target image to the AR screen
196
-     * @param targetSize size of the target space
197
-     * @returns promise that resolves to 4 points in AR screen space
198
-     */
199
-    protected _findPolylineCoordinates(homography: SpeedyMatrix, targetSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
200
-    {
201
-        const w = targetSize.width, h = targetSize.height;
202
-        const referenceImageCoordinates = Speedy.Matrix(2, 4, [
203
-            0, 0,
204
-            w, 0,
205
-            w, h,
206
-            0, h,
207
-        ]);
208
-
209
-        const polylineCoordinates = Speedy.Matrix.Zeros(2, 4);
210
-        return Speedy.Matrix.applyPerspectiveTransform(
211
-            polylineCoordinates,
212
-            referenceImageCoordinates,
213
-            homography
214
-        );
215
-    }
216
-
217
-    /**
218
-     * Find a polyline surrounding the target image
219
-     * @param homography maps the target image to the AR screen
220
-     * @param targetSize size of the target space
221
-     * @returns promise that resolves to 4 points in AR screen space
222
-     */
223
-    protected _findPolyline(homography: SpeedyMatrix, targetSize: SpeedySize): SpeedyPromise<SpeedyPoint2[]>
224
-    {
225
-        return this._findPolylineCoordinates(homography, targetSize).then(polylineCoordinates => {
226
-            const polydata = polylineCoordinates.read();
227
-            const polyline = Array.from({ length: 4 }, (_, i) => Speedy.Point2(polydata[2*i], polydata[2*i+1]));
228
-
229
-            return polyline;
230
-        });
231
-    }
232
-
233
-    /**
234
-     * Whether or not to rotate the warped image in order to best fit the AR screen
235
-     * @param media media associated with the reference image
236
-     * @param screenSize AR screen
237
-     * @returns boolean
238
-     */
239
-    protected _mustRotateWarpedImage(media: SpeedyMedia, screenSize: SpeedySize): boolean
240
-    {
241
-        const screenAspectRatio = screenSize.width / screenSize.height;
242
-        const mediaAspectRatio = media.width / media.height;
243
-        const eps = 0.1;
244
-
245
-        return (mediaAspectRatio >= 1+eps && screenAspectRatio < 1-eps) || (mediaAspectRatio < 1-eps && screenAspectRatio >= 1+eps);
246
-    }
247
-
248
-    /**
249
-     * Find a rectification matrix to be applied to an image fitting the entire AR screen
250
-     * @param media media associated with the reference image
251
-     * @param screenSize AR screen
252
-     * @returns promise that resolves to a rectification matrix
253
-     */
254
-    protected _findRectificationMatrixOfFullscreenImage(media: SpeedyMedia, screenSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
255
-    {
256
-        const b = TRACK_RECTIFIED_BORDER;
257
-        const sw = screenSize.width, sh = screenSize.height;
258
-        const mediaAspectRatio = media.width / media.height;
259
-        const mustRotate = this._mustRotateWarpedImage(media, screenSize);
260
-
261
-        // compute the vertices of the target in screen space
262
-        // we suppose portrait or landscape mode for both screen & media
263
-        const c = mustRotate ? 1 / mediaAspectRatio : mediaAspectRatio;
264
-        const top = sw >= sh ? b * sh : (sh - sw * (1-2*b) / c) / 2;
265
-        const left = sw >= sh ? (sw - sh * (1-2*b) * c) / 2 : b * sw;
266
-        const right = sw - left;
267
-        const bottom = sh - top;
268
-
269
-        const targetVertices = Speedy.Matrix(2, 4, [
270
-            left, top,
271
-            right, top,
272
-            right, bottom,
273
-            left, bottom,
274
-        ]);
275
-
276
-        const screenVertices = Speedy.Matrix(2, 4, [
277
-            0, 0,
278
-            sw, 0,
279
-            sw, sh,
280
-            0, sh
281
-        ]);
282
-
283
-        const preRectificationMatrix = Speedy.Matrix.Eye(3);
284
-        const alignmentMatrix = Speedy.Matrix.Zeros(3);
285
-        const rectificationMatrix = Speedy.Matrix.Zeros(3);
286
-
287
-        return (mustRotate ? Speedy.Matrix.perspective(
288
-            // pre-rectification: rotate by 90 degrees counterclockwise and scale to screenSize
289
-            preRectificationMatrix,
290
-            screenVertices,
291
-            Speedy.Matrix(2, 4, [ 0,sh , 0,0 , sw,0 , sw,sh ])
292
-        ) : Speedy.Promise.resolve(preRectificationMatrix)).then(_ =>
293
-            // alignment: align the target to the center of the screen
294
-            Speedy.Matrix.perspective(
295
-                alignmentMatrix,
296
-                screenVertices,
297
-                targetVertices
298
-            )
299
-        ).then(_ =>
300
-            // pre-rectify and then align
301
-            rectificationMatrix.setTo(alignmentMatrix.times(preRectificationMatrix))
302
-        );
303
-    }
304
-
305
-    /**
306
-     * Find a rectification matrix to be applied to the target image
307
-     * @param homography maps a reference image to the AR screen
308
-     * @param targetSize size of the target space
309
-     * @param media media associated with the reference image
310
-     * @param screenSize AR screen
311
-     * @returns promise that resolves to a rectification matrix
312
-     */
313
-    protected _findRectificationMatrixOfCameraImage(homography: SpeedyMatrix, targetSize: SpeedySize, media: SpeedyMedia, screenSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
314
-    {
315
-        const sw = screenSize.width, sh = screenSize.height;
316
-        const screen = Speedy.Matrix(2, 4, [ 0, 0, sw, 0, sw, sh, 0, sh ]);
317
-
318
-        const rectificationMatrix = Speedy.Matrix.Zeros(3);
319
-        return this._findPolylineCoordinates(homography, targetSize).then(polyline =>
320
-
321
-            // from target space to (full)screen
322
-            Speedy.Matrix.perspective(rectificationMatrix, polyline, screen)
323
-
324
-        ).then(_ =>
325
-
326
-            // from (full)screen to rectified coordinates
327
-            this._findRectificationMatrixOfFullscreenImage(media, screenSize)
328
-
329
-        ).then(mat =>
330
-
331
-            // function composition
332
-            rectificationMatrix.setTo(mat.times(rectificationMatrix))
333
-
334
-        );
335
-    }
336
-}
195
+}

+ 249
- 481
src/trackers/image-tracker/states/tracking.ts
File diff soppresso perché troppo grande
Vedi File


+ 59
- 94
src/trackers/image-tracker/states/training.ts Vedi File

@@ -31,9 +31,9 @@ import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/
31 31
 import { SpeedyKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
32 32
 import { Resolution } from '../../../utils/resolution';
33 33
 import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
34
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
34 35
 import { ImageTrackerState, ImageTrackerStateOutput } from './state';
35
-import { ReferenceImage } from '../reference-image';
36
-import { ReferenceImageDatabase } from '../reference-image-database';
36
+import { ReferenceImage, ReferenceImageWithMedia } from '../reference-image';
37 37
 import { Nullable, Utils } from '../../../utils/utils';
38 38
 import { IllegalOperationError, TrainingError } from '../../../utils/errors';
39 39
 import {
@@ -43,7 +43,6 @@ import {
43 43
     SCAN_WITH_NIGHTVISION, NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY,
44 44
     SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_SIGMA,
45 45
     TRAIN_IMAGE_SCALE,
46
-    TRAIN_TARGET_NORMALIZED_SIZE,
47 46
     NIGHTVISION_QUALITY,
48 47
     SUBPIXEL_METHOD,
49 48
 } from '../settings';
@@ -53,14 +52,14 @@ import {
53 52
 /** The training map maps keypoints to reference images */
54 53
 interface TrainingMap
55 54
 {
55
+    /** the collection of all keypoints (of all images) */
56
+    readonly keypoints: SpeedyKeypoint[];
57
+
56 58
     /** maps a keypoint index to an image index */
57 59
     readonly referenceImageIndex: number[];
58 60
 
59
-    /** maps an image index to a reference image */
60
-    readonly referenceImage: ReferenceImage[];
61
-
62
-    /** the collection of all keypoints (of all images) */
63
-    readonly keypoints: SpeedyKeypoint[];
61
+    /** reference images */
62
+    readonly referenceImages: ReferenceImageWithMedia[];
64 63
 }
65 64
 
66 65
 
@@ -70,8 +69,10 @@ interface TrainingMap
70 69
  */
71 70
 export class ImageTrackerTrainingState extends ImageTrackerState
72 71
 {
72
+    /** index of the image being used to train the tracker */
73 73
     private _currentImageIndex = 0;
74
-    private _image: ReferenceImage[] = [];
74
+
75
+    /** training map */
75 76
     private _trainingMap: TrainingMap;
76 77
 
77 78
 
@@ -86,9 +87,9 @@ export class ImageTrackerTrainingState extends ImageTrackerState
86 87
 
87 88
         // initialize the training map
88 89
         this._trainingMap = {
90
+            keypoints: [],
89 91
             referenceImageIndex: [],
90
-            referenceImage: [],
91
-            keypoints: []
92
+            referenceImages: [],
92 93
         };
93 94
     }
94 95
 
@@ -106,10 +107,9 @@ export class ImageTrackerTrainingState extends ImageTrackerState
106 107
 
107 108
         // prepare to train...
108 109
         this._currentImageIndex = 0;
109
-        this._image.length = 0;
110
-        this._trainingMap.referenceImageIndex.length = 0;
111
-        this._trainingMap.referenceImage.length = 0;
112 110
         this._trainingMap.keypoints.length = 0;
111
+        this._trainingMap.referenceImageIndex.length = 0;
112
+        this._trainingMap.referenceImages.length = 0;
113 113
 
114 114
         // lock the database
115 115
         Utils.log(`Image Tracker: training using ${database.count} reference image${database.count != 1 ? 's' : ''}`);
@@ -117,7 +117,17 @@ export class ImageTrackerTrainingState extends ImageTrackerState
117 117
 
118 118
         // collect all images
119 119
         for(const referenceImage of database)
120
-            this._image.push(referenceImage);
120
+            this._trainingMap.referenceImages.push(referenceImage);
121
+    }
122
+
123
+    /**
124
+     * Called when leaving the state, after update()
125
+     */
126
+    onLeaveState(): void
127
+    {
128
+        // we don't return to this state, so we can release the pipeline early
129
+        this._pipeline.release();
130
+        this._pipelineReleased = true;
121 131
     }
122 132
 
123 133
     /**
@@ -126,66 +136,25 @@ export class ImageTrackerTrainingState extends ImageTrackerState
126 136
      */
127 137
     protected _beforeUpdate(): SpeedyPromise<void>
128 138
     {
129
-        const arScreenSize = this.screenSize;
130 139
         const source = this._pipeline.node('source') as SpeedyPipelineNodeImageSource;
131 140
         const screen = this._pipeline.node('screen') as SpeedyPipelineNodeResize;
132 141
         const keypointScaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
133 142
 
134
-        // this shouldn't happen
135
-        if(this._currentImageIndex >= this._image.length)
136
-            return Speedy.Promise.reject(new IllegalOperationError());
137
-
138 143
         // set the appropriate training media
139
-        const database = this._imageTracker.database;
140
-        const referenceImage = this._image[this._currentImageIndex];
141
-        const media = database._findMedia(referenceImage.name);
142
-        source.media = media;
144
+        const referenceImage = this._trainingMap.referenceImages[this._currentImageIndex];
145
+        source.media = referenceImage.media;
143 146
 
144 147
         // compute the appropriate size of the training image space
145 148
         const resolution = this._imageTracker.resolution;
146 149
         const scale = TRAIN_IMAGE_SCALE; // ORB is not scale-invariant
147
-        const aspectRatioOfTrainingImage = media.width / media.height;
150
+        const aspectRatioOfTrainingImage = referenceImage.aspectRatio;
148 151
 
149
-        /*
150
-        let sin = 0, cos = 1;
151
-
152
-        if((aspectRatioOfSourceVideo - 1) * (aspectRatioOfTrainingImage - 1) >= 0) {
153
-            // training image and source video: both in landscape mode or both in portrait mode
154
-            screen.size = Utils.resolution(resolution, aspectRatioOfTrainingImage);
155
-            screen.size.width = Math.round(screen.size.width * scale);
156
-            screen.size.height = Math.round(screen.size.height * scale);
157
-        }
158
-        else if(aspectRatioOfTrainingImage > aspectRatioOfSourceVideo) {
159
-            // training image: portrait mode; source video: landscape mode
160
-            screen.size = Utils.resolution(resolution, 1 / aspectRatioOfTrainingImage);
161
-            screen.size.width = Math.round(screen.size.width * scale);
162
-            screen.size.height = Math.round(screen.size.height * scale);
163
-            sin = 1; cos = 0; // rotate 90deg
164
-        }
165
-        else {
166
-            // training image: landscape mode; source video: portrait mode
167
-        }
168
-        */
169 152
         screen.size = Utils.resolution(resolution, aspectRatioOfTrainingImage);
170 153
         screen.size.width = Math.round(screen.size.width * scale);
171 154
         screen.size.height = Math.round(screen.size.height * scale);
172 155
 
173
-
174
-        // convert keypoints from the training image space to AR screen space
175
-        // let's pretend that trained keypoints belong to the AR screen space,
176
-        // regardless of the size of the target image. This will make things
177
-        // easier when computing the homography.
178
-        /*
179
-        const sw = arScreenSize.width / screen.size.width;
180
-        const sh = arScreenSize.height / screen.size.height;
181
-        */
182
-        const sw = TRAIN_TARGET_NORMALIZED_SIZE / screen.size.width;
183
-        const sh = TRAIN_TARGET_NORMALIZED_SIZE / screen.size.height;
184
-        keypointScaler.transform = Speedy.Matrix(3, 3, [
185
-            sw, 0,  0,
186
-            0,  sh, 0,
187
-            0,  0,  1,
188
-        ]);
156
+        // convert keypoints to NIS
157
+        keypointScaler.transform = ImageTrackerUtils.rasterToNIS(screen.size);
189 158
 
190 159
         // log
191 160
         Utils.log(`Image Tracker: training using reference image "${referenceImage.name}" at ${screen.size.width}x${screen.size.height}...`);
@@ -201,15 +170,20 @@ export class ImageTrackerTrainingState extends ImageTrackerState
201 170
      */
202 171
     protected _afterUpdate(result: SpeedyPipelineOutput): SpeedyPromise<ImageTrackerStateOutput>
203 172
     {
204
-        const referenceImage = this._image[this._currentImageIndex];
173
+        const referenceImage = this._trainingMap.referenceImages[this._currentImageIndex];
205 174
         const keypoints = result.keypoints as SpeedyKeypoint[];
206 175
         const image = result.image as SpeedyMedia | undefined;
207 176
 
208 177
         // log
209 178
         Utils.log(`Image Tracker: found ${keypoints.length} keypoints in reference image "${referenceImage.name}"`);
210 179
 
180
+        // tracker output
181
+        const trackerOutput: ImageTrackerOutput = {
182
+            keypointsNIS: image !== undefined ? keypoints : undefined, // debug only
183
+            image: image,
184
+        };
185
+
211 186
         // set the training map, so that we can map all keypoints of the current image to the current image
212
-        this._trainingMap.referenceImage.push(referenceImage);
213 187
         for(let i = 0; i < keypoints.length; i++) {
214 188
             this._trainingMap.keypoints.push(keypoints[i]);
215 189
             this._trainingMap.referenceImageIndex.push(this._currentImageIndex);
@@ -218,31 +192,22 @@ export class ImageTrackerTrainingState extends ImageTrackerState
218 192
         // the current image has been processed!
219 193
         ++this._currentImageIndex;
220 194
 
221
-        // set output
222
-        if(this._currentImageIndex >= this._image.length) {
223
-
224
-            // finished training!
225
-            return Speedy.Promise.resolve({
226
-                //nextState: 'training',
227
-                nextState: 'scanning',
228
-                nextStateSettings: {
229
-                    keypoints: this._trainingMap.keypoints,
230
-                },
231
-                trackerOutput: { },
232
-                //trackerOutput: { image, keypoints, screenSize: this.screenSize },
233
-            });
234
-
235
-        }
236
-        else {
237
-
238
-            // we're not done yet
195
+        // we're not done yet
196
+        if(this._currentImageIndex < this._trainingMap.referenceImages.length) {
239 197
             return Speedy.Promise.resolve({
240 198
                 nextState: 'training',
241
-                trackerOutput: { },
242
-                //trackerOutput: { image, keypoints, screenSize: this.screenSize },
199
+                trackerOutput: trackerOutput
243 200
             });
244
-
245 201
         }
202
+
203
+        // finished training!
204
+        return Speedy.Promise.resolve({
205
+            nextState: 'scanning',
206
+            trackerOutput: trackerOutput,
207
+            nextStateSettings: {
208
+                database: this._trainingMap.keypoints,
209
+            }
210
+        });
246 211
     }
247 212
 
248 213
     /**
@@ -268,7 +233,7 @@ export class ImageTrackerTrainingState extends ImageTrackerState
268 233
         const clipper = Speedy.Keypoint.Clipper();
269 234
         const keypointScaler = Speedy.Keypoint.Transformer('keypointScaler');
270 235
         const keypointSink = Speedy.Keypoint.Sink('keypoints');
271
-        const imageSink = Speedy.Image.Sink('image');
236
+        //const imageSink = Speedy.Image.Sink('image');
272 237
 
273 238
         source.media = null;
274 239
         screen.size = Speedy.Size(0,0);
@@ -313,12 +278,12 @@ export class ImageTrackerTrainingState extends ImageTrackerState
313 278
         // keypoint description
314 279
         greyscale.output().connectTo(blur.input());
315 280
         blur.output().connectTo(descriptor.input('image'));
316
-        clipper.output().connectTo(descriptor.input('keypoints'));
281
+        subpixel.output().connectTo(descriptor.input('keypoints'));
317 282
 
318 283
         // prepare output
319 284
         descriptor.output().connectTo(keypointScaler.input());
320 285
         keypointScaler.output().connectTo(keypointSink.input());
321
-        nightvisionMux.output().connectTo(imageSink.input());
286
+        //nightvisionMux.output().connectTo(imageSink.input());
322 287
 
323 288
         // done!
324 289
         pipeline.init(
@@ -327,27 +292,27 @@ export class ImageTrackerTrainingState extends ImageTrackerState
327 292
             pyramid, detector, blur, descriptor, clipper,
328 293
             denoiser, blurredPyramid, subpixel,
329 294
             keypointScaler, keypointSink,
330
-            imageSink
295
+            //imageSink
331 296
         );
332 297
         return pipeline;
333 298
     }
334 299
 
335 300
     /**
336
-     * Get reference image
301
+     * Get the reference image associated with a keypoint index in the training map
337 302
      * @param keypointIndex -1 if not found
338 303
      * @returns reference image
339 304
      */
340
-    referenceImageOfKeypoint(keypointIndex: number): Nullable<ReferenceImage>
305
+    referenceImageOfKeypoint(keypointIndex: number): Nullable<ReferenceImageWithMedia>
341 306
     {
342 307
         const imageIndex = this.referenceImageIndexOfKeypoint(keypointIndex);
343 308
         if(imageIndex < 0)
344 309
             return null;
345 310
 
346
-        return this._trainingMap.referenceImage[imageIndex];
311
+        return this._trainingMap.referenceImages[imageIndex];
347 312
     }
348 313
 
349 314
     /**
350
-     * Get reference image index
315
+     * Get the reference image index associated with a keypoint index in the training map
351 316
      * @param keypointIndex -1 if not found
352 317
      * @returns reference image index, or -1 if not found
353 318
      */
@@ -358,14 +323,14 @@ export class ImageTrackerTrainingState extends ImageTrackerState
358 323
             return -1;
359 324
 
360 325
         const imageIndex = this._trainingMap.referenceImageIndex[keypointIndex];
361
-        if(imageIndex < 0 || imageIndex >= this._trainingMap.referenceImage.length)
326
+        if(imageIndex < 0 || imageIndex >= this._trainingMap.referenceImages.length)
362 327
             return -1;
363 328
 
364 329
         return imageIndex;
365 330
     }
366 331
 
367 332
     /**
368
-     * Get keypoint of the trained set
333
+     * Get a keypoint of the trained set
369 334
      * @param keypointIndex -1 if not found
370 335
      * @returns a keypoint
371 336
      */

+ 49
- 47
src/ui/gizmos.ts Vedi File

@@ -26,8 +26,10 @@ import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
26 26
 import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
27 27
 import { SpeedyKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
28 28
 import { Viewport } from '../core/viewport';
29
+import { CameraModel } from '../geometry/camera-model';
29 30
 import { Tracker, TrackerOutput } from '../trackers/tracker';
30 31
 import { ImageTrackerOutput } from '../trackers/image-tracker/image-tracker';
32
+import { NIS_SIZE } from '../trackers/image-tracker/settings';
31 33
 
32 34
 
33 35
 
@@ -122,10 +124,9 @@ class ImageTrackerGizmos implements GizmosRenderer
122 124
             return;
123 125
 
124 126
         const viewportSize = viewport._realSize;
125
-        const screenSize = output.screenSize;
126
-        const keypoints = output.keypoints;
127
-        const polyline = output.polyline;
128
-        const cameraMatrix = output.cameraMatrix;
127
+        const keypointsNIS = output.keypointsNIS;
128
+        const polylineNDC = output.polylineNDC;
129
+        const camera = output.camera;
129 130
 
130 131
         // debug
131 132
         //ctx.fillStyle = '#000';
@@ -133,43 +134,48 @@ class ImageTrackerGizmos implements GizmosRenderer
133 134
         //ctx.clearRect(0, 0, canvas.width, canvas.height);
134 135
 
135 136
         // render keypoints
136
-        if(keypoints !== undefined && screenSize !== undefined)
137
-            this._splitAndRenderKeypoints(ctx, keypoints, screenSize, viewportSize);
137
+        if(keypointsNIS !== undefined)
138
+            this._splitAndRenderKeypointsNIS(ctx, keypointsNIS, viewportSize);
138 139
 
139 140
         // render polylines
140
-        if(polyline !== undefined && screenSize !== undefined)
141
-            this._renderPolyline(ctx, polyline, screenSize, viewportSize);
141
+        if(polylineNDC !== undefined)
142
+            this._renderPolylineNDC(ctx, polylineNDC, viewportSize);
142 143
 
143 144
         // render the axes of the 3D coordinate system
144
-        if(cameraMatrix !== undefined && screenSize !== undefined)
145
-            this._renderAxes(ctx, cameraMatrix, screenSize, viewportSize);
145
+        if(camera !== undefined)
146
+            this._renderAxes(ctx, camera, viewportSize);
146 147
     }
147 148
 
148 149
     /**
149 150
      * Split keypoints in matched/unmatched categories and
150 151
      * render them for testing & development purposes
151 152
      * @param ctx canvas 2D context
152
-     * @param keypoints keypoints to render
153
-     * @param screenSize AR screen size
153
+     * @param keypoints keypoints in Normalized Image Space (NIS)
154 154
      * @param viewportSize viewport size
155 155
      * @param size base keypoint rendering size
156 156
      */
157
-    private _splitAndRenderKeypoints(ctx: CanvasRenderingContext2D, keypoints: SpeedyKeypoint[], screenSize: SpeedySize, viewportSize: SpeedySize, size = 1): void
157
+    private _splitAndRenderKeypointsNIS(ctx: CanvasRenderingContext2D, keypoints: SpeedyKeypoint[], viewportSize: SpeedySize, size = 1): void
158 158
     {
159 159
         if(keypoints.length == 0)
160 160
             return;
161 161
 
162 162
         if(!Object.prototype.hasOwnProperty.call(keypoints[0], '_matches')) { // hack...
163
-            this._renderKeypoints(ctx, keypoints, screenSize, viewportSize, '#f00', size);
163
+            this._renderKeypointsNIS(ctx, keypoints, viewportSize, '#f00', size);
164 164
             return;
165 165
         }
166 166
 
167
-        const matchedKeypoints = keypoints as SpeedyMatchedKeypoint[];
168
-        const goodMatches = matchedKeypoints.filter(keypoint => this._isGoodMatch(keypoint));
169
-        const badMatches = matchedKeypoints.filter(keypoint => !this._isGoodMatch(keypoint));
167
+        const goodMatches = [], badMatches = [];
168
+        for(let i = 0; i < keypoints.length; i++) {
169
+            const keypoint = keypoints[i] as SpeedyMatchedKeypoint;
170 170
 
171
-        this._renderKeypoints(ctx, badMatches, screenSize, viewportSize, '#f00', size);
172
-        this._renderKeypoints(ctx, goodMatches, screenSize, viewportSize, '#0f0', size);
171
+            if(this._isGoodMatch(keypoint))
172
+                goodMatches.push(keypoint);
173
+            else
174
+                badMatches.push(keypoint);
175
+        }
176
+
177
+        this._renderKeypointsNIS(ctx, badMatches, viewportSize, '#f00', size);
178
+        this._renderKeypointsNIS(ctx, goodMatches, viewportSize, '#0f0', size);
173 179
     }
174 180
 
175 181
     /**
@@ -198,16 +204,15 @@ class ImageTrackerGizmos implements GizmosRenderer
198 204
     /**
199 205
      * Render keypoints for testing & development purposes
200 206
      * @param ctx canvas 2D context
201
-     * @param keypoints keypoints to render
202
-     * @param screenSize AR screen size
207
+     * @param keypoints keypoints in Normalized Image Space (NIS)
203 208
      * @param viewportSize viewport size
204 209
      * @param color color of the rendered keypoints
205 210
      * @param size base keypoint rendering size
206 211
      */
207
-    private _renderKeypoints(ctx: CanvasRenderingContext2D, keypoints: SpeedyKeypoint[], screenSize: SpeedySize, viewportSize: SpeedySize, color = 'red', size = 1): void
212
+    private _renderKeypointsNIS(ctx: CanvasRenderingContext2D, keypoints: SpeedyKeypoint[], viewportSize: SpeedySize, color = 'red', size = 1): void
208 213
     {
209
-        const sx = viewportSize.width / screenSize.width;
210
-        const sy = viewportSize.height / screenSize.height;
214
+        const sx = viewportSize.width / NIS_SIZE;
215
+        const sy = viewportSize.height / NIS_SIZE;
211 216
 
212 217
         ctx.beginPath();
213 218
 
@@ -226,30 +231,26 @@ class ImageTrackerGizmos implements GizmosRenderer
226 231
     }
227 232
 
228 233
     /**
229
-     * Render polyline for testing & development purposes
234
+     * Render a polyline for testing & development purposes
230 235
      * @param ctx canvas 2D context
231
-     * @param polyline vertices
232
-     * @param screenSize AR screen size
236
+     * @param polyline vertices in NDC
233 237
      * @param viewportSize viewport size
234 238
      * @param color color of the rendered polyline
235 239
      * @param lineWidth
236 240
      */
237
-    private _renderPolyline(ctx: CanvasRenderingContext2D, polyline: SpeedyPoint2[], screenSize: SpeedySize, viewportSize: SpeedySize, color = '#0f0', lineWidth = 2): void
241
+    private _renderPolylineNDC(ctx: CanvasRenderingContext2D, polyline: SpeedyPoint2[], viewportSize: SpeedySize, color = '#0f0', lineWidth = 2): void
238 242
     {
239
-        if(polyline.length == 0)
240
-            return;
241
-
242 243
         const n = polyline.length;
243
-        const sx = viewportSize.width / screenSize.width;
244
-        const sy = viewportSize.height / screenSize.height;
244
+        const w = viewportSize.width;
245
+        const h = viewportSize.height;
245 246
 
246
-        // render polyline
247
-        ctx.beginPath();
247
+        if(n == 0)
248
+            return;
248 249
 
249
-        ctx.moveTo(polyline[n - 1].x * sx, polyline[n - 1].y * sy);
250
+        ctx.beginPath();
251
+        ctx.moveTo((polyline[n-1].x * 0.5 + 0.5) * w, (polyline[n-1].y * -0.5 + 0.5) * h);
250 252
         for(let j = 0; j < n; j++)
251
-            ctx.lineTo(polyline[j].x * sx, polyline[j].y * sy);
252
-
253
+            ctx.lineTo((polyline[j].x * 0.5 + 0.5) * w, (polyline[j].y * -0.5 + 0.5) * h);
253 254
         ctx.strokeStyle = color;
254 255
         ctx.lineWidth = lineWidth;
255 256
         ctx.stroke();
@@ -258,22 +259,23 @@ class ImageTrackerGizmos implements GizmosRenderer
258 259
     /**
259 260
      * Render the axes of a 3D coordinate system
260 261
      * @param ctx canvas 2D context
261
-     * @param cameraMatrix 3x4 camera matrix that maps normalized coordinates [-1,1]^3 to AR screen space
262
-     * @param screenSize AR screen size
262
+     * @param camera camera model
263 263
      * @param viewportSize viewport size
264 264
      * @param lineWidth
265 265
      */
266
-    private _renderAxes(ctx: CanvasRenderingContext2D, cameraMatrix: SpeedyMatrix, screenSize: SpeedySize, viewportSize: SpeedySize, lineWidth = 4): void
266
+    private _renderAxes(ctx: CanvasRenderingContext2D, camera: CameraModel, viewportSize: SpeedySize, lineWidth = 4): void
267 267
     {
268 268
         const RED = '#f00', GREEN = '#0f0', BLUE = '#00f';
269 269
         const color = [ RED, GREEN, BLUE ]; // color of each axis: (X,Y,Z)
270 270
         const length = 1; // length of each axis-corresponding line, given in normalized space units
271
-        const sx = viewportSize.width / screenSize.width;
272
-        const sy = viewportSize.height / screenSize.height;
271
+        const w = viewportSize.width;
272
+        const h = viewportSize.height;
273
+        const iw = 1 / (camera.imageSize.width / 2);
274
+        const ih = -1 / (camera.imageSize.height / 2);
273 275
 
274 276
         /*
275 277
 
276
-        Multiply the 3x4 camera matrix P by:
278
+        Multiply the 3x4 camera matrix by:
277 279
 
278 280
         [ 0  L  0  0 ]
279 281
         [ 0  0  L  0 ] , where L = length in normalized space of the lines
@@ -287,7 +289,7 @@ class ImageTrackerGizmos implements GizmosRenderer
287 289
 
288 290
         */
289 291
 
290
-        const p = cameraMatrix.read();
292
+        const p = camera.matrix.read();
291 293
         const l = length;
292 294
         const o = [ p[9], p[10], p[11] ]; // origin of the coordinate system
293 295
         const x = [ l*p[0]+p[9], l*p[1]+p[10], l*p[2]+p[11] ]; // x-axis
@@ -302,8 +304,8 @@ class ImageTrackerGizmos implements GizmosRenderer
302 304
             const x = q[0] / q[2], y = q[1] / q[2];
303 305
 
304 306
             ctx.beginPath();
305
-            ctx.moveTo(ox * sx, oy * sy);
306
-            ctx.lineTo(x * sx, y * sy);
307
+            ctx.moveTo((ox * iw * 0.5 + 0.5) * w, (oy * ih * 0.5 + 0.5) * h);
308
+            ctx.lineTo((x * iw * 0.5 + 0.5) * w, (y * ih * 0.5 + 0.5) * h);
307 309
             ctx.strokeStyle = color[i];
308 310
             ctx.lineWidth = lineWidth;
309 311
             ctx.stroke();

+ 21
- 10
src/utils/errors.ts Vedi File

@@ -23,7 +23,7 @@
23 23
 /**
24 24
  * Base error class
25 25
  */
26
-export abstract class BaseError extends Error
26
+export abstract class ARError extends Error
27 27
 {
28 28
     /**
29 29
      * Constructor
@@ -61,7 +61,7 @@ export abstract class BaseError extends Error
61 61
 /**
62 62
  * A method has received one or more illegal arguments
63 63
  */
64
-export class IllegalArgumentError extends BaseError
64
+export class IllegalArgumentError extends ARError
65 65
 {
66 66
     public get name(): string
67 67
     {
@@ -73,7 +73,7 @@ export class IllegalArgumentError extends BaseError
73 73
  * The method arguments are valid, but the method can't be called due to the
74 74
  * current state of the object
75 75
  */
76
-export class IllegalOperationError extends BaseError
76
+export class IllegalOperationError extends ARError
77 77
 {
78 78
     public get name(): string
79 79
     {
@@ -84,7 +84,7 @@ export class IllegalOperationError extends BaseError
84 84
 /**
85 85
  * The requested operation is not supported
86 86
  */
87
-export class NotSupportedError extends BaseError
87
+export class NotSupportedError extends ARError
88 88
 {
89 89
     public get name(): string
90 90
     {
@@ -95,7 +95,7 @@ export class NotSupportedError extends BaseError
95 95
 /**
96 96
  * Access denied
97 97
  */
98
-export class AccessDeniedError extends BaseError
98
+export class AccessDeniedError extends ARError
99 99
 {
100 100
     public get name(): string
101 101
     {
@@ -106,7 +106,7 @@ export class AccessDeniedError extends BaseError
106 106
 /**
107 107
  * Timeout
108 108
  */
109
-export class TimeoutError extends BaseError
109
+export class TimeoutError extends ARError
110 110
 {
111 111
     public get name(): string
112 112
     {
@@ -117,7 +117,7 @@ export class TimeoutError extends BaseError
117 117
 /**
118 118
  * Assertion error
119 119
  */
120
-export class AssertionError extends BaseError
120
+export class AssertionError extends ARError
121 121
 {
122 122
     public get name(): string
123 123
     {
@@ -126,9 +126,20 @@ export class AssertionError extends BaseError
126 126
 }
127 127
 
128 128
 /**
129
+ * Numerical error
130
+ */
131
+export class NumericalError extends ARError
132
+{
133
+    public get name(): string
134
+    {
135
+        return 'NumericalError';
136
+    }
137
+}
138
+
139
+/**
129 140
  * Tracking error
130 141
  */
131
-export class TrackingError extends BaseError
142
+export class TrackingError extends ARError
132 143
 {
133 144
     public get name(): string
134 145
     {
@@ -139,7 +150,7 @@ export class TrackingError extends BaseError
139 150
 /**
140 151
  * Detection error
141 152
  */
142
-export class DetectionError extends BaseError
153
+export class DetectionError extends ARError
143 154
 {
144 155
     public get name(): string
145 156
     {
@@ -150,7 +161,7 @@ export class DetectionError extends BaseError
150 161
 /**
151 162
  * Training error
152 163
  */
153
-export class TrainingError extends BaseError
164
+export class TrainingError extends ARError
154 165
 {
155 166
     public get name(): string
156 167
     {

Loading…
Annulla
Salva