
Refactor the Image Tracker. Simplify and refine the calculations

Branch: customisations
Author: alemart (10 months ago)
Commit: e594e69844

src/trackers/image-tracker/image-tracker.ts (+19 -46)

@@ -43,7 +43,8 @@ import { ImageTrackerState } from './states/state';
 import { ImageTrackerInitialState } from './states/initial';
 import { ImageTrackerTrainingState } from './states/training';
 import { ImageTrackerScanningState } from './states/scanning';
-import { ImageTrackerPreTrackingState } from './states/pre-tracking';
+import { ImageTrackerPreTrackingAState } from './states/pre-tracking-a';
+import { ImageTrackerPreTrackingBState } from './states/pre-tracking-b';
 import { ImageTrackerTrackingState } from './states/tracking';
 import { Nullable, Utils } from '../../utils/utils';
 import { AREventTarget } from '../../utils/ar-events';
@@ -53,34 +54,6 @@ import { SpeedyPoint2 } from 'speedy-vision/types/core/speedy-point';
 import { Viewer } from '../../geometry/viewer';
 import { Pose } from '../../geometry/pose';
 
-/*
-
-A few definitions:
-
-1. Viewport size:
-   size of the drawing buffer of the background canvas, which is the same as
-   the size in pixels of the input media (typically a video).
-
-2. AR screen size:
-   size in pixels used for image processing operations. It's determined by the
-   resolution of the tracker and by the aspect ratio of the input media.
-
-3. Raster space:
-   an image space whose top-left coordinate is (0,0) and whose bottom-right
-   coordinate is (w-1,h-1), where (w,h) is its size. The y-axis grows downwards.
-
-4. AR Screen Space (ASS):
-   a raster space whose size is the AR screen size.
-
-5. Normalized Image Space (NIS):
-   a raster space whose size is N x N, where N = NIS_SIZE.
-
-6. Normalized Device Coordinates (NDC):
-   the normalized 2D space [-1,1]x[-1,1]. The origin is at the center. Also,
-   the y-axis grows upwards.
-
-*/
-
 /** A trackable target */
 export interface TrackableImage extends Trackable
 {
@@ -133,7 +106,7 @@ export interface ImageTrackerOutput extends TrackerOutput
 }
 
 /** All possible states of an Image Tracker */
-export type ImageTrackerStateName = 'initial' | 'training' | 'scanning' | 'pre-tracking' | 'tracking';
+export type ImageTrackerStateName = 'initial' | 'training' | 'scanning' | 'pre-tracking-a' | 'pre-tracking-b' | 'tracking';
 
 /** A helper */
 const formatSize = (size: SpeedySize) => `${size.width}x${size.height}`;
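
Note: the widened union above, together with the state constructions below, implies the following flow after this commit: scanning hands off to pre-tracking-a, which hands off to pre-tracking-b, which enters tracking; both pre-tracking states fall back to scanning on failure. A plain TypeScript sketch of the happy path (the initial-to-training hop is presumed from the surrounding code, not shown in this diff):

    type ImageTrackerStateName = 'initial' | 'training' | 'scanning' |
        'pre-tracking-a' | 'pre-tracking-b' | 'tracking';

    // happy path of the refactored state machine; on failure, both
    // pre-tracking states return to 'scanning' (see the states below)
    const happyPath: ImageTrackerStateName[] =
        ['initial', 'training', 'scanning', 'pre-tracking-a', 'pre-tracking-b', 'tracking'];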
@@ -183,7 +156,8 @@ export class ImageTracker extends AREventTarget<ImageTrackerEventType> implement
             'initial': new ImageTrackerInitialState(this),
             'training': new ImageTrackerTrainingState(this),
             'scanning': new ImageTrackerScanningState(this),
-            'pre-tracking': new ImageTrackerPreTrackingState(this),
+            'pre-tracking-a': new ImageTrackerPreTrackingAState(this),
+            'pre-tracking-b': new ImageTrackerPreTrackingBState(this),
             'tracking': new ImageTrackerTrackingState(this),
         };
 
@@ -349,21 +323,6 @@ export class ImageTracker extends AREventTarget<ImageTrackerEventType> implement
     }
 
     /**
-     * Compute the current size of the AR screen space
-     * Note that this may change over time
-     * @returns size
-     * @internal
-     */
-    _computeScreenSize(): SpeedySize
-    {
-        const media = this._source!._internalMedia;
-        const aspectRatio = media.width / media.height;
-        const screenSize = Utils.resolution(this._resolution, aspectRatio);
-
-        return screenSize;
-    }
-
-    /**
      * Get reference image
      * @param keypointIndex -1 if not found
      * @returns reference image
@@ -398,4 +357,18 @@ export class ImageTracker extends AREventTarget<ImageTrackerEventType> implement
         const training = this._state.training as ImageTrackerTrainingState;
         return training.referenceKeypoint(keypointIndex);
     }
+
+    /**
+     * Compute the current size of the AR screen space
+     * Note that this may change over time
+     * @returns size
+     */
+    private _computeScreenSize(): SpeedySize
+    {
+        const media = this._source!._internalMedia;
+        const aspectRatio = media.width / media.height;
+        const screenSize = Utils.resolution(this._resolution, aspectRatio);
+
+        return screenSize;
+    }
 }
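
Note: the header comment removed from this file defined the coordinate spaces (raster space, AR Screen Space, NIS, NDC) that the new states keep using through ImageTrackerUtils.rasterToNIS / rasterToNDC / NDCToRaster. A minimal sketch of those conversions under the removed definitions (the real helpers return 3x3 SpeedyMatrix transforms and are not part of this diff):

    // raster space: (0,0) at the top-left, y grows downwards
    // NDC: [-1,1]x[-1,1], origin at the center, y grows upwards
    function rasterToNDC(x: number, y: number, width: number, height: number): [number, number]
    {
        const u = 2 * x / width - 1;  // [0,w] -> [-1,1]
        const v = 1 - 2 * y / height; // [0,h] -> [1,-1]: the y-axis flips
        return [u, v];
    }

    // NIS is just a raster space of fixed size N x N, with N = NIS_SIZE = 1024
    function rasterToNIS(x: number, y: number, width: number, height: number, N = 1024): [number, number]
    {
        return [x * N / width, y * N / height];
    }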

src/trackers/image-tracker/settings.ts (+7 -15)

@@ -32,9 +32,6 @@ export const TRAIN_IMAGE_SCALE = 0.8; // ORB is not scale invariant
 /** Width and height of the Normalized Image Space (NIS) */
 export const NIS_SIZE = 1024; // keypoint positions are stored as fixed point
 
-/** Normalized width & height of an image target, in pixels */
-export const TRAIN_TARGET_NORMALIZED_SIZE = NIS_SIZE; // keypoint positions are stored as fixed point
-
 /** Used to identify the best matches */
 export const SCAN_MATCH_RATIO = 0.7; // usually a value in [0.6, 0.8]
 
@@ -50,15 +47,12 @@ export const SCAN_PYRAMID_SCALEFACTOR = 1.19; // 2 ^ 0.25
 /** Threshold of the FAST corner detector used in the scanning/training states */
 export const SCAN_FAST_THRESHOLD = 60;
 
-/** Minimum number of accepted matches for us to move out from the scanning state */
+/** Minimum number of accepted matches for us to move out of the scanning state */
 export const SCAN_MIN_MATCHES = 20; //30;
 
 /** When in the scanning state, we require the image to be matched during a few consecutive frames before accepting it */
 export const SCAN_CONSECUTIVE_FRAMES = 30;//15;//45;
 
-/** Reprojection error, in pixels, used when estimating a motion model (scanning state) */
-//export const SCAN_RANSAC_REPROJECTIONERROR = 5;
-
 /** Reprojection error, in NIS pixels, used when estimating a motion model (scanning state) */
 export const SCAN_RANSAC_REPROJECTIONERROR_NIS = (NIS_SIZE * 0.02) | 0;
 
@@ -101,6 +95,9 @@ export const SUBPIXEL_GAUSSIAN_SIGMA = 1.0;
 /** Subpixel refinement method */
 export const SUBPIXEL_METHOD = 'bilinear-upsample'; // 'quadratic1d';
 
+/** Minimum acceptable number of matched keypoints when in a pre-tracking state */
+export const PRE_TRACK_MIN_MATCHES = 4;
+
 /** Minimum acceptable number of matched keypoints when in the tracking state */
 export const TRACK_MIN_MATCHES = 4;//10; //20;
 
@@ -122,13 +119,8 @@ export const TRACK_RECTIFIED_BORDER = 0.15; //0.20;
 /** Relative size (%) used to clip keypoints from the borders of the rectified image */
 export const TRACK_CLIPPING_BORDER = TRACK_RECTIFIED_BORDER * 1.20; //1.25; //1.15;
 
-/** Number of iterations used to refine the target image before tracking */
-export const TRACK_REFINEMENT_ITERATIONS = 3;
-
-/** Reprojection error, in pixels, used when estimating a motion model (tracking state) */
-export const TRACK_RANSAC_REPROJECTIONERROR = 3; //2.5;
-
-// FIXME
+/** Scale of the rectified image in NDC, without taking the aspect ratio into consideration */
+export const TRACK_RECTIFIED_SCALE = 1 - 2 * TRACK_RECTIFIED_BORDER;
 
 /** Reprojection error, in NIS pixels, used when estimating a motion model (tracking state) */
 export const TRACK_RANSAC_REPROJECTIONERROR_NIS = (NIS_SIZE * 0.0125) | 0;
@@ -143,4 +135,4 @@ export const TRACK_GRID_GRANULARITY = 10; //20; // the value of N
 export const TRACK_MATCH_RATIO = 0.75; // usually a value in [0.6, 0.8] - low values => strict tracking
 
 /** Number of consecutive frames in which we tolerate a "target lost" situation */
-export const TRACK_LOST_TOLERANCE = 10;
+export const TRACK_LOST_TOLERANCE = 15;
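
Note: the concrete values implied by these constants, worked out for reference (plain arithmetic, matching the definitions above):

    const NIS_SIZE = 1024;
    const TRACK_RECTIFIED_BORDER = 0.15;

    // the rectified target spans 70% of each NDC axis, leaving a 15% border per side
    const TRACK_RECTIFIED_SCALE = 1 - 2 * TRACK_RECTIFIED_BORDER;       // 0.7

    // reprojection errors in NIS pixels; |0 truncates to an integer
    const SCAN_RANSAC_REPROJECTIONERROR_NIS = (NIS_SIZE * 0.02) | 0;    // 20
    const TRACK_RANSAC_REPROJECTIONERROR_NIS = (NIS_SIZE * 0.0125) | 0; // 12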

src/trackers/image-tracker/states/initial.ts (+10 -0)

@@ -78,6 +78,16 @@ export class ImageTrackerInitialState extends ImageTrackerState
     }
 
     /**
+     * Called when leaving the state, after update()
+     */
+    onLeaveState(): void
+    {
+        // we don't return to this state, so we can release the pipeline early
+        this._pipeline.release();
+        this._pipelineReleased = true;
+    }
+
+    /**
      * Create & setup the pipeline
      * @returns pipeline
      */
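
Note: onLeaveState() sets _pipelineReleased after releasing the pipeline, which suggests the base state class guards against a double release when the tracker shuts down. A minimal sketch of that pattern, assuming the flag is checked in the base class (the base class in ./state is not shown in this diff):

    abstract class StateSketch
    {
        protected _pipeline = { release: (): void => void 0 }; // stand-in for a SpeedyPipeline
        protected _pipelineReleased = false;

        release(): void
        {
            // release at most once, whether or not onLeaveState() already did it
            if(!this._pipelineReleased) {
                this._pipeline.release();
                this._pipelineReleased = true;
            }
        }
    }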

src/trackers/image-tracker/states/pre-tracking-a.ts (new file, +273)

@@ -0,0 +1,273 @@
+/*
+ * encantar.js
+ * GPU-accelerated Augmented Reality for the web
+ * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * pre-tracking-a.ts
+ * Image tracker: Pre-Tracking A state
+ */
+
+import Speedy from 'speedy-vision';
+import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
+import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
+import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
+import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
+import { SpeedyPipeline, SpeedyPipelineOutput } from 'speedy-vision/types/core/pipeline/pipeline';
+import { SpeedyPipelineNodeImageSource } from 'speedy-vision/types/core/pipeline/nodes/images/source';
+import { SpeedyPipelineNodeImageMultiplexer } from 'speedy-vision/types/core/pipeline/nodes/images/multiplexer';
+import { SpeedyPipelineNodeImagePortalSource, SpeedyPipelineNodeImagePortalSink } from 'speedy-vision/types/core/pipeline/nodes/images/portal';
+import { SpeedyPipelineNodeKeypointPortalSource, SpeedyPipelineNodeKeypointPortalSink } from 'speedy-vision/types/core/pipeline/nodes/keypoints/portal';
+import { SpeedyPipelineNodeResize } from 'speedy-vision/types/core/pipeline/nodes/transforms/resize';
+import { SpeedyPipelineNodePerspectiveWarp } from 'speedy-vision/types/core/pipeline/nodes/transforms/perspective-warp';
+import { SpeedyPipelineNodeKeypointBorderClipper } from 'speedy-vision/types/core/pipeline/nodes/keypoints/border-clipper';
+import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/transformer';
+import { SpeedyKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
+import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
+import { ImageTrackerState, ImageTrackerStateOutput } from './state';
+import { ReferenceImage } from '../reference-image';
+import { Nullable, Utils } from '../../../utils/utils';
+import { TrackingError } from '../../../utils/errors';
+import {
+    TRACK_RECTIFIED_SCALE, TRACK_CLIPPING_BORDER,
+    NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY, TRACK_WITH_NIGHTVISION,
+    ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_SIGMA,
+    TRACK_HARRIS_QUALITY, TRACK_DETECTOR_CAPACITY, TRACK_MAX_KEYPOINTS,
+    SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_SIGMA,
+    PRE_TRACK_MIN_MATCHES,
+    NIGHTVISION_QUALITY,
+    SUBPIXEL_METHOD,
+} from '../settings';
+
+
+
+/**
+ * Pre-Tracking A is a new training phase. The reference image that was found
+ * in the scanning state is transported to AR screen space, and a new training
+ * takes place there, with new keypoints and in a suitable warp.
+ */
+export class ImageTrackerPreTrackingAState extends ImageTrackerState
+{
+    /** reference image */
+    private _referenceImage: Nullable<ReferenceImage>;
+
+    /** a snapshot of the video from the scanning state and corresponding to the initial homography */
+    private _snapshot: Nullable<SpeedyPipelineNodeImagePortalSink>;
+
+    /** initial homography, from reference image to scanned image, NDC */
+    private _homography: SpeedyMatrix;
+
+
+
+    /**
+     * Constructor
+     * @param imageTracker
+     */
+    constructor(imageTracker: ImageTracker)
+    {
+        super('pre-tracking-a', imageTracker);
+
+        this._homography = Speedy.Matrix.Eye(3);
+        this._referenceImage = null;
+        this._snapshot = null;
+    }
+
+    /**
+     * Called as soon as this becomes the active state, just before update() runs for the first time
+     * @param settings
+     */
+    onEnterState(settings: Record<string,any>)
+    {
+        const homography = settings.homography as SpeedyMatrix;
+        const referenceImage = settings.referenceImage as ReferenceImage;
+        const snapshot = settings.snapshot as SpeedyPipelineNodeImagePortalSink;
+
+        // set attributes
+        this._homography = homography;
+        this._referenceImage = referenceImage;
+        this._snapshot = snapshot;
+    }
+
+    /**
+     * Called just before the GPU processing
+     * @returns promise
+     */
+    protected _beforeUpdate(): SpeedyPromise<void>
+    {
+        const screenSize = this.screenSize;
+        const source = this._pipeline.node('source') as SpeedyPipelineNodeImageSource;
+        const imageRectifier = this._pipeline.node('imageRectifier') as SpeedyPipelineNodePerspectiveWarp;
+        const keypointScaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
+        const borderClipper = this._pipeline.node('borderClipper') as SpeedyPipelineNodeKeypointBorderClipper;
+
+        // set the reference image as the source image
+        source.media = this._imageTracker.database._findMedia(this._referenceImage!.name);
+
+        // clip keypoints from the borders of the target image
+        borderClipper.imageSize = screenSize;
+        borderClipper.borderSize = Speedy.Vector2(
+            screenSize.width * TRACK_CLIPPING_BORDER,
+            screenSize.height * TRACK_CLIPPING_BORDER
+        );
+
+        // convert keypoints to NIS
+        keypointScaler.transform = ImageTrackerUtils.rasterToNIS(screenSize);
+
+        // rectify the image
+        const scale = TRACK_RECTIFIED_SCALE;
+        const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(this._imageTracker, this._referenceImage!);
+        const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, scale);
+        const toScreen = ImageTrackerUtils.NDCToRaster(screenSize);
+        const toNDC = ImageTrackerUtils.rasterToNDC(screenSize);
+
+        return imageRectifier.transform.setTo(
+            toScreen.times(shrink).times(toNDC)
+        ).then(() => void 0);
+    }
+
+    /**
+     * Post processing that takes place just after the GPU processing
+     * @param result pipeline results
+     * @returns state output
+     */
+    protected _afterUpdate(result: SpeedyPipelineOutput): SpeedyPromise<ImageTrackerStateOutput>
+    {
+        const referenceImage = this._referenceImage!;
+        const keypointPortalSink = this._pipeline.node('keypointPortalSink') as SpeedyPipelineNodeKeypointPortalSink;
+        const keypoints = result.keypoints as SpeedyKeypoint[];
+        const image = result.image as SpeedyMedia | undefined;
+
+        // tracker output
+        const trackerOutput: ImageTrackerOutput = {
+            keypointsNIS: image !== undefined ? keypoints : undefined, // debug only
+            image: image,
+        };
+
+        // not enough keypoints? something went wrong!
+        if(keypoints.length < PRE_TRACK_MIN_MATCHES) {
+            Utils.warning(`Can't pre-track "${referenceImage.name}" in ${this.name}!`);
+            return Speedy.Promise.resolve({
+                nextState: 'scanning',
+                trackerOutput: trackerOutput,
+            });
+        }
+
+        // done!
+        return Speedy.Promise.resolve({
+            nextState: 'pre-tracking-b',
+            trackerOutput: trackerOutput,
+            nextStateSettings: {
+                referenceKeypointPortalSink: keypointPortalSink,
+                referenceImage: this._referenceImage,
+                snapshot: this._snapshot,
+                homography: this._homography,
+            }
+        });
+    }
+
+    /**
+     * Create & setup the pipeline
+     * @returns pipeline
+     */
+    protected _createPipeline(): SpeedyPipeline
+    {
+        const pipeline = Speedy.Pipeline();
+
+        const source = Speedy.Image.Source('source');
+        const screen = Speedy.Transform.Resize('screen');
+        const greyscale = Speedy.Filter.Greyscale();
+        const imageRectifier = Speedy.Transform.PerspectiveWarp('imageRectifier');
+        const nightvision = Speedy.Filter.Nightvision();
+        const nightvisionMux = Speedy.Image.Multiplexer();
+        const detector = Speedy.Keypoint.Detector.Harris();
+        const descriptor = Speedy.Keypoint.Descriptor.ORB();
+        const blur = Speedy.Filter.GaussianBlur();
+        const clipper = Speedy.Keypoint.Clipper();
+        const borderClipper = Speedy.Keypoint.BorderClipper('borderClipper');
+        const denoiser = Speedy.Filter.GaussianBlur();
+        const subpixel = Speedy.Keypoint.SubpixelRefiner();
+        const keypointScaler = Speedy.Keypoint.Transformer('keypointScaler');
+        const keypointPortalSink = Speedy.Keypoint.Portal.Sink('keypointPortalSink');
+        const keypointSink = Speedy.Keypoint.Sink('keypoints');
+        //const imageSink = Speedy.Image.Sink('image');
+
+        source.media = null;
+        imageRectifier.transform = Speedy.Matrix.Eye(3);
+        screen.size = Speedy.Size(0,0);
+        nightvision.gain = NIGHTVISION_GAIN;
+        nightvision.offset = NIGHTVISION_OFFSET;
+        nightvision.decay = NIGHTVISION_DECAY;
+        nightvision.quality = NIGHTVISION_QUALITY;
+        nightvisionMux.port = TRACK_WITH_NIGHTVISION ? 1 : 0; // 1 = enable nightvision
+        blur.kernelSize = Speedy.Size(ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_KSIZE);
+        blur.sigma = Speedy.Vector2(ORB_GAUSSIAN_SIGMA, ORB_GAUSSIAN_SIGMA);
+        denoiser.kernelSize = Speedy.Size(SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_KSIZE);
+        denoiser.sigma = Speedy.Vector2(SUBPIXEL_GAUSSIAN_SIGMA, SUBPIXEL_GAUSSIAN_SIGMA);
+        detector.quality = TRACK_HARRIS_QUALITY;
+        detector.capacity = TRACK_DETECTOR_CAPACITY;
+        subpixel.method = SUBPIXEL_METHOD;
+        clipper.size = TRACK_MAX_KEYPOINTS;
+        borderClipper.imageSize = screen.size;
+        borderClipper.borderSize = Speedy.Vector2(0,0);
+        keypointScaler.transform = Speedy.Matrix.Eye(3);
+        keypointSink.turbo = false;
+
+        // prepare input
+        source.output().connectTo(screen.input());
+        screen.output().connectTo(greyscale.input());
+
+        // preprocess images
+        greyscale.output().connectTo(imageRectifier.input());
+        imageRectifier.output().connectTo(nightvisionMux.input('in0'));
+        imageRectifier.output().connectTo(nightvision.input());
+        nightvision.output().connectTo(nightvisionMux.input('in1'));
+
+        // keypoint detection & clipping
+        nightvisionMux.output().connectTo(detector.input());
+        detector.output().connectTo(borderClipper.input());
+        borderClipper.output().connectTo(clipper.input());
+
+        // keypoint refinement
+        imageRectifier.output().connectTo(denoiser.input());
+        denoiser.output().connectTo(subpixel.input('image'));
+        clipper.output().connectTo(subpixel.input('keypoints'));
+
+        // keypoint description
+        nightvisionMux.output().connectTo(blur.input());
+        blur.output().connectTo(descriptor.input('image'));
+        subpixel.output().connectTo(descriptor.input('keypoints'));
+
+        // prepare output
+        descriptor.output().connectTo(keypointScaler.input());
+        keypointScaler.output().connectTo(keypointSink.input());
+        keypointScaler.output().connectTo(keypointPortalSink.input());
+        //imageRectifier.output().connectTo(imageSink.input());
+
+        // done!
+        pipeline.init(
+            source, screen,
+            greyscale, imageRectifier,
+            nightvision, nightvisionMux,
+            detector, borderClipper, clipper,
+            denoiser, subpixel,
+            blur, descriptor,
+            keypointScaler, keypointSink, keypointPortalSink,
+            //imageSink
+        );
+
+        return pipeline;
+    }
+}
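
Note: the rectification transform in _beforeUpdate() composes right-to-left: toNDC maps AR screen raster coordinates into NDC, shrink applies the best-fit scale for the target's aspect ratio, and toScreen maps the result back to raster coordinates. A sketch with plain row-major 3x3 matrices (the actual code composes SpeedyMatrix expressions; the 0.7 / 0.525 scales below are illustrative, e.g. a 4:3 target with TRACK_RECTIFIED_SCALE = 0.7):

    type Mat3 = number[]; // 9 entries, row-major

    function mul3(a: Mat3, b: Mat3): Mat3
    {
        const c = new Array<number>(9).fill(0);
        for(let i = 0; i < 3; i++)
            for(let j = 0; j < 3; j++)
                for(let k = 0; k < 3; k++)
                    c[3*i + j] += a[3*i + k] * b[3*k + j];
        return c;
    }

    // raster -> NDC: u = 2x/w - 1, v = 1 - 2y/h
    const toNDC = (w: number, h: number): Mat3 => [2/w, 0, -1,  0, -2/h, 1,  0, 0, 1];

    // NDC -> raster: the inverse of toNDC
    const toScreen = (w: number, h: number): Mat3 => [w/2, 0, w/2,  0, -h/2, h/2,  0, 0, 1];

    // best-fit scale in NDC (diagonal); sx, sy depend on the target's aspect ratio
    const shrink = (sx: number, sy: number): Mat3 => [sx, 0, 0,  0, sy, 0,  0, 0, 1];

    // e.g., a 640x480 screen: keep x at 0.7, shrink y by 0.7 / (4/3) = 0.525
    const transform = mul3(toScreen(640, 480), mul3(shrink(0.7, 0.525), toNDC(640, 480)));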

src/trackers/image-tracker/states/pre-tracking-b.ts (new file, +391)

@@ -0,0 +1,391 @@
+/*
+ * encantar.js
+ * GPU-accelerated Augmented Reality for the web
+ * Copyright (C) 2022-2024 Alexandre Martins <alemartf(at)gmail.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ *
+ * pre-tracking-b.ts
+ * Image tracker: Pre-Tracking B state
+ */
+
+import Speedy from 'speedy-vision';
+import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
+import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
+import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
+import { SpeedyMatrixExpr } from 'speedy-vision/types/core/speedy-matrix-expr';
+import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
+import { SpeedyPipeline, SpeedyPipelineOutput } from 'speedy-vision/types/core/pipeline/pipeline';
+import { SpeedyPipelineNodeImageSource } from 'speedy-vision/types/core/pipeline/nodes/images/source';
+import { SpeedyPipelineNodeImageMultiplexer } from 'speedy-vision/types/core/pipeline/nodes/images/multiplexer';
+import { SpeedyPipelineNodeImagePortalSource, SpeedyPipelineNodeImagePortalSink } from 'speedy-vision/types/core/pipeline/nodes/images/portal';
+import { SpeedyPipelineNodeKeypointPortalSource, SpeedyPipelineNodeKeypointPortalSink } from 'speedy-vision/types/core/pipeline/nodes/keypoints/portal';
+import { SpeedyPipelineNodeResize } from 'speedy-vision/types/core/pipeline/nodes/transforms/resize';
+import { SpeedyPipelineNodePerspectiveWarp } from 'speedy-vision/types/core/pipeline/nodes/transforms/perspective-warp';
+import { SpeedyPipelineNodeKeypointBorderClipper } from 'speedy-vision/types/core/pipeline/nodes/keypoints/border-clipper';
+import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/transformer';
+import { SpeedyKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
+import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
+import { ImageTrackerState, ImageTrackerStateOutput } from './state';
+import { ReferenceImage } from '../reference-image';
+import { Nullable, Utils } from '../../../utils/utils';
+import { TrackingError } from '../../../utils/errors';
+import {
+    TRACK_RECTIFIED_SCALE, TRACK_CLIPPING_BORDER,
+    NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY, TRACK_WITH_NIGHTVISION,
+    ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_SIGMA,
+    TRACK_HARRIS_QUALITY, TRACK_DETECTOR_CAPACITY, TRACK_MAX_KEYPOINTS,
+    SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_SIGMA,
+    PRE_TRACK_MIN_MATCHES, TRACK_MATCH_RATIO, TRACK_RANSAC_REPROJECTIONERROR_NDC,
+    NIGHTVISION_QUALITY,
+    SUBPIXEL_METHOD,
+} from '../settings';
+
+
+
+
+/**
+ * In Pre-Tracking B, we refine the homography obtained at the scanning state.
+ * We find a transformation that warps the snapshot obtained from the scanning
+ * state to an image that closely resembles the output of Pre-Tracking A.
+ */
+export class ImageTrackerPreTrackingBState extends ImageTrackerState
+{
+    /** reference image */
+    private _referenceImage: Nullable<ReferenceImage>;
+
+    /** a snapshot of the video from the scanning state and corresponding to the initial homography */
+    private _snapshot: Nullable<SpeedyPipelineNodeImagePortalSink>;
+
+    /** initial homography, from reference image to scanned image, NDC */
+    private _homography: SpeedyMatrix;
+
+    /** portal with keypoints from Pre-Tracking A */
+    private _referenceKeypointPortalSink: Nullable<SpeedyPipelineNodeKeypointPortalSink>;
+
+
+
+
+
+
+    /**
+     * Constructor
+     * @param imageTracker
+     */
+    constructor(imageTracker: ImageTracker)
+    {
+        super('pre-tracking-b', imageTracker);
+
+        this._homography = Speedy.Matrix.Eye(3);
+        this._referenceImage = null;
+        this._snapshot = null;
+        this._referenceKeypointPortalSink = null;
+    }
+
+    /**
+     * Called as soon as this becomes the active state, just before update() runs for the first time
+     * @param settings
+     */
+    onEnterState(settings: Record<string,any>)
+    {
+        const homography = settings.homography as SpeedyMatrix;
+        const referenceImage = settings.referenceImage as ReferenceImage;
+        const snapshot = settings.snapshot as SpeedyPipelineNodeImagePortalSink;
+        const referenceKeypointPortalSink = settings.referenceKeypointPortalSink as SpeedyPipelineNodeKeypointPortalSink;
+
+        // set attributes
+        this._homography = homography;
+        this._referenceImage = referenceImage;
+        this._snapshot = snapshot;
+        this._referenceKeypointPortalSink = referenceKeypointPortalSink;
+    }
+
+    /**
+     * Called just before the GPU processing
+     * @returns promise
+     */
+    protected _beforeUpdate(): SpeedyPromise<void>
+    {
+        const screenSize = this.screenSize;
+        const imageRectifier = this._pipeline.node('imageRectifier') as SpeedyPipelineNodePerspectiveWarp;
+        const keypointScaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
+        const borderClipper = this._pipeline.node('borderClipper') as SpeedyPipelineNodeKeypointBorderClipper;
+        const imagePortalSource = this._pipeline.node('imagePortalSource') as SpeedyPipelineNodeImagePortalSource;
+        const referenceKeypointPortalSource = this._pipeline.node('referenceKeypointPortalSource') as SpeedyPipelineNodeKeypointPortalSource;
+
+        // get the snapshot from the scanning state
+        imagePortalSource.source = this._snapshot!;
+
+        // get the reference keypoints from Pre-Tracking A
+        referenceKeypointPortalSource.source = this._referenceKeypointPortalSink!;
+
+        // clip keypoints from the borders of the target image
+        borderClipper.imageSize = screenSize;
+        borderClipper.borderSize = Speedy.Vector2(
+            screenSize.width * TRACK_CLIPPING_BORDER,
+            screenSize.height * TRACK_CLIPPING_BORDER
+        );
+
+        // convert keypoints to NIS
+        keypointScaler.transform = ImageTrackerUtils.rasterToNIS(screenSize);
+
+        // rectify the image
+        const scale = TRACK_RECTIFIED_SCALE;
+        const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(this._imageTracker, this._referenceImage!);
+        const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, scale);
+        const undistort = this._homography.inverse();
+        const toScreen = ImageTrackerUtils.NDCToRaster(screenSize);
+        const toNDC = ImageTrackerUtils.rasterToNDC(screenSize);
+
+        return imageRectifier.transform.setTo(
+            toScreen.times(shrink.times(undistort)).times(toNDC)
+        ).then(() => void 0);
+    }
+
+    /**
+     * Post processing that takes place just after the GPU processing
+     * @param result pipeline results
+     * @returns state output
+     */
+    protected _afterUpdate(result: SpeedyPipelineOutput): SpeedyPromise<ImageTrackerStateOutput>
+    {
+        const referenceImage = this._referenceImage!;
+        const referenceKeypoints = result.referenceKeypoints as SpeedyKeypoint[]; // from Pre-Tracking A
+        const keypoints = result.keypoints as SpeedyMatchedKeypoint[]; // from Pre-Tracking B
+        const image = result.image as SpeedyMedia | undefined;
+        const keypointPortalSink = this._pipeline.node('keypointPortalSink') as SpeedyPipelineNodeKeypointPortalSink;
+
+        // tracker output
+        const trackerOutput: ImageTrackerOutput = {
+            keypointsNIS: image !== undefined ? keypoints : undefined, // debug only
+            image: image,
+        };
+
+        return Speedy.Promise.resolve()
+        .then(() => {
+
+            // find matching pairs of keypoints
+            const pairs = this._findMatchingPairs(referenceKeypoints, keypoints);
+            //const pairs = ImageTrackerUtils.refineMatchingPairs(allPairs);
+            if(pairs.length < PRE_TRACK_MIN_MATCHES)
+                throw new TrackingError('Not enough data points');
+
+            // find a warp
+            const points = ImageTrackerUtils.compilePairsOfKeypointsNDC(pairs);
+            return this._findAffineMotionNDC(points);
+
+        })
+        .then(warp => {
+
+            // refine the homography
+            return this._homography.setTo(warp.times(this._homography));
+
+        })
+        .then(_ => ({
+            nextState: 'tracking',
+            //nextState: 'pre-tracking-b',
+            trackerOutput: trackerOutput,
+            nextStateSettings: {
+                // we export keypoints obtained in Pre-Tracking B, not in A.
+                // lighting conditions match, but what if the snapshot is too blurry?
+                templateKeypoints: keypoints,
+                templateKeypointPortalSink: keypointPortalSink,
+                referenceImage: this._referenceImage,
+                homography: this._homography,
+                initialScreenSize: this.screenSize,
+            }
+        }))
+        .catch(err => {
+            Utils.warning(`Can't pre-track "${referenceImage.name}" in ${this.name}! ${err.toString()}`);
+            return {
+                nextState: 'scanning',
+                trackerOutput: trackerOutput,
+            };
+        });
+    }
+
+    /**
+     * Find an affine motion model in NDC between pairs of keypoints in NDC
+     * given as a 2 x 2n [ src | dest ] matrix
+     * @param points compiled pairs of keypoints in NDC
+     * @returns a promise that resolves to a 3x3 warp in NDC that maps source to destination
+     */
+    private _findAffineMotionNDC(points: SpeedyMatrix): SpeedyPromise<SpeedyMatrixExpr>
+    {
+        return ImageTrackerUtils.findAffineWarpNDC(points, {
+            method: 'pransac',
+            reprojectionError: TRACK_RANSAC_REPROJECTIONERROR_NDC,
+            numberOfHypotheses: 512*4,
+            bundleSize: 128,
+            mask: undefined // score is not needed
+        }).then(([ warp, score ]) => {
+
+            const scale = TRACK_RECTIFIED_SCALE;
+            const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(this._imageTracker, this._referenceImage!);
+            const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, scale);
+            const grow = ImageTrackerUtils.inverseBestFitScaleNDC(aspectRatio, scale);
+            const scaledWarp = grow.times(warp).times(shrink);
+
+            const distort = this._homography;
+            const undistort = distort.inverse();
+            const correctedWarp = distort.times(scaledWarp).times(undistort);
+
+            //console.log(Speedy.Matrix(warp).toString());
+            //console.log(Speedy.Matrix(scaledWarp).toString());
+            //console.log(Speedy.Matrix(correctedWarp).toString());
+
+            return correctedWarp;
+
+        });
+    }
+
+    /**
+     * Find matching pairs of two sets of keypoints matched via brute force
+     * @param srcKeypoints source (database)
+     * @param destKeypoints destination
+     * @returns an array of matching pairs [src, dest]
+     */
+    private _findMatchingPairs(srcKeypoints: SpeedyKeypoint[], destKeypoints: SpeedyMatchedKeypoint[]): ImageTrackerKeypointPair[]
+    {
+        const pairs: ImageTrackerKeypointPair[] = [];
+
+        for(let i = 0; i < destKeypoints.length; i++) {
+            const destKeypoint = destKeypoints[i];
+
+            if(destKeypoint.matches[0].index >= 0 && destKeypoint.matches[1].index >= 0) {
+                const d1 = destKeypoint.matches[0].distance;
+                const d2 = destKeypoint.matches[1].distance;
+
+                // the best match should be "much better" than the second best match,
+                // which means that they are "distinct enough"
+                if(d1 <= TRACK_MATCH_RATIO * d2) {
+                    const srcKeypoint = srcKeypoints[destKeypoint.matches[0].index];
+                    pairs.push([srcKeypoint, destKeypoint]);
+                }
+            }
+        }
+
+        return pairs;
+    }
+
+    /**
+     * Create & setup the pipeline
+     * @returns pipeline
+     */
+    protected _createPipeline(): SpeedyPipeline
+    {
+        const pipeline = Speedy.Pipeline();
+
+        const source = Speedy.Image.Source('source');
+        const imagePortalSource = Speedy.Image.Portal.Source('imagePortalSource');
+        const referenceKeypointPortalSource = Speedy.Keypoint.Portal.Source('referenceKeypointPortalSource');
+        const screen = Speedy.Transform.Resize('screen');
+        const greyscale = Speedy.Filter.Greyscale();
+        const imageRectifier = Speedy.Transform.PerspectiveWarp('imageRectifier');
+        const nightvision = Speedy.Filter.Nightvision();
+        const nightvisionMux = Speedy.Image.Multiplexer();
+        const detector = Speedy.Keypoint.Detector.Harris();
+        const descriptor = Speedy.Keypoint.Descriptor.ORB();
+        const blur = Speedy.Filter.GaussianBlur();
+        const clipper = Speedy.Keypoint.Clipper();
+        const borderClipper = Speedy.Keypoint.BorderClipper('borderClipper');
+        const denoiser = Speedy.Filter.GaussianBlur();
+        const subpixel = Speedy.Keypoint.SubpixelRefiner();
+        const matcher = Speedy.Keypoint.Matcher.BFKNN();
+        const keypointScaler = Speedy.Keypoint.Transformer('keypointScaler');
+        const keypointSink = Speedy.Keypoint.SinkOfMatchedKeypoints('keypoints');
+        const keypointPortalSink = Speedy.Keypoint.Portal.Sink('keypointPortalSink');
+        const referenceKeypointSink = Speedy.Keypoint.Sink('referenceKeypoints');
+        //const imageSink = Speedy.Image.Sink('image');
+
+        source.media = null;
+        imagePortalSource.source = null;
+        referenceKeypointPortalSource.source = null;
+        imageRectifier.transform = Speedy.Matrix.Eye(3);
+        screen.size = Speedy.Size(0,0);
+        nightvision.gain = NIGHTVISION_GAIN;
+        nightvision.offset = NIGHTVISION_OFFSET;
+        nightvision.decay = NIGHTVISION_DECAY;
+        nightvision.quality = NIGHTVISION_QUALITY;
+        nightvisionMux.port = TRACK_WITH_NIGHTVISION ? 1 : 0; // 1 = enable nightvision
+        blur.kernelSize = Speedy.Size(ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_KSIZE);
+        blur.sigma = Speedy.Vector2(ORB_GAUSSIAN_SIGMA, ORB_GAUSSIAN_SIGMA);
+        denoiser.kernelSize = Speedy.Size(SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_KSIZE);
+        denoiser.sigma = Speedy.Vector2(SUBPIXEL_GAUSSIAN_SIGMA, SUBPIXEL_GAUSSIAN_SIGMA);
+        detector.quality = TRACK_HARRIS_QUALITY;
+        detector.capacity = TRACK_DETECTOR_CAPACITY;
+        subpixel.method = SUBPIXEL_METHOD;
+        clipper.size = TRACK_MAX_KEYPOINTS;
+        borderClipper.imageSize = screen.size;
+        borderClipper.borderSize = Speedy.Vector2(0,0);
+        matcher.k = 2;
+        keypointScaler.transform = Speedy.Matrix.Eye(3);
+        keypointSink.turbo = false;
+
+        // prepare input
+        //source.output(); // ignore, but keep it in the pipeline
+        imagePortalSource.output().connectTo(screen.input());
+        screen.output().connectTo(greyscale.input());
+
+        // preprocess images
+        greyscale.output().connectTo(imageRectifier.input());
+        imageRectifier.output().connectTo(nightvisionMux.input('in0'));
+        imageRectifier.output().connectTo(nightvision.input());
+        nightvision.output().connectTo(nightvisionMux.input('in1'));
+
+        // keypoint detection & clipping
+        nightvisionMux.output().connectTo(detector.input());
+        detector.output().connectTo(borderClipper.input());
+        borderClipper.output().connectTo(clipper.input());
+
+        // keypoint refinement
+        imageRectifier.output().connectTo(denoiser.input());
+        denoiser.output().connectTo(subpixel.input('image'));
+        clipper.output().connectTo(subpixel.input('keypoints'));
+
+        // keypoint description
+        nightvisionMux.output().connectTo(blur.input());
+        blur.output().connectTo(descriptor.input('image'));
+        subpixel.output().connectTo(descriptor.input('keypoints'));
+
+        // keypoint matching
+        descriptor.output().connectTo(matcher.input('keypoints'));
+        referenceKeypointPortalSource.output().connectTo(matcher.input('database'));
+
+        // prepare output
+        descriptor.output().connectTo(keypointScaler.input());
+        keypointScaler.output().connectTo(keypointPortalSink.input());
+        keypointScaler.output().connectTo(keypointSink.input());
+        matcher.output().connectTo(keypointSink.input('matches'));
+        referenceKeypointPortalSource.output().connectTo(referenceKeypointSink.input());
+        //imageRectifier.output().connectTo(imageSink.input());
+
+        // done!
+        pipeline.init(
+            source, screen, imagePortalSource,
+            referenceKeypointPortalSource,
+            greyscale, imageRectifier,
+            nightvision, nightvisionMux,
+            detector, borderClipper, clipper,
+            denoiser, subpixel,
+            blur, descriptor,
+            matcher,
+            keypointScaler, keypointSink, keypointPortalSink, referenceKeypointSink,
+            //imageSink
+        );
+
+        return pipeline;
+    }
+}
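
Note: _findMatchingPairs() above is the classic ratio test, here with TRACK_MATCH_RATIO = 0.75: a keypoint is kept only when its best match is clearly better than its second best. The same test as a standalone sketch, with plain data instead of speedy-vision types:

    interface Match { index: number; distance: number; }

    // accept a keypoint only if its best match is "much better" than the second best
    function passesRatioTest(matches: Match[], ratio = 0.75): boolean
    {
        if(matches.length < 2 || matches[0].index < 0 || matches[1].index < 0)
            return false;

        return matches[0].distance <= ratio * matches[1].distance;
    }

    // e.g., distances 30 vs 50 pass (30 <= 37.5); 45 vs 50 are rejected as ambiguous

_findAffineMotionNDC() then conjugates the estimated warp by the homography (distort * scaledWarp * undistort), so that a motion found in the rectified, best-fit space is expressed in the space of the scanned image.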

src/trackers/image-tracker/states/scanning.ts (+137 -145)

@@ -37,25 +37,21 @@ import { SpeedyPipelineNodeImagePortalSource, SpeedyPipelineNodeImagePortalSink
37 37
 import { SpeedyPipelineNodeStaticLSHTables } from 'speedy-vision/types/core/pipeline/nodes/keypoints/matchers/lsh-static-tables';
38 38
 import { SpeedyKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
39 39
 import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
40
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
40 41
 import { ImageTrackerState, ImageTrackerStateOutput } from './state';
41
-import { ImageTrackerPreTrackingState } from './pre-tracking';
42 42
 import { Nullable, Utils } from '../../../utils/utils';
43
-import { IllegalOperationError, IllegalArgumentError, DetectionError } from '../../../utils/errors';
43
+import { DetectionError } from '../../../utils/errors';
44 44
 import { 
45 45
     SCAN_MATCH_RATIO, SCAN_MIN_MATCHES, SCAN_CONSECUTIVE_FRAMES,
46 46
     ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_SIGMA,
47 47
     NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY,
48 48
     SCAN_WITH_NIGHTVISION, SCAN_PYRAMID_LEVELS, SCAN_PYRAMID_SCALEFACTOR,
49 49
     SCAN_FAST_THRESHOLD, SCAN_MAX_KEYPOINTS, SCAN_LSH_TABLES, SCAN_LSH_HASHSIZE,
50
-    SCAN_RANSAC_REPROJECTIONERROR,
51
-    TRAIN_TARGET_NORMALIZED_SIZE,
50
+    SCAN_RANSAC_REPROJECTIONERROR_NDC,
52 51
     NIGHTVISION_QUALITY,
53 52
 } from '../settings';
54 53
 
55 54
 
56
-/** Default target space size (used when training) */
57
-const DEFAULT_TARGET_SPACE_SIZE = Speedy.Size(TRAIN_TARGET_NORMALIZED_SIZE, TRAIN_TARGET_NORMALIZED_SIZE);
58
-
59 55
 /** Port of the portal multiplexer: get new data from the camera */
60 56
 const PORT_CAMERA = 0;
61 57
 
@@ -65,7 +61,7 @@ const PORT_MEMORY = 1;
65 61
 
66 62
 
67 63
 /**
68
- * Scanning state of the Image Tracker
64
+ * In the scanning state we look for a reference image in the video
69 65
  */
70 66
 export class ImageTrackerScanningState extends ImageTrackerState
71 67
 {
@@ -101,7 +97,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
101 97
     {
102 98
         const imagePortalMux = this._pipeline.node('imagePortalMux') as SpeedyPipelineNodeImageMultiplexer;
103 99
         const lshTables = this._pipeline.node('lshTables') as SpeedyPipelineNodeStaticLSHTables;
104
-        const keypoints = settings.keypoints as SpeedyKeypoint[] | undefined;
100
+        const database = settings.database as SpeedyKeypoint[] | undefined;
105 101
 
106 102
         // set attributes
107 103
         this._counter = 0;
@@ -111,8 +107,24 @@ export class ImageTrackerScanningState extends ImageTrackerState
111 107
         imagePortalMux.port = PORT_CAMERA;
112 108
 
113 109
         // prepare the keypoint matcher
114
-        if(keypoints !== undefined)
115
-            lshTables.keypoints = keypoints;
110
+        if(database !== undefined)
111
+            lshTables.keypoints = database;
112
+    }
113
+
114
+    /**
115
+     * Called just before the GPU processing
116
+     * @returns promise
117
+     */
118
+    protected _beforeUpdate(): SpeedyPromise<void>
119
+    {
120
+        const keypointScaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
121
+        const screenSize = this.screenSize;
122
+
123
+        // convert keypoints to NIS
124
+        keypointScaler.transform = ImageTrackerUtils.rasterToNIS(screenSize);
125
+
126
+        // done!
127
+        return Speedy.Promise.resolve();
116 128
     }
117 129
 
118 130
     /**
@@ -124,116 +136,123 @@ export class ImageTrackerScanningState extends ImageTrackerState
124 136
     {
125 137
         const imagePortalMux = this._pipeline.node('imagePortalMux') as SpeedyPipelineNodeImageMultiplexer;
126 138
         const keypoints = result.keypoints as SpeedyMatchedKeypoint[];
127
-        const matchedKeypoints = this._goodMatches(keypoints);
139
+        const image = result.image as SpeedyMedia | undefined;
128 140
 
129 141
         // tracker output
130 142
         const trackerOutput: ImageTrackerOutput = {
131
-            keypoints: keypoints,
132
-            screenSize: this.screenSize
143
+            keypointsNIS: keypoints,
144
+            polylineNDC: [],
145
+            image: image,
133 146
         };
134 147
 
135 148
         // keep the last memorized image
136 149
         imagePortalMux.port = PORT_MEMORY;
137 150
 
138
-        // have we found enough matches...?
139
-        if(matchedKeypoints.length >= SCAN_MIN_MATCHES) {
140
-            return this._findHomography(matchedKeypoints).then(([homography, score]) => {
151
+        // find high quality matches
152
+        const matchedKeypoints = this._selectGoodMatches(keypoints);
153
+        if(matchedKeypoints.length < SCAN_MIN_MATCHES) {
154
+
155
+            // not enough high quality matches?
156
+            // we'll continue to scan the scene
157
+            this._counter = 0;
158
+            this._bestScore = 0;
141 159
 
142
-                // have we found the best homography so far?
143
-                if(score >= this._bestScore) {
144
-                    // store it only if we'll be running the pipeline again
145
-                    if(this._counter < SCAN_CONSECUTIVE_FRAMES - 1) {
146
-                        this._bestScore = score;
147
-                        this._bestHomography = homography;
160
+            return Speedy.Promise.resolve({
161
+                nextState: 'scanning',
162
+                trackerOutput: trackerOutput,
163
+            });
148 164
 
149
-                        // memorize the last image, corresponding to the best homography(*)
150
-                        imagePortalMux.port = PORT_CAMERA;
165
+        }
151 166
 
152
-                        /*
167
+        // we have enough high quality matches!
168
+        const pairs = this._findMatchingPairs(matchedKeypoints);
169
+        const points = ImageTrackerUtils.compilePairsOfKeypointsNDC(pairs);
153 170
 
154
-                        (*) technically speaking, this is not exactly the case. Since we're
155
-                            using turbo to download the keypoints, there's a slight difference
156
-                            between the data used to compute the homography and the last image.
157
-                            Still, assuming continuity of the video stream, this logic is
158
-                            good enough.
171
+        // find a homography
172
+        return this._findHomographyNDC(points).then(([homography, score]) => {
159 173
 
160
-                        */
161
-                    }
162
-                }
174
+            // have we found the best homography so far?
175
+            if(score >= this._bestScore) {
163 176
 
164
-                // find a polyline surrounding the target
165
-                return this._findPolyline(homography, DEFAULT_TARGET_SPACE_SIZE);
177
+                // store it only if we'll be running the pipeline again
178
+                if(this._counter < SCAN_CONSECUTIVE_FRAMES - 1) {
179
+                    this._bestScore = score;
180
+                    this._bestHomography = homography;
166 181
 
167
-            }).then(polyline => {
182
+                    // memorize the last image, corresponding to the best homography(*)
183
+                    imagePortalMux.port = PORT_CAMERA;
168 184
 
169
-                // continue a little longer in the scanning state
170
-                if(++this._counter < SCAN_CONSECUTIVE_FRAMES) {
171
-                    return {
172
-                        nextState: this.name,
173
-                        trackerOutput: {
174
-                            polyline: polyline,
175
-                            ...trackerOutput,
176
-                        },
177
-                    };
178
-                }
185
+                    /*
179 186
 
180
-                // this image should correspond to the best homography
181
-                const snapshot = this._pipeline.node('imagePortalSink') as SpeedyPipelineNodeImagePortalSink;
187
+                    (*) technically speaking, this is not exactly the case. Since we're
188
+                        using turbo to download the keypoints, there's a slight difference
189
+                        between the data used to compute the homography and the last image.
190
+                        Still, assuming continuity of the video stream, this logic is
191
+                        good enough.
182 192
 
183
-                // the reference image that we'll track
184
-                const referenceImage = this._imageTracker._referenceImageOfKeypoint(
185
-                    matchedKeypoints[0].matches[0].index
186
-                );
193
+                    */
194
+                }
187 195
 
188
-                // let's track the target!
189
-                return {
190
-                    nextState: 'pre-tracking',
191
-                    nextStateSettings: {
192
-                        homography: this._bestHomography,
193
-                        snapshot: snapshot,
194
-                        referenceImage: referenceImage,
195
-                    },
196
-                    trackerOutput: {
197
-                        polyline: polyline,
198
-                        ...trackerOutput,
199
-                    },
200
-                };
196
+            }
201 197
 
202
-            }).catch(() => {
198
+            // find a polyline surrounding the target
199
+            const polylineNDC = ImageTrackerUtils.findPolylineNDC(homography);
200
+            trackerOutput.polylineNDC!.push(...polylineNDC);
203 201
 
204
-                // continue in the scanning state
202
+            // continue a little longer in the scanning state
203
+            if(++this._counter < SCAN_CONSECUTIVE_FRAMES) {
205 204
                 return {
206
-                    nextState: this.name,
207
-                    trackerOutput: trackerOutput,
205
+                    nextState: 'scanning',
206
+                    trackerOutput: trackerOutput
208 207
                 };
208
+            }
209 209
 
210
-            });
211
-        }
212
-        else {
210
+            // this image should correspond to the best homography
211
+            const snapshot = this._pipeline.node('imagePortalSink') as SpeedyPipelineNodeImagePortalSink;
213 212
 
214
-            // not enough matches...!
215
-            this._counter = 0;
216
-            this._bestScore = 0;
213
+            // the reference image that we'll track
214
+            const referenceImage = this._imageTracker._referenceImageOfKeypoint(
215
+                matchedKeypoints[0].matches[0].index
216
+            );
217 217
 
218
-        }
218
+            // this shouldn't happen
219
+            if(!referenceImage)
220
+                throw new DetectionError(`Can't track an unknown reference image`);
221
+
222
+            // let's track the target!
223
+            return {
224
+                nextState: 'pre-tracking-a',
225
+                nextStateSettings: {
226
+                    homography: this._bestHomography,
227
+                    snapshot: snapshot,
228
+                    referenceImage: referenceImage,
229
+                },
230
+                trackerOutput: trackerOutput
231
+            };
232
+
233
+        })
234
+        .catch(err => {
235
+
236
+            // continue in the scanning state
237
+            Utils.warning(`Error when scanning: ${err.toString()}`)
238
+            return {
239
+                nextState: 'scanning',
240
+                trackerOutput: trackerOutput,
241
+            };
219 242
 
220
-        // we'll continue to scan the scene
221
-        return Speedy.Promise.resolve({
222
-            nextState: this.name,
223
-            trackerOutput: trackerOutput,
224 243
         });
225 244
     }
226 245
 
227 246
     /**
228
-     * Find "high quality" matches of a single reference image
229
-     * @param keypoints
230
-     * @returns high quality matches
247
+     * Select high quality matches of a single reference image
248
+     * @param keypoints matched keypoints of any quality, to any reference image
249
+     * @returns high quality matches of a single reference image
231 250
      */
232
-    private _goodMatches(keypoints: SpeedyMatchedKeypoint[]): SpeedyMatchedKeypoint[]
251
+    private _selectGoodMatches(keypoints: SpeedyMatchedKeypoint[]): SpeedyMatchedKeypoint[]
233 252
     {
234 253
         const matchedKeypointsPerImageIndex: Record<number,SpeedyMatchedKeypoint[]> = Object.create(null);
235 254
 
236
-        // filter "good matches"
255
+        // find high quality matches, regardless of reference image
237 256
         for(let j = keypoints.length - 1; j >= 0; j--) {
238 257
             const keypoint = keypoints[j];
239 258
             if(keypoint.matches[0].index >= 0 && keypoint.matches[1].index >= 0) {
@@ -255,7 +274,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
255 274
             }
256 275
         }
257 276
 
258
-        // find the image with the most matches
277
+        // find the reference image with the most high quality matches
259 278
         let matchedKeypoints: SpeedyMatchedKeypoint[] = [];
260 279
         for(const imageIndex in matchedKeypointsPerImageIndex) {
261 280
             if(matchedKeypointsPerImageIndex[imageIndex].length > matchedKeypoints.length)
@@ -267,71 +286,41 @@ export class ImageTrackerScanningState extends ImageTrackerState
267 286
     }

     /**
-     * Find a homography matrix using matched keypoints
-     * @param matchedKeypoints "good" matches only
-     * @returns homography from reference image space to AR screen space & homography "quality" score
+     * Find a homography matrix using matched keypoints in NDC
+     * @param points compiled pairs of keypoints in NDC
+     * @returns homography (from reference to matched, NDC) & "quality" score
      */
-    private _findHomography(matchedKeypoints: SpeedyMatchedKeypoint[]): SpeedyPromise<[SpeedyMatrix,number]>
+    private _findHomographyNDC(points: SpeedyMatrix): SpeedyPromise<[SpeedyMatrix,number]>
     {
-        const srcCoords: number[] = [];
-        const dstCoords: number[] = [];
-
-        // find matching coordinates of the keypoints
-        for(let i = matchedKeypoints.length - 1; i >= 0; i--) {
-            const matchedKeypoint = matchedKeypoints[i];
-            const referenceKeypoint = this._imageTracker._referenceKeypoint(matchedKeypoint.matches[0].index);
-            if(referenceKeypoint != null) {
-                srcCoords.push(referenceKeypoint.x);
-                srcCoords.push(referenceKeypoint.y);
-                dstCoords.push(matchedKeypoint.x);
-                dstCoords.push(matchedKeypoint.y);
-            }
-            else {
-                // this shouldn't happen
-                return Speedy.Promise.reject(
-                    new DetectionError(`Invalid keypoint match index: ${matchedKeypoint.matches[0].index} from ${matchedKeypoint.toString()}`)
-                );
-            }
-        }
-
-        // too few points?
-        const n = srcCoords.length / 2;
-        if(n < 4) {
-            return Speedy.Promise.reject(
-                new DetectionError(`Too few points to compute a homography`)
-            );
-        }
-
-        // compute a homography
-        const src = Speedy.Matrix(2, n, srcCoords);
-        const dst = Speedy.Matrix(2, n, dstCoords);
-        const mask = Speedy.Matrix.Zeros(1, n);
-
-        const homography = Speedy.Matrix.Zeros(3);
-        return Speedy.Matrix.findHomography(homography, src, dst, {
+        return ImageTrackerUtils.findPerspectiveWarpNDC(points, {
             method: 'pransac',
-            reprojectionError: SCAN_RANSAC_REPROJECTIONERROR,
+            reprojectionError: SCAN_RANSAC_REPROJECTIONERROR_NDC,
             numberOfHypotheses: 512,
             bundleSize: 128,
-            mask: mask,
-        }).then(homography => {
+        });
+    }

-            // check if this is a valid homography
-            const a00 = homography.at(0,0);
-            if(Number.isNaN(a00))
-                throw new DetectionError(`Can't compute homography`);
+    /**
+     * Find matching pairs of keypoints from reference image (src) to matched image (dest)
+     * @param matchedKeypoints
+     * @returns an array of matching pairs [src, dest]
+     */
+    private _findMatchingPairs(matchedKeypoints: SpeedyMatchedKeypoint[]): ImageTrackerKeypointPair[]
+    {
+        const pairs = new Array<ImageTrackerKeypointPair>(matchedKeypoints.length);

-            // count the number of inliers
-            const inliers = mask.read();
-            let inlierCount = 0;
-            for(let i = inliers.length - 1; i >= 0; i--)
-                inlierCount += inliers[i];
-            const score = inlierCount / inliers.length;
+        for(let i = matchedKeypoints.length - 1; i >= 0; i--) {
+            const matchedKeypoint = matchedKeypoints[i];
+            const referenceKeypoint = this._imageTracker._referenceKeypoint(matchedKeypoint.matches[0].index);

-            // done!
-            return [ homography, score ];
+            // this shouldn't happen
+            if(referenceKeypoint == null)
+                throw new DetectionError(`Invalid keypoint match index: ${matchedKeypoint.matches[0].index} from ${matchedKeypoint.toString()}`);

-        });
+            pairs[i] = [ referenceKeypoint, matchedKeypoint ];
+        }
+
+        return pairs;
     }
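
Note: the pairs produced here are later compiled into a single 2 x 2n [ src | dest ] matrix in NDC (see ImageTrackerUtils.compilePairsOfKeypointsNDC in the tracking state below). A sketch of that layout, assuming the usual NDC convention of [-1,1]x[-1,1] with the y-axis growing upwards; the helper is illustrative, not the library's actual implementation:

    type Point = { x: number; y: number };

    // Flatten n [src, dest] pairs into data for a 2 x 2n matrix:
    // columns 0..n-1 hold the source points, columns n..2n-1 the destinations.
    function compilePairsNDC(pairs: [Point, Point][], width: number, height: number): number[] {
        const n = pairs.length;
        const data = new Array<number>(4 * n);
        const toNDC = (p: Point): [number, number] =>
            [2 * p.x / width - 1, 1 - 2 * p.y / height]; // assumes y grows upwards in NDC

        for (let i = 0; i < n; i++) {
            [data[2 * i], data[2 * i + 1]] = toNDC(pairs[i][0]);
            [data[2 * (n + i)], data[2 * (n + i) + 1]] = toNDC(pairs[i][1]);
        }
        return data; // e.g. Speedy.Matrix(2, 2*n, data)
    }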

     /**
@@ -354,6 +343,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
         const clipper = Speedy.Keypoint.Clipper();
         const lshTables = Speedy.Keypoint.Matcher.StaticLSHTables('lshTables');
         const knn = Speedy.Keypoint.Matcher.LSHKNN();
+        const keypointScaler = Speedy.Keypoint.Transformer('keypointScaler');
         const keypointSink = Speedy.Keypoint.SinkOfMatchedKeypoints('keypoints');
         const imagePortalSink = Speedy.Image.Portal.Sink('imagePortalSink');
         const imagePortalSource = Speedy.Image.Portal.Source('imagePortalSource');
@@ -386,6 +376,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
         imagePortalMux.port = PORT_CAMERA; // 0 = camera stream; 1 = lock image
         imagePortalCopy.size = Speedy.Size(0,0);
         imagePortalCopy.scale = Speedy.Vector2(1,1);
+        keypointScaler.transform = Speedy.Matrix.Eye(3);
         keypointSink.turbo = true;

         // prepare input
@@ -412,7 +403,8 @@ export class ImageTrackerScanningState extends ImageTrackerState
         lshTables.output().connectTo(knn.input('lsh'));

         // prepare output
-        clipper.output().connectTo(keypointSink.input());
+        clipper.output().connectTo(keypointScaler.input());
+        keypointScaler.output().connectTo(keypointSink.input());
         knn.output().connectTo(keypointSink.input('matches'));
         //pyramid.output().connectTo(imageSink.input());

@@ -429,7 +421,7 @@ export class ImageTrackerScanningState extends ImageTrackerState
             greyscale, blur, nightvision, nightvisionMux, pyramid,
             detector, descriptor, clipper,
             lshTables, knn,
-            keypointSink,
+            keypointScaler, keypointSink,
             imagePortalSink, imagePortalSource,
             imagePortalMux, imagePortalBuffer, imagePortalCopy,
             //, imageSink
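
Note: the new keypointScaler node applies a 3x3 transform taking keypoints from raster coordinates (w x h) to Normalized Image Space (N x N). A plausible sketch of such a transform; the actual matrix is built by ImageTrackerUtils.rasterToNIS, so names and layout here are assumptions:

    // Entries of a 3x3 scaling matrix mapping (x, y) in a w x h raster
    // to an N x N square: (x, y) -> (x * N/w, y * N/h).
    function rasterToNISEntries(width: number, height: number, N: number): number[] {
        return [
            N / width, 0,          0,
            0,         N / height, 0,
            0,         0,          1,
        ];
    }
    // e.g. keypointScaler.transform = Speedy.Matrix(3, 3, rasterToNISEntries(w, h, N));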

+ 14 - 155 src/trackers/image-tracker/states/state.ts View File

@@ -33,10 +33,10 @@ import { SpeedyPipelineNodeResize } from 'speedy-vision/types/core/pipeline/node
 import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/transformer';
 import { SpeedyKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
 import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
+import { ReferenceImage } from '../reference-image';
 import { TrackerOutput } from '../../tracker';
 import { Nullable } from '../../../utils/utils';
-import { IllegalOperationError } from '../../../utils/errors';
-import { TRACK_RECTIFIED_BORDER } from '../settings';
+import { IllegalOperationError, IllegalArgumentError } from '../../../utils/errors';

 /** State output */
 export interface ImageTrackerStateOutput
@@ -46,7 +46,6 @@ export interface ImageTrackerStateOutput
     readonly nextStateSettings?: Record<string,any>;
 }

-
 /**
  * Abstract state of the Image Tracker
  */
@@ -61,6 +60,9 @@ export abstract class ImageTrackerState
     /** pipeline */
     protected _pipeline: SpeedyPipeline;

+    /** a flag telling whether or not the pipeline has been released */
+    protected _pipelineReleased: boolean;
+

     /**
      * Constructor
@@ -72,6 +74,7 @@ export abstract class ImageTrackerState
         this._name = name;
         this._imageTracker = imageTracker;
         this._pipeline = this._createPipeline();
+        this._pipelineReleased = false;
     }

     /**
@@ -84,6 +87,7 @@ export abstract class ImageTrackerState

     /**
      * AR screen size
+     * It may change over time, e.g., when the phone is rotated
      */
     get screenSize(): SpeedySize
     {
@@ -107,7 +111,12 @@
      */
     release(): null
     {
-        return this._pipeline.release();
+        if(!this._pipelineReleased) {
+            this._pipeline.release();
+            this._pipelineReleased = true;
+        }
+
+        return null;
     }
 
113 122
     /**
@@ -183,154 +192,4 @@ export abstract class ImageTrackerState
183 192
      * @returns pipeline
184 193
      */
185 194
     protected abstract _createPipeline(): SpeedyPipeline;
186
-
187
-
188
-
189
-    //
190
-    // Some utility methods common to various states
191
-    //
192
-
193
-    /**
194
-     * Find the coordinates of a polyline surrounding the target image
195
-     * @param homography maps the target image to the AR screen
196
-     * @param targetSize size of the target space
197
-     * @returns promise that resolves to 4 points in AR screen space
198
-     */
199
-    protected _findPolylineCoordinates(homography: SpeedyMatrix, targetSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
200
-    {
201
-        const w = targetSize.width, h = targetSize.height;
202
-        const referenceImageCoordinates = Speedy.Matrix(2, 4, [
203
-            0, 0,
204
-            w, 0,
205
-            w, h,
206
-            0, h,
207
-        ]);
208
-
209
-        const polylineCoordinates = Speedy.Matrix.Zeros(2, 4);
210
-        return Speedy.Matrix.applyPerspectiveTransform(
211
-            polylineCoordinates,
212
-            referenceImageCoordinates,
213
-            homography
214
-        );
215
-    }
216
-
217
-    /**
218
-     * Find a polyline surrounding the target image
219
-     * @param homography maps the target image to the AR screen
220
-     * @param targetSize size of the target space
221
-     * @returns promise that resolves to 4 points in AR screen space
222
-     */
223
-    protected _findPolyline(homography: SpeedyMatrix, targetSize: SpeedySize): SpeedyPromise<SpeedyPoint2[]>
224
-    {
225
-        return this._findPolylineCoordinates(homography, targetSize).then(polylineCoordinates => {
226
-            const polydata = polylineCoordinates.read();
227
-            const polyline = Array.from({ length: 4 }, (_, i) => Speedy.Point2(polydata[2*i], polydata[2*i+1]));
228
-
229
-            return polyline;
230
-        });
231
-    }
232
-
233
-    /**
234
-     * Whether or not to rotate the warped image in order to best fit the AR screen
235
-     * @param media media associated with the reference image
236
-     * @param screenSize AR screen
237
-     * @returns boolean
238
-     */
239
-    protected _mustRotateWarpedImage(media: SpeedyMedia, screenSize: SpeedySize): boolean
240
-    {
241
-        const screenAspectRatio = screenSize.width / screenSize.height;
242
-        const mediaAspectRatio = media.width / media.height;
243
-        const eps = 0.1;
244
-
245
-        return (mediaAspectRatio >= 1+eps && screenAspectRatio < 1-eps) || (mediaAspectRatio < 1-eps && screenAspectRatio >= 1+eps);
246
-    }
247
-
248
-    /**
249
-     * Find a rectification matrix to be applied to an image fitting the entire AR screen
250
-     * @param media media associated with the reference image
251
-     * @param screenSize AR screen
252
-     * @returns promise that resolves to a rectification matrix
253
-     */
254
-    protected _findRectificationMatrixOfFullscreenImage(media: SpeedyMedia, screenSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
255
-    {
256
-        const b = TRACK_RECTIFIED_BORDER;
257
-        const sw = screenSize.width, sh = screenSize.height;
258
-        const mediaAspectRatio = media.width / media.height;
259
-        const mustRotate = this._mustRotateWarpedImage(media, screenSize);
260
-
261
-        // compute the vertices of the target in screen space
262
-        // we suppose portrait or landscape mode for both screen & media
263
-        const c = mustRotate ? 1 / mediaAspectRatio : mediaAspectRatio;
264
-        const top = sw >= sh ? b * sh : (sh - sw * (1-2*b) / c) / 2;
265
-        const left = sw >= sh ? (sw - sh * (1-2*b) * c) / 2 : b * sw;
266
-        const right = sw - left;
267
-        const bottom = sh - top;
268
-
269
-        const targetVertices = Speedy.Matrix(2, 4, [
270
-            left, top,
271
-            right, top,
272
-            right, bottom,
273
-            left, bottom,
274
-        ]);
275
-
276
-        const screenVertices = Speedy.Matrix(2, 4, [
277
-            0, 0,
278
-            sw, 0,
279
-            sw, sh,
280
-            0, sh
281
-        ]);
282
-
283
-        const preRectificationMatrix = Speedy.Matrix.Eye(3);
284
-        const alignmentMatrix = Speedy.Matrix.Zeros(3);
285
-        const rectificationMatrix = Speedy.Matrix.Zeros(3);
286
-
287
-        return (mustRotate ? Speedy.Matrix.perspective(
288
-            // pre-rectifation: rotate by 90 degrees counterclockwise and scale to screenSize
289
-            preRectificationMatrix,
290
-            screenVertices,
291
-            Speedy.Matrix(2, 4, [ 0,sh , 0,0 , sw,0 , sw,sh ])
292
-        ) : Speedy.Promise.resolve(preRectificationMatrix)).then(_ =>
293
-            // alignment: align the target to the center of the screen
294
-            Speedy.Matrix.perspective(
295
-                alignmentMatrix,
296
-                screenVertices,
297
-                targetVertices
298
-            )
299
-        ).then(_ =>
300
-            // pre-rectify and then align
301
-            rectificationMatrix.setTo(alignmentMatrix.times(preRectificationMatrix))
302
-        );
303
-    }
304
-
305
-    /**
306
-     * Find a rectification matrix to be applied to the target image
307
-     * @param homography maps a reference image to the AR screen
308
-     * @param targetSize size of the target space
309
-     * @param media media associated with the reference image
310
-     * @param screenSize AR screen
311
-     * @returns promise that resolves to a rectification matrix
312
-     */
313
-    protected _findRectificationMatrixOfCameraImage(homography: SpeedyMatrix, targetSize: SpeedySize, media: SpeedyMedia, screenSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
314
-    {
315
-        const sw = screenSize.width, sh = screenSize.height;
316
-        const screen = Speedy.Matrix(2, 4, [ 0, 0, sw, 0, sw, sh, 0, sh ]);
317
-
318
-        const rectificationMatrix = Speedy.Matrix.Zeros(3);
319
-        return this._findPolylineCoordinates(homography, targetSize).then(polyline =>
320
-
321
-            // from target space to (full)screen
322
-            Speedy.Matrix.perspective(rectificationMatrix, polyline, screen)
323
-
324
-        ).then(_ =>
325
-
326
-            // from (full)screen to rectified coordinates
327
-            this._findRectificationMatrixOfFullscreenImage(media, screenSize)
328
-
329
-        ).then(mat =>
330
-
331
-            // function composition
332
-            rectificationMatrix.setTo(mat.times(rectificationMatrix))
333
-
334
-        );
335
-    }
336
-}
195
+}

+ 213 - 473 src/trackers/image-tracker/states/tracking.ts View File

@@ -26,6 +26,7 @@ import { SpeedyVector2 } from 'speedy-vision/types/core/speedy-vector';
 import { SpeedySize } from 'speedy-vision/types/core/speedy-size';
 import { SpeedyMedia } from 'speedy-vision/types/core/speedy-media';
 import { SpeedyMatrix } from 'speedy-vision/types/core/speedy-matrix';
+import { SpeedyMatrixExpr } from 'speedy-vision/types/core/speedy-matrix-expr';
 import { SpeedyPromise } from 'speedy-vision/types/core/speedy-promise';
 import { SpeedyPipeline, SpeedyPipelineOutput } from 'speedy-vision/types/core/pipeline/pipeline';
 import { SpeedyPipelineNodeImageSource } from 'speedy-vision/types/core/pipeline/nodes/images/source';
@@ -39,40 +40,33 @@ import { SpeedyPipelineNodeKeypointBorderClipper } from 'speedy-vision/types/cor
 import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/pipeline/nodes/keypoints/transformer';
 import { SpeedyKeypoint, SpeedyTrackedKeypoint, SpeedyMatchedKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
 import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName, ImageTrackerResult, TrackableImage } from '../image-tracker';
-import { ImageTrackerState, ImageTrackerStateOutput } from './state';
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
 import { ImageTrackerEvent } from '../image-tracker-event';
+import { ImageTrackerState, ImageTrackerStateOutput } from './state';
 import { Nullable, Utils } from '../../../utils/utils';
 import { ReferenceImage } from '../reference-image';
 import { CameraModel } from '../../../geometry/camera-model';
 import { Viewer } from '../../../geometry/viewer';
 import { Pose } from '../../../geometry/pose';
 import { Transform } from '../../../geometry/transform';
-import { IllegalOperationError, IllegalArgumentError, TrackingError } from '../../../utils/errors';
+import { IllegalOperationError, IllegalArgumentError, TrackingError, NumericalError } from '../../../utils/errors';
 import {
-    TRACK_RECTIFIED_BORDER, TRACK_CLIPPING_BORDER, TRACK_MIN_MATCHES, TRACK_LOST_TOLERANCE,
+    TRACK_RECTIFIED_SCALE, TRACK_CLIPPING_BORDER, TRACK_MIN_MATCHES, TRACK_LOST_TOLERANCE,
     NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY, TRACK_WITH_NIGHTVISION,
     ORB_GAUSSIAN_KSIZE, ORB_GAUSSIAN_SIGMA,
     SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_SIGMA,
     TRACK_HARRIS_QUALITY, TRACK_DETECTOR_CAPACITY, TRACK_MAX_KEYPOINTS,
-    TRACK_RANSAC_REPROJECTIONERROR, TRACK_GRID_GRANULARITY, TRACK_MATCH_RATIO,
-    NIGHTVISION_QUALITY,
-    SUBPIXEL_METHOD,
+    TRACK_RANSAC_REPROJECTIONERROR_NDC, TRACK_MATCH_RATIO,
+    NIGHTVISION_QUALITY, SUBPIXEL_METHOD,
 } from '../settings';
 import { Settings } from '../../../core/settings';

-
 /** Whether or not we want to accelerate GPU-CPU transfers. Using turbo costs a slight delay on the tracking */
 const USE_TURBO = true;

 /** Number of PBOs; meaningful only when using turbo */
 const NUMBER_OF_PBOS = 2;

-/** Frame skipping; meaningful only when using turbo */
-const TURBO_SKIP = 2;
-
-/** A pair (a,b) of arrays of keypoints such that keypoint a[i] is a match to keypoint b[i] for all i */
-type QualityMatches = [ SpeedyMatchedKeypoint[], SpeedyKeypoint[] ];
-


 /**
@@ -91,33 +85,29 @@ export class ImageTrackerTrackingState extends ImageTrackerState
     /** current homography (for computing the pose) */
     private _poseHomography: SpeedyMatrix;

-    /** initial homography (i.e., the homography we found when we first started tracking) */
-    private _initialHomography: SpeedyMatrix; // from (full)screen to the actual target
-
     /** initial keypoints (i.e., the keypoints we found when we first started tracking) */
-    private _initialKeypoints: SpeedyKeypoint[];
+    private _templateKeypoints: SpeedyKeypoint[];

-    /** a helper */
-    private _counter: number;
-
-    /** camera model */
-    private _camera: CameraModel;
+    /** the screen size when the tracking began */
+    private _initialScreenSize: SpeedySize;

-    /** predicted keypoints */
-    private _predictedKeypoints: SpeedyMatchedKeypoint[];
+    /** last output of the tracker */
+    private _lastOutput: ImageTrackerOutput;

-    /** last pipeline output */
+    /** last output of the pipeline */
     private _lastPipelineOutput: SpeedyPipelineOutput;

-    /** a helper */
-    private _pipelineCounter: number;
+    /** a helper for frame skipping */
+    private _skipCounter: number;

-    /** last output */
-    private _lastOutput: ImageTrackerOutput;
+    /** a helper */
+    private _counter: number;

     /** the number of consecutive frames in which we have lost the tracking */
     private _lostCounter: number;

+    /** camera model */
+    private _camera: CameraModel;



@@ -132,18 +122,14 @@ export class ImageTrackerTrackingState extends ImageTrackerState
         this._referenceImage = null;
         this._warpHomography = Speedy.Matrix.Eye(3);
         this._poseHomography = Speedy.Matrix.Eye(3);
-        this._initialHomography = Speedy.Matrix.Eye(3);
-        this._initialKeypoints = [];
-        this._counter = 0;
-        this._camera = new CameraModel();
-        this._predictedKeypoints = [];
-        this._lastPipelineOutput = { keypoints: [] };
-        this._pipelineCounter = 0;
+        this._templateKeypoints = [];
+        this._initialScreenSize = Speedy.Size(1, 1);
         this._lastOutput = {};
+        this._lastPipelineOutput = { keypoints: [] };
+        this._skipCounter = 0;
+        this._counter = 0;
         this._lostCounter = 0;
-
-        // we need at least 4 correspondences of points to compute a homography matrix
-        Utils.assert(TRACK_MIN_MATCHES >= 4);
+        this._camera = new CameraModel();
     }

     /**
@@ -152,11 +138,11 @@ export class ImageTrackerTrackingState extends ImageTrackerState
      */
     onEnterState(settings: Record<string,any>)
     {
-        const homography = settings.homography as SpeedyMatrix;
+        const homography = settings.homography as SpeedyMatrix; // NDC, from reference image to video
         const referenceImage = settings.referenceImage as Nullable<ReferenceImage>;
         const templateKeypoints = settings.templateKeypoints as SpeedyKeypoint[];
-        const keypointPortalSink = settings.keypointPortalSink as SpeedyPipelineNodeKeypointPortalSink;
-        const screenSize = settings.screenSize as SpeedySize; // this.screenSize is not yet set
+        const templateKeypointPortalSink = settings.templateKeypointPortalSink as SpeedyPipelineNodeKeypointPortalSink;
+        const initialScreenSize = settings.initialScreenSize as SpeedySize; // this.screenSize is not yet set
         const keypointPortalSource = this._pipeline.node('keypointPortalSource') as SpeedyPipelineNodeKeypointPortalSource;

         // this shouldn't happen
@@ -167,20 +153,19 @@ export class ImageTrackerTrackingState extends ImageTrackerState
         this._referenceImage = referenceImage;
         this._warpHomography = Speedy.Matrix(homography);
         this._poseHomography = Speedy.Matrix(homography);
-        this._initialHomography = Speedy.Matrix(homography);
-        this._initialKeypoints = templateKeypoints;
-        this._counter = 0;
-        this._predictedKeypoints = [];
-        this._lastPipelineOutput = { keypoints: [] };
-        this._pipelineCounter = 0;
+        this._templateKeypoints = templateKeypoints;
+        this._initialScreenSize = Speedy.Size(initialScreenSize.width, initialScreenSize.height);
         this._lastOutput = {};
+        this._lastPipelineOutput = { keypoints: [] };
+        this._skipCounter = 0;
+        this._counter = 0;
         this._lostCounter = 0;

         // setup portals
-        keypointPortalSource.source = keypointPortalSink;
+        keypointPortalSource.source = templateKeypointPortalSink;

         // setup camera
-        this._camera.init(screenSize);
+        this._camera.init(initialScreenSize);

         // emit event
         const ev = new ImageTrackerEvent('targetfound', referenceImage);
@@ -197,6 +182,9 @@ export class ImageTrackerTrackingState extends ImageTrackerState
     {
         const referenceImage = this._referenceImage as ReferenceImage;

+        // log
+        Utils.log(`No longer tracking image "${referenceImage.name}"!`);
+
         // release the camera
         this._camera.release();

@@ -213,7 +201,7 @@ export class ImageTrackerTrackingState extends ImageTrackerState
     {
         const imageRectifier = this._pipeline.node('imageRectifier') as SpeedyPipelineNodePerspectiveWarp;
         const borderClipper = this._pipeline.node('borderClipper') as SpeedyPipelineNodeKeypointBorderClipper;
-        const keypointRectifier = this._pipeline.node('keypointRectifier') as SpeedyPipelineNodeKeypointTransformer;
+        const keypointScaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
         const screenSize = this.screenSize;

         /*
@@ -230,10 +218,20 @@ export class ImageTrackerTrackingState extends ImageTrackerState
             screenSize.height * TRACK_CLIPPING_BORDER
         );

+        // convert keypoints to NIS
+        keypointScaler.transform = ImageTrackerUtils.rasterToNIS(screenSize);
+
         // rectify the image
-        return this._findImageWarp(this._warpHomography, screenSize).then(warp => {
-            imageRectifier.transform = warp;
-        });
+        const scale = TRACK_RECTIFIED_SCALE;
+        const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(this._imageTracker, this._referenceImage!);
+        const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, scale);
+        const undistort = this._warpHomography.inverse();
+        const toScreen = ImageTrackerUtils.NDCToRaster(screenSize);
+        const toNDC = ImageTrackerUtils.rasterToNDC(screenSize);
+
+        return imageRectifier.transform.setTo(
+            toScreen.times(shrink.times(undistort)).times(toNDC)
+        ).then(() => void 0);
     }
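
Note: the rectification transform above is a right-to-left composition — map the screen into NDC, undo the current warp homography, shrink to a best-fit rectangle, and map back to the screen. A generic row-major 3x3 multiply makes the order explicit (a sketch; Speedy evaluates the equivalent matrix expression on its own types):

    // c = a * b for 3x3 row-major matrices stored as number[9]
    function mul3(a: number[], b: number[]): number[] {
        const c = new Array<number>(9).fill(0);
        for (let i = 0; i < 3; i++)
            for (let j = 0; j < 3; j++)
                for (let k = 0; k < 3; k++)
                    c[3 * i + j] += a[3 * i + k] * b[3 * k + j];
        return c;
    }

    // rectification = toScreen * shrink * undistort * toNDC (rightmost applies first)
    const rectify = (toScreen: number[], shrink: number[], undistort: number[], toNDC: number[]) =>
        mul3(toScreen, mul3(shrink, mul3(undistort, toNDC)));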

     /**
@@ -242,35 +240,26 @@ export class ImageTrackerTrackingState extends ImageTrackerState
      */
     protected _gpuUpdate(): SpeedyPromise<SpeedyPipelineOutput>
     {
-        //return super._gpuUpdate();
-
         // No turbo?
         if(!USE_TURBO || Settings.powerPreference == 'low-power')
             return super._gpuUpdate();

         // When using turbo, we reduce the GPU usage by skipping every other frame
-        const counter = this._pipelineCounter;
-        this._pipelineCounter = (this._pipelineCounter + 1) % TURBO_SKIP;
-
-        // Skip frame
-        if(counter != 0) {
-            if(this._lastPipelineOutput.keypoints !== undefined) {
-                this._predictedKeypoints = this._predictKeypoints(
-                    this._lastPipelineOutput.keypoints,
-                    this._initialKeypoints
-                );
-            }
-            else
-                this._predictedKeypoints.length = 0;
+        if(0 == (this._skipCounter = 1 - this._skipCounter)) {
+            const templateKeypoints = this._templateKeypoints;
+            const previousKeypoints = this._lastPipelineOutput.keypoints as SpeedyMatchedKeypoint[];
+            //const currentKeypoints = this._predictKeypoints(previousKeypoints, templateKeypoints);
+            const currentKeypoints = previousKeypoints; // this actually works
+
+            this._lastPipelineOutput.keypoints = currentKeypoints;

-            this._lastPipelineOutput.keypoints = this._predictedKeypoints;
             return Speedy.Promise.resolve(this._lastPipelineOutput);
         }

         // Run the pipeline and store the results
-        return super._gpuUpdate().then(results => {
-            this._lastPipelineOutput = results;
-            return results;
+        return super._gpuUpdate().then(result => {
+            this._lastPipelineOutput = result;
+            return result;
         });
     }
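
Note: the one-line toggle controlling the frame skip is easy to misread; spelled out, it alternates between running the pipeline and replaying the previous output:

    let skipCounter = 0;

    // Returns true on every other call: those frames reuse the last
    // pipeline output instead of running the GPU pipeline again.
    function reusePreviousOutput(): boolean {
        skipCounter = 1 - skipCounter; // 1, 0, 1, 0, ...
        return skipCounter === 0;
    }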

     /**
@@ -281,106 +270,65 @@ export class ImageTrackerTrackingState extends ImageTrackerState
      */
     protected _afterUpdate(result: SpeedyPipelineOutput): SpeedyPromise<ImageTrackerStateOutput>
     {
-        const imageRectifier = this._pipeline.node('imageRectifier') as SpeedyPipelineNodePerspectiveWarp;
         const keypoints = result.keypoints as SpeedyMatchedKeypoint[];
         const image = result.image as SpeedyMedia | undefined;
-        const referenceImage = this._referenceImage as ReferenceImage;
+        const referenceImage = this._referenceImage!;
+        const screenSize = this.screenSize;
+
+        // track the target
+        return Speedy.Promise.resolve()
+        .then(() => {
+
+            // if a change in screen size occurs, we need to recalibrate
+            // (perform a new pre-training)
+            if(!screenSize.equals(this._initialScreenSize))
+                throw new TrackingError('Detected a change in screen size');

-        // find the best keypoint matches
-        return this._preprocessMatches(keypoints, this._initialKeypoints).then(matches => {
+            // find matching pairs of keypoints
+            const allPairs = this._findMatchingPairs(this._templateKeypoints, keypoints);
+            const pairs = ImageTrackerUtils.refineMatchingPairs(allPairs);
+            if(pairs.length < TRACK_MIN_MATCHES)
+                throw new TrackingError('Not enough data points to continue the tracking');

             // find motion models
-            return Speedy.Promise.all<SpeedyMatrix>([
-                this._findAffineMotion(matches),
-                this._findPerspectiveMotion(matches)
+            const points = ImageTrackerUtils.compilePairsOfKeypointsNDC(pairs);
+            return Speedy.Promise.all<SpeedyMatrixExpr>([
+                this._findAffineMotionNDC(points),
+                this._findPerspectiveMotionNDC(points)
             ]);

-        }).then(([affineMotion, perspectiveMotion]) => {
+        })
+        .then(([affineMotion, perspectiveMotion]) => {

             const lowPower = (Settings.powerPreference == 'low-power');
-            const frozen = !(!USE_TURBO || lowPower || this._counter % TURBO_SKIP == 0);
+            const delay = NUMBER_OF_PBOS * (!lowPower ? 2 : 1);

             // update warp homography
-            const delay = NUMBER_OF_PBOS * (!lowPower ? TURBO_SKIP : 1);
-            const remainder = delay >>> 1; // we want remainder > 0, so it skips the first frame
-            if(!USE_TURBO || this._counter % delay == remainder)
-                this._warpHomography.setToSync(this._warpHomography.times(affineMotion));
+            if(!USE_TURBO || this._counter % delay == 1) // skip the first frame (PBOs)
+                this._warpHomography.setToSync(affineMotion.times(this._warpHomography));

             // update pose homography
-            if(!frozen)
-                this._poseHomography.setToSync(this._warpHomography.times(perspectiveMotion));
+            this._poseHomography.setToSync(perspectiveMotion.times(this._warpHomography));
+            if(Number.isNaN(this._poseHomography.at(0,0)))
+                throw new NumericalError('Bad homography'); // normalize? 1 / h33

             // update counter
             this._counter = (this._counter + 1) % delay;

-            // update the camera
-            if(!frozen)
-                return this._camera.update(this._poseHomography, this.screenSize);
-            else
-                return this._camera.matrix;
+            // update camera model FIXME
+            const toNDC = ImageTrackerUtils.rasterToNDC(screenSize);
+            const toScreen = ImageTrackerUtils.NDCToRaster(screenSize);
+            const homography = Speedy.Matrix(toScreen.times(this._poseHomography).times(toNDC));

-        }).then(_ => {
+            //console.log("PIXL ", homography.toString());
+            //console.log("POSE ", this._poseHomography.toString());
+            //console.log("WARP ", this._warpHomography.toString());
+            //console.log("> AF ", Speedy.Matrix(affineMotion).toString());
+            //console.log("> PF ", Speedy.Matrix(perspectiveMotion).toString());

-            // find the inverse of the rectification matrix
-            const rectificationMatrix = imageRectifier.transform;
-            const inverseRectificationMatrix = Speedy.Matrix(rectificationMatrix.inverse());
-
-            // move keypoints from rectified space back to image space
-            const n = keypoints.length;
-            const coords: number[] = new Array(2*n);
-            for(let i = 0, j = 0; i < n; i++, j += 2) {
-                coords[j] = keypoints[i].position.x;
-                coords[j+1] = keypoints[i].position.y;
-            }
-
-            return Speedy.Matrix.applyPerspectiveTransform(
-                Speedy.Matrix.Zeros(2, n),
-                Speedy.Matrix(2, n, coords),
-                inverseRectificationMatrix
-            );
-
-            /*
-            // test image center
-            const coords2: number[] = new Array(2 * n);
-            for(let i = 0, j = 0; i < n; i++, j += 2) {
-                coords2[j] = this._imageTracker.screenSize.width / 2;
-                coords2[j+1] = this._imageTracker.screenSize.height / 2;
-                if(i % 2 == 0) {
-                    coords2[j] = this._imageTracker.screenSize.width / 4;
-                    coords2[j+1] = this._imageTracker.screenSize.height / 4;
-                }
-            }
-
-            return Speedy.Matrix.applyPerspectiveTransform(
-                Speedy.Matrix.Zeros(2, n),
-                Speedy.Matrix(2, n, coords2),
-                this._poseHomography
-                //this._warpHomography
-            );
-            */
-
-        }).then(mat => {
-
-            /*
-
-            const n = keypoints.length;
-            const coords = mat.read();
-
-            // ** this will interfere with the calculations when frame skipping is on **
-
-            // get keypoints in image space
-            for(let i = 0, j = 0; i < n; i++, j += 2) {
-                keypoints[i].position.x = coords[j];
-                keypoints[i].position.y = coords[j+1];
-            }
-
-            */
-
-            // find a polyline surrounding the target
-            return this._findPolyline(this._poseHomography, this.screenSize);
-            //return this._findPolyline(this._warpHomography, this.screenSize);
-
-        }).then(polyline => {
+            return this._camera.update(homography, screenSize);
+        })
+        .then(() => {

             // we let the target object be at the origin of the world space
             // (identity transform). We also perform a change of coordinates,
@@ -406,33 +354,34 @@ export class ImageTrackerTrackingState extends ImageTrackerState
                 viewer: viewer
             };

-            // build and save the output
-            this._lastOutput = {
+            // tracker output
+            const trackerOutput: ImageTrackerOutput = {
                 exports: result,
-                cameraMatrix: this._camera.matrix,
-                homography: this._warpHomography,
-                //keypoints: keypoints,
-                screenSize: this.screenSize,
+                //keypointsNIS: image !== undefined ? keypoints : undefined, // debug only
                 image: image,
-                polyline: polyline,
+                polylineNDC: ImageTrackerUtils.findPolylineNDC(this._poseHomography),
+                cameraMatrix: this._camera.matrix,
+                screenSize: screenSize,
             };

+            // save the last output
+            this._lastOutput = trackerOutput;
+
             // we have successfully tracked the target in this frame
             this._lostCounter = 0;

             // done!
             return {
                 nextState: 'tracking',
-                trackerOutput: this._lastOutput
+                trackerOutput: trackerOutput
             };

-        }).catch(err => {
+        })
+        .catch(err => {

             // give some tolerance to tracking errors
             if(err instanceof TrackingError) {
                 if(++this._lostCounter <= TRACK_LOST_TOLERANCE) {
-                    //console.log("ABSORB",this._lostCounter,err.toString())
-                    // absorb the error
                     return {
                         nextState: 'tracking',
                         trackerOutput: this._lastOutput
@@ -440,351 +389,144 @@ export class ImageTrackerTrackingState extends ImageTrackerState
                 }
             }

-            // lost tracking
+            // log
             Utils.warning(`The target has been lost! ${err.toString()}`);
-            this._camera.reset();

             // go back to the scanning state
             return {
                 nextState: 'scanning',
-                trackerOutput: {
-                    image: image,
-                    screenSize: this.screenSize,
-                },
+                trackerOutput: { }
             };

         });
     }
459 404
     /**
460
-     * Find quality matches between two sets of keypoints
461
-     * @param currKeypoints keypoints of the current frame
462
-     * @param prevKeypoints keypoints of the previous frame
463
-     * @returns quality matches
464
-     */
465
-    private _findQualityMatches(currKeypoints: SpeedyMatchedKeypoint[], prevKeypoints: SpeedyKeypoint[]): QualityMatches
466
-    {
467
-        const result: QualityMatches = [ [], [] ];
468
-        const n = currKeypoints.length;
469
-
470
-        for(let i = 0; i < n; i++) {
471
-            const currKeypoint = currKeypoints[i];
472
-
473
-            if(currKeypoint.matches[0].index >= 0 && currKeypoint.matches[1].index >= 0) {
474
-                const d1 = currKeypoint.matches[0].distance;
475
-                const d2 = currKeypoint.matches[1].distance;
476
-
477
-                if(d1 <= TRACK_MATCH_RATIO * d2) {
478
-                    const prevKeypoint = prevKeypoints[currKeypoint.matches[0].index];
479
-
480
-                    result[0].push(currKeypoint);
481
-                    result[1].push(prevKeypoint);
482
-                }
483
-            }
484
-        }
485
-
486
-        return result;
487
-    }
488
-
489
-    /**
490
-     * Find a better spatial distribution of the input matches
491
-     * @param matches quality matches
492
-     * @returns refined quality matches
493
-     */
494
-    private _refineQualityMatches(matches: QualityMatches): QualityMatches
495
-    {
496
-        const currKeypoints = matches[0];
497
-        const prevKeypoints = matches[1];
498
-
499
-        // find a better spatial distribution of the keypoints
500
-        const indices = this._distributeKeypoints(currKeypoints, TRACK_GRID_GRANULARITY);
501
-        const n = indices.length; // number of refined matches
502
-
503
-        // assemble output
504
-        const result: QualityMatches = [ new Array(n), new Array(n) ];
505
-        for(let i = 0; i < n; i++) {
506
-            result[0][i] = currKeypoints[indices[i]];
507
-            result[1][i] = prevKeypoints[indices[i]];
508
-        }
509
-
510
-        // done!
511
-        return result;
512
-    }
513
-
514
-    /**
515
-     * Spatially distribute keypoints over a grid
516
-     * @param keypoints keypoints to be distributed
517
-     * @param gridCells number of grid elements in each axis
518
-     * @returns a list of indices of keypoints[]
405
+     * Find an affine motion model in NDC between pairs of keypoints in NDC
406
+     * given as a 2 x 2n [ src | dest ] matrix
407
+     * @param points compiled pairs of keypoints in NDC
408
+     * @returns a promise that resolves to a 3x3 warp in NDC that maps source to destination
519 409
      */
520
-    private _distributeKeypoints(keypoints: SpeedyKeypoint[], gridCells: number): number[]
410
+    private _findAffineMotionNDC(points: SpeedyMatrix): SpeedyPromise<SpeedyMatrixExpr>
521 411
     {
522
-        // get the coordinates of the keypoints
523
-        const n = keypoints.length;
524
-        const points: number[] = new Array(2 * n);
525
-        for(let i = 0, j = 0; i < n; i++, j += 2) {
526
-            points[j] = keypoints[i].x;
527
-            points[j+1] = keypoints[i].y;
528
-        }
529
-
530
-        // normalize the coordinates to [0,1] x [0,1]
531
-        this._normalizePoints(points);
532
-
533
-        // distribute the keypoints over a grid
534
-        const numberOfCells = gridCells * gridCells;
535
-        const grid: number[] = (new Array(numberOfCells)).fill(-1);
536
-        for(let i = 0, j = 0; i < n; i++, j += 2) {
537
-            // find the grid location of the i-th point
538
-            const xg = Math.floor(points[j] * gridCells); // 0 <= xg,yg < gridCells
539
-            const yg = Math.floor(points[j+1] * gridCells);
540
-
541
-            // store the index of the i-th point in the grid
542
-            grid[yg * gridCells + xg] = i;
543
-        }
544
-
545
-        // retrieve points of the grid
546
-        const indices: number[] = [];
547
-        for(let g = 0; g < numberOfCells; g++) {
548
-            if(grid[g] >= 0) {
549
-                const i = grid[g];
550
-                indices.push(i);
551
-            }
552
-        }
412
+        /*
553 413
 
554
-        // done!
555
-        return indices;
556
-    }
414
+        We can probably get more accurate motion estimates if we
415
+        work in 3D rather than in 2D. We're currently estimating an
416
+        affine motion in 2D NDC space, which does not account for
417
+        perspective distortions. What if we projected the keypoints
418
+        into 3D NDC space, estimated the camera motion (rotation and
419
+        translation) that best describes the observed observed motion
420
+        of the keypoints, and then projected things back to 2D NDC
421
+        space? Need to figure this out; we'll get a homography matrix.
557 422
 
558
-    /**
559
-     * Normalize points to [0,1)^2
560
-     * @param points 2 x n matrix of points in column-major format
561
-     * @returns points
562
-     */
563
-    private _normalizePoints(points: number[]): number[]
564
-    {
565
-        Utils.assert(points.length % 2 == 0);
566
-
567
-        const n = points.length / 2;
568
-        if(n == 0)
569
-            return points;
570
-
571
-        let xmin = Number.POSITIVE_INFINITY, xmax = Number.NEGATIVE_INFINITY;
572
-        let ymin = Number.POSITIVE_INFINITY, ymax = Number.NEGATIVE_INFINITY;
573
-        for(let i = 0, j = 0; i < n; i++, j += 2) {
574
-            const x = points[j], y = points[j+1];
575
-            xmin = x < xmin ? x : xmin;
576
-            ymin = y < ymin ? y : ymin;
577
-            xmax = x > xmax ? x : xmax;
578
-            ymax = y > ymax ? y : ymax;
579
-        }
423
+        Note: work with a 6 DoF perspective transform instead of 8.
580 424
 
581
-        const xlen = xmax - xmin + 1; // +1 is a correction factor, so that 0 <= x,y < 1
582
-        const ylen = ymax - ymin + 1;
583
-        for(let i = 0, j = 0; i < n; i++, j += 2) {
584
-            points[j] = (points[j] - xmin) / xlen;
585
-            points[j+1] = (points[j+1] - ymin) / ylen;
586
-        }
425
+        */
587 426
 
588
-        return points;
589
-    }
427
+        return ImageTrackerUtils.findAffineWarpNDC(points, {
428
+            method: 'pransac',
429
+            reprojectionError: TRACK_RANSAC_REPROJECTIONERROR_NDC,
430
+            numberOfHypotheses: 512*4,
431
+            bundleSize: 128,
432
+            mask: undefined // score is not needed
433
+        }).then(([ warp, score ]) => {
590 434
 
591
-    /**
592
-     * Find a matrix with the coordinates of quality matches
593
-     * @param matches n quality matches
594
-     * @returns a 2 x 2n matrix split into two 2 x n blocks [ prevKeypoints | currKeypoints ]
595
-     */
596
-    private _findMatrixOfMatches(matches: QualityMatches): SpeedyMatrix
597
-    {
598
-        const n = matches[0].length;
599
-        Utils.assert(n > 0);
435
+            const scale = TRACK_RECTIFIED_SCALE;
436
+            const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(this._imageTracker, this._referenceImage!);
437
+            const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, scale);
438
+            const grow = ImageTrackerUtils.inverseBestFitScaleNDC(aspectRatio, scale);
439
+            const scaledWarp = grow.times(warp).times(shrink);
600 440
 
601
-        // sets of keypoints
602
-        const currKeypoints = matches[0];
603
-        const prevKeypoints = matches[1];
441
+            const distort = this._warpHomography;
442
+            const undistort = distort.inverse();
443
+            const correctedWarp = distort.times(scaledWarp).times(undistort);
604 444
 
605
-        // get the coordinates of the keypoints of the set of refined matches
606
-        const src: number[] = new Array(2*n);
607
-        const dst: number[] = new Array(2*n);
445
+            return correctedWarp;
608 446
 
609
-        for(let i = 0, j = 0; i < n; i++, j += 2) {
610
-            src[j] = prevKeypoints[i].x;
611
-            src[j+1] = prevKeypoints[i].y;
447
+        }).catch(err => {
612 448
 
613
-            dst[j] = currKeypoints[i].x;
614
-            dst[j+1] = currKeypoints[i].y;
615
-        }
449
+            throw new TrackingError(`Can't find an affine motion`, err);
616 450
 
617
-        // assemble the matrix
618
-        return Speedy.Matrix(2, 2*n, src.concat(dst));
451
+        });
619 452
     }
620 453
 
621 454
     /**
622
-     * Preprocess keypoint matches
623
-     * @param currKeypoints keypoints of the current frame
624
-     * @param prevKeypoints keypoints of the previous frame
625
-     * @returns a promise that is rejected if there are not enough "good" matches, or that is resolved to a
626
-     *          2 x 2n matrix split into two 2 x n blocks [ source x,y coordinates | dest x,y coordinates ]
455
+     * Find a perspective motion model in NDC between pairs of keypoints in NDC
456
+     * given as a 2 x 2n [ src | dest ] matrix
457
+     * @param points compiled pairs of keypoints in NDC
458
+     * @returns a promise that resolves to a 3x3 warp in NDC that maps source to destination
627 459
      */
628
-    private _preprocessMatches(currKeypoints: SpeedyMatchedKeypoint[], prevKeypoints: SpeedyKeypoint[]): SpeedyPromise<SpeedyMatrix>
460
+    private _findPerspectiveMotionNDC(points: SpeedyMatrix): SpeedyPromise<SpeedyMatrixExpr>
629 461
     {
630
-        // find and refine quality matches
631
-        const qualityMatches = this._findQualityMatches(currKeypoints, prevKeypoints);
632
-        const refinedMatches = this._refineQualityMatches(qualityMatches);
633
-
634
-        // not enough matches?
635
-        const n = refinedMatches[0].length;
636
-        if(n < TRACK_MIN_MATCHES)
637
-            return Speedy.Promise.reject(new TrackingError('Not enough data to compute a motion model'));
638
-
639
-        // find matrix of matches
640
-        const matrixOfMatches = this._findMatrixOfMatches(refinedMatches);
462
+        return ImageTrackerUtils.findPerspectiveWarpNDC(points, {
463
+            method: 'pransac',
464
+            reprojectionError: TRACK_RANSAC_REPROJECTIONERROR_NDC,
465
+            numberOfHypotheses: 512*2,
466
+            bundleSize: 128,//128*4,
467
+            mask: undefined // score is not needed
468
+        }).then(([ warp, score ]) => {
641 469
 
642
-        // warp matrix of matches
643
-        const result = Speedy.Matrix.Zeros(2, 2*n);
644
-        return this._findKeypointWarp().then(transform =>
470
+            const scale = TRACK_RECTIFIED_SCALE;
471
+            const aspectRatio = ImageTrackerUtils.bestFitAspectRatioNDC(this._imageTracker, this._referenceImage!);
472
+            const shrink = ImageTrackerUtils.bestFitScaleNDC(aspectRatio, scale);
473
+            const grow = ImageTrackerUtils.inverseBestFitScaleNDC(aspectRatio, scale);
474
+            const scaledWarp = grow.times(warp).times(shrink);
645 475
 
646
-            Speedy.Matrix.applyAffineTransform(
647
-                result,
648
-                matrixOfMatches,
649
-                transform.block(0,1,0,2)
650
-            )
476
+            const distort = this._poseHomography;
477
+            const undistort = distort.inverse();
478
+            const correctedWarp = distort.times(scaledWarp).times(undistort);
651 479
 
652
-        );
653
-    }
480
+            return correctedWarp;
654 481
 
655
-    /**
656
-     * Find an affine motion model of the target image
657
-     * @param preprocessedMatches 2 x 2n matrix split into two 2 x n blocks [ src | dest ]
658
-     * @returns a promise that resolves to a 3x3 affine motion model (last row is [ 0  0  1 ])
659
-     */
660
-    private _findAffineMotion(preprocessedMatches: SpeedyMatrix): SpeedyPromise<SpeedyMatrix>
661
-    {
662
-        const model = Speedy.Matrix.Eye(3);
663
-        const n = preprocessedMatches.columns / 2; // number of preprocessed matches
664
-
665
-        // find motion model
666
-        return Speedy.Matrix.findAffineTransform(
667
-            model.block(0,1,0,2),
668
-            preprocessedMatches.block(0,1,0,n-1),
669
-            preprocessedMatches.block(0,1,n,2*n-1), {
670
-            method: 'pransac',
671
-            reprojectionError: TRACK_RANSAC_REPROJECTIONERROR,
672
-            numberOfHypotheses: 512,
673
-            bundleSize: 128,
674
-        }).then(_ => {
675
-
676
-            // validate the model
677
-            const a00 = model.at(0,0);
678
-            if(Number.isNaN(a00))
679
-                throw new TrackingError(`Can't compute affine motion model: bad keypoints`);
482
+        }).catch(err => {
680 483
 
681
-            // done!
682
-            return model;
484
+            throw new TrackingError(`Can't find a perspective motion`, err);
683 485
 
684 486
         });
685 487
     }
686 488
 
687 489
     /**
688
-     * Find a perspective motion model of the target image
689
-     * @param preprocessedMatches 2 x 2n matrix split into two 2 x n blocks [ src | dest ]
690
-     * @returns a promise that resolves to a 3x3 perspective motion model
490
+     * Find matching pairs of two sets of keypoints matched via brute force
491
+     * @param srcKeypoints source (database)
492
+     * @param destKeypoints destination
493
+     * @returns an array of matching pairs [src, dest]
691 494
      */
692
-    private _findPerspectiveMotion(preprocessedMatches: SpeedyMatrix): SpeedyPromise<SpeedyMatrix>
495
+    private _findMatchingPairs(srcKeypoints: SpeedyKeypoint[], destKeypoints: SpeedyMatchedKeypoint[]): ImageTrackerKeypointPair[]
693 496
     {
694
-        /*
695
-
696
-        We can probably get more accurate motion estimates if we
697
-        work in 3D rather than in 2D. We're currently estimating
698
-        an affine transform in image space. What if we projected
699
-        the keypoints into world space, estimated the camera motion
700
-        (rotation and translation) that best describes the observed
701
-        observed motion of the keypoints, and then projected things
702
-        back to image space? Need to figure this out; we'll get a
703
-        homography matrix.
704
-
705
-        Note: keypoints are in rectified image space.
706
-
707
-        Note: work with a 6 DoF perspective transform instead of 8.
708
-
709
-        */
710
-
711
-        const model = Speedy.Matrix.Zeros(3);
712
-        const n = preprocessedMatches.columns / 2; // number of preprocessed matches
713
-
714
-        // find motion model
715
-        return Speedy.Matrix.findHomography(
716
-            model,
717
-            preprocessedMatches.block(0,1,0,n-1),
718
-            preprocessedMatches.block(0,1,n,2*n-1), {
719
-            method: 'pransac',
720
-            reprojectionError: TRACK_RANSAC_REPROJECTIONERROR,
721
-            numberOfHypotheses: 512*2,
722
-            bundleSize: 128*4, //*4
723
-        }).then(_ => {
724
-
725
-            // validate the model
726
-            const a00 = model.at(0,0);
727
-            if(Number.isNaN(a00))
728
-                throw new TrackingError(`Can't compute perspective motion model: bad keypoints`);
729
-
730
-            // done!
731
-            return model;
497
+        const pairs: ImageTrackerKeypointPair[] = [];
732 498
 
733
-        });
734
-    }
499
+        for(let i = 0; i < destKeypoints.length; i++) {
500
+            const destKeypoint = destKeypoints[i];
735 501
 
736
-    /**
737
-     * Find a rectification matrix to be applied to the target image
738
-     * @param homography maps a reference image to the AR screen
739
-     * @param media target
740
-     * @param screenSize AR screen
741
-     * @returns promise that resolves to a rectification matrix
742
-     */
743
-    private _findImageWarp(homography: SpeedyMatrix, screenSize: SpeedySize): SpeedyPromise<SpeedyMatrix>
744
-    {
745
-        const referenceImage = this._referenceImage as ReferenceImage;
746
-        const media = this._imageTracker.database._findMedia(referenceImage.name);
747
-        const mat = Speedy.Matrix.Zeros(3);
502
+            if(destKeypoint.matches[0].index >= 0 && destKeypoint.matches[1].index >= 0) {
503
+                const d1 = destKeypoint.matches[0].distance;
504
+                const d2 = destKeypoint.matches[1].distance;
748 505
 
749
-        return this._findRectificationMatrixOfFullscreenImage(media, screenSize).then(warp =>
750
-            mat.setTo(warp.times(homography.inverse()))
751
-        );
752
-    }
506
+                // the best match should be "much better" than the second best match,
507
+                // which means that they are "distinct enough"
508
+                if(d1 <= TRACK_MATCH_RATIO * d2) {
509
+                    const srcKeypoint = srcKeypoints[destKeypoint.matches[0].index];
510
+                    pairs.push([srcKeypoint, destKeypoint]);
511
+                }
512
+            }
513
+        }
753 514
 
754
-    /**
755
-     * Find a warp to be applied to the keypoints
756
-     * @returns affine transform
757
-     */
758
-    private _findKeypointWarp(): SpeedyPromise<SpeedyMatrix>
759
-    {
760
-        const referenceImage = this._referenceImage as ReferenceImage;
761
-        const media = this._imageTracker.database._findMedia(referenceImage.name);
762
-        const screenSize = this.screenSize;
763
-        const sw = screenSize.width, sh = screenSize.height;
764
-        const mat = Speedy.Matrix.Eye(3, 3);
765
-
766
-        // no rotation is needed
767
-        if(!this._mustRotateWarpedImage(media, screenSize))
768
-            return Speedy.Promise.resolve(mat);
769
-
770
-        // rotate by 90 degrees clockwise and scale
771
-        return Speedy.Matrix.affine(
772
-            mat.block(0,1,0,2),
773
-            Speedy.Matrix(2, 3, [ 0,sh , 0,0 , sw,0  ]),
774
-            Speedy.Matrix(2, 3, [ 0,0 , sw,0 , sw,sh ])
775
-        ).then(_ => mat);
515
+        return pairs;
776 516
     }
777 517
 
778 518
     /**
779 519
      * Predict the keypoints without actually looking at the image
780 520
      * @param curr keypoints at time t (will modify the contents)
781
-     * @param initial keypoints at time t-1 (not just t = 0)
521
+     * @param prev keypoints at time t-1 (not just t = 0)
782 522
      * @returns keypoints at time t+1
783 523
      */
784
-    private _predictKeypoints(curr: SpeedyMatchedKeypoint[], initial: SpeedyKeypoint[]): SpeedyMatchedKeypoint[]
524
+    /*
525
+    private _predictKeypoints(curr: SpeedyMatchedKeypoint[], prev: SpeedyKeypoint[]): SpeedyMatchedKeypoint[]
785 526
     {
786 527
         // the target image is likely to be moving roughly in
787 528
         // the same manner as it was in the previous frame
529
+        const alpha = 0.8; //0.2;
788 530
         const next: SpeedyMatchedKeypoint[] = [];
789 531
         const n = curr.length;
790 532
 
@@ -793,25 +535,23 @@ export class ImageTrackerTrackingState extends ImageTrackerState
793 535
 
794 536
             if(cur.matches[0].index < 0 || cur.matches[1].index < 0)
795 537
                 continue;
796
-            /*
797
-            else if(cur.matches[0].distance > TRACK_MATCH_RATIO * cur.matches[1].distance)
798
-                continue;
799
-            */
538
+            //else if(cur.matches[0].distance > TRACK_MATCH_RATIO * cur.matches[1].distance)
539
+            //    continue;
800 540
 
801
-            const ini = initial[cur.matches[0].index];
802
-            const dx = cur.position.x - ini.position.x;
803
-            const dy = cur.position.y - ini.position.y;
541
+            const prv = prev[cur.matches[0].index];
542
+            const dx = cur.position.x - prv.position.x;
543
+            const dy = cur.position.y - prv.position.y;
804 544
 
805 545
             // a better mathematical model is needed
806
-            const alpha = 0.8; //0.2;
807
-            cur.position.x = ini.position.x + alpha * dx;
808
-            cur.position.y = ini.position.y + alpha * dy;
546
+            cur.position.x = prv.position.x + alpha * dx;
547
+            cur.position.y = prv.position.y + alpha * dy;
809 548
             next.push(cur);
810 549
         }
811 550
 
812 551
         // done!
813 552
         return next;
814 553
     }
554
+    */
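The predictor above is disabled in this commit (note the surrounding /* ... */ and the "a better mathematical model is needed" remark). What it implemented is a damped update: each keypoint position becomes a blend of its previous and current positions. A self-contained sketch of that update, with alpha = 0.8 as in the code above:

    // the damped update of the disabled _predictKeypoints(), in isolation
    function predictPosition(prevX: number, prevY: number,
                             currX: number, currY: number,
                             alpha: number = 0.8): [number, number]
    {
        // alpha = 1 keeps the current position; alpha = 0 snaps back to
        // the previous one
        return [
            prevX + alpha * (currX - prevX),
            prevY + alpha * (currY - prevY)
        ];
    }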
815 555
 
816 556
     /**
817 557
      * Create & setup the pipeline
@@ -835,10 +575,10 @@ export class ImageTrackerTrackingState extends ImageTrackerState
835 575
         const denoiser = Speedy.Filter.GaussianBlur();
836 576
         const borderClipper = Speedy.Keypoint.BorderClipper('borderClipper');
837 577
         const clipper = Speedy.Keypoint.Clipper();
838
-        const keypointRectifier = Speedy.Keypoint.Transformer('keypointRectifier');
578
+        const keypointScaler = Speedy.Keypoint.Transformer('keypointScaler');
839 579
         const keypointPortalSource = Speedy.Keypoint.Portal.Source('keypointPortalSource');
840 580
         const keypointSink = Speedy.Keypoint.SinkOfMatchedKeypoints('keypoints');
841
-        const imageSink = Speedy.Image.Sink('image');
581
+        //const imageSink = Speedy.Image.Sink('image');
842 582
 
843 583
         source.media = null;
844 584
         screen.size = Speedy.Size(0,0);
@@ -858,7 +598,7 @@ export class ImageTrackerTrackingState extends ImageTrackerState
858 598
         clipper.size = TRACK_MAX_KEYPOINTS;
859 599
         borderClipper.imageSize = screen.size;
860 600
         borderClipper.borderSize = Speedy.Vector2(0,0);
861
-        keypointRectifier.transform = Speedy.Matrix.Eye(3);
601
+        keypointScaler.transform = Speedy.Matrix.Eye(3);
862 602
         matcher.k = 2;
863 603
         keypointPortalSource.source = null;
864 604
         keypointSink.turbo = USE_TURBO;
@@ -893,9 +633,8 @@ export class ImageTrackerTrackingState extends ImageTrackerState
893 633
         descriptor.output().connectTo(matcher.input('keypoints'));
894 634
 
895 635
         // prepare output
896
-        descriptor.output().connectTo(keypointRectifier.input());
897
-        //preMatcher.output().connectTo(keypointRectifier.input());
898
-        keypointRectifier.output().connectTo(keypointSink.input());
636
+        descriptor.output().connectTo(keypointScaler.input());
637
+        keypointScaler.output().connectTo(keypointSink.input());
899 638
         matcher.output().connectTo(keypointSink.input('matches'));
900 639
         //imageRectifier.output().connectTo(imageSink.input());
901 640
 
@@ -905,9 +644,10 @@ export class ImageTrackerTrackingState extends ImageTrackerState
905 644
             imageRectifier, nightvision, nightvisionMux, blur,
906 645
             detector, subpixel, borderClipper, clipper, denoiser,
907 646
             descriptor, matcher,
908
-            keypointPortalSource, keypointRectifier, keypointSink,
647
+            keypointPortalSource, keypointScaler, keypointSink,
909 648
             //imageSink
910 649
         );
650
+
911 651
         return pipeline;
912 652
     }
913 653
 }
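A note on the rename: keypointRectifier becomes keypointScaler because the transform attached to this node now merely scales keypoints between spaces; Speedy.Matrix.Eye(3) above is only the initial value, replaced per frame. Conceptually, a Keypoint.Transformer multiplies each keypoint position by a 3x3 homogeneous matrix. A self-contained sketch of that operation, assuming column-major storage as used by speedy-vision matrices:

    // apply a 3x3 homogeneous transform, stored column-major, to a point
    function applyHomogeneous(m: number[], x: number, y: number): [number, number]
    {
        const w = m[2] * x + m[5] * y + m[8];
        return [
            (m[0] * x + m[3] * y + m[6]) / w,
            (m[1] * x + m[4] * y + m[7]) / w
        ];
    }

For a pure scaling matrix, such as the one set in training.ts below, w is always 1 and the operation reduces to (sw * x, sh * y).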

+ 49
- 80
src/trackers/image-tracker/states/training.ts View File

@@ -31,6 +31,7 @@ import { SpeedyPipelineNodeKeypointTransformer } from 'speedy-vision/types/core/
31 31
 import { SpeedyKeypoint } from 'speedy-vision/types/core/speedy-keypoint';
32 32
 import { Resolution } from '../../../utils/resolution';
33 33
 import { ImageTracker, ImageTrackerOutput, ImageTrackerStateName } from '../image-tracker';
34
+import { ImageTrackerUtils, ImageTrackerKeypointPair } from '../image-tracker-utils';
34 35
 import { ImageTrackerState, ImageTrackerStateOutput } from './state';
35 36
 import { ReferenceImage } from '../reference-image';
36 37
 import { ReferenceImageDatabase } from '../reference-image-database';
@@ -43,7 +44,6 @@ import {
43 44
     SCAN_WITH_NIGHTVISION, NIGHTVISION_GAIN, NIGHTVISION_OFFSET, NIGHTVISION_DECAY,
44 45
     SUBPIXEL_GAUSSIAN_KSIZE, SUBPIXEL_GAUSSIAN_SIGMA,
45 46
     TRAIN_IMAGE_SCALE,
46
-    TRAIN_TARGET_NORMALIZED_SIZE,
47 47
     NIGHTVISION_QUALITY,
48 48
     SUBPIXEL_METHOD,
49 49
 } from '../settings';
@@ -58,6 +58,9 @@ interface TrainingMap
58 58
 
59 59
     /** maps a keypoint index to an image index */
60 60
     readonly referenceImageIndex: number[];
61
+
62
+    /** reference images */
63
+    readonly referenceImages: ReferenceImage[];
61 64
 }
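The new referenceImages field completes the training map: keypoints, referenceImageIndex and referenceImages form an indexed structure, so a trained keypoint resolves to its source image in O(1). A minimal sketch of that lookup, mirroring the getters near the end of this file:

    // resolve a keypoint index to its reference image via the training map
    function lookupReferenceImage(map: TrainingMap, keypointIndex: number): ReferenceImage | null
    {
        if(keypointIndex < 0 || keypointIndex >= map.referenceImageIndex.length)
            return null;

        const imageIndex = map.referenceImageIndex[keypointIndex];
        if(imageIndex < 0 || imageIndex >= map.referenceImages.length)
            return null;

        return map.referenceImages[imageIndex];
    }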
62 65
 
63 66
 
@@ -70,9 +73,6 @@ export class ImageTrackerTrainingState extends ImageTrackerState
70 73
     /** index of the image being used to train the tracker */
71 74
     private _currentImageIndex = 0;
72 75
 
73
-    /** reference images */
74
-    private _image: ReferenceImage[] = [];
75
-
76 76
     /** training map */
77 77
     private _trainingMap: TrainingMap;
78 78
 
@@ -89,7 +89,8 @@ export class ImageTrackerTrainingState extends ImageTrackerState
89 89
         // initialize the training map
90 90
         this._trainingMap = {
91 91
             keypoints: [],
92
-            referenceImageIndex: []
92
+            referenceImageIndex: [],
93
+            referenceImages: [],
93 94
         };
94 95
     }
95 96
 
@@ -107,9 +108,9 @@ export class ImageTrackerTrainingState extends ImageTrackerState
107 108
 
108 109
         // prepare to train...
109 110
         this._currentImageIndex = 0;
110
-        this._image.length = 0;
111
-        this._trainingMap.referenceImageIndex.length = 0;
112 111
         this._trainingMap.keypoints.length = 0;
112
+        this._trainingMap.referenceImageIndex.length = 0;
113
+        this._trainingMap.referenceImages.length = 0;
113 114
 
114 115
         // lock the database
115 116
         Utils.log(`Image Tracker: training using ${database.count} reference image${database.count != 1 ? 's' : ''}`);
@@ -117,7 +118,17 @@ export class ImageTrackerTrainingState extends ImageTrackerState
117 118
 
118 119
         // collect all images
119 120
         for(const referenceImage of database)
120
-            this._image.push(referenceImage);
121
+            this._trainingMap.referenceImages.push(referenceImage);
122
+    }
123
+
124
+    /**
125
+     * Called when leaving the state, after update()
126
+     */
127
+    onLeaveState(): void
128
+    {
129
+        // we don't return to this state, so we can release the pipeline early
130
+        this._pipeline.release();
131
+        this._pipelineReleased = true;
121 132
     }
122 133
 
123 134
     /**
@@ -126,18 +137,13 @@ export class ImageTrackerTrainingState extends ImageTrackerState
126 137
      */
127 138
     protected _beforeUpdate(): SpeedyPromise<void>
128 139
     {
129
-        const arScreenSize = this.screenSize;
130 140
         const source = this._pipeline.node('source') as SpeedyPipelineNodeImageSource;
131 141
         const screen = this._pipeline.node('screen') as SpeedyPipelineNodeResize;
132 142
         const keypointScaler = this._pipeline.node('keypointScaler') as SpeedyPipelineNodeKeypointTransformer;
133 143
 
134
-        // this shouldn't happen
135
-        if(this._currentImageIndex >= this._image.length)
136
-            return Speedy.Promise.reject(new IllegalOperationError());
137
-
138 144
         // set the appropriate training media
139 145
         const database = this._imageTracker.database;
140
-        const referenceImage = this._image[this._currentImageIndex];
146
+        const referenceImage = this._trainingMap.referenceImages[this._currentImageIndex];
141 147
         const media = database._findMedia(referenceImage.name);
142 148
         source.media = media;
143 149
 
@@ -146,46 +152,12 @@ export class ImageTrackerTrainingState extends ImageTrackerState
146 152
         const scale = TRAIN_IMAGE_SCALE; // ORB is not scale-invariant
147 153
         const aspectRatioOfTrainingImage = media.width / media.height;
148 154
 
149
-        /*
150
-        let sin = 0, cos = 1;
151
-
152
-        if((aspectRatioOfSourceVideo - 1) * (aspectRatioOfTrainingImage - 1) >= 0) {
153
-            // training image and source video: both in landscape mode or both in portrait mode
154
-            screen.size = Utils.resolution(resolution, aspectRatioOfTrainingImage);
155
-            screen.size.width = Math.round(screen.size.width * scale);
156
-            screen.size.height = Math.round(screen.size.height * scale);
157
-        }
158
-        else if(aspectRatioOfTrainingImage > aspectRatioOfSourceVideo) {
159
-            // training image: portrait mode; source video: landscape mode
160
-            screen.size = Utils.resolution(resolution, 1 / aspectRatioOfTrainingImage);
161
-            screen.size.width = Math.round(screen.size.width * scale);
162
-            screen.size.height = Math.round(screen.size.height * scale);
163
-            sin = 1; cos = 0; // rotate 90deg
164
-        }
165
-        else {
166
-            // training image: landscape mode; source video: portrait mode
167
-        }
168
-        */
169 155
         screen.size = Utils.resolution(resolution, aspectRatioOfTrainingImage);
170 156
         screen.size.width = Math.round(screen.size.width * scale);
171 157
         screen.size.height = Math.round(screen.size.height * scale);
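To make the arithmetic concrete, with hypothetical numbers (the actual resolution and TRAIN_IMAGE_SCALE values live in settings.ts and are not shown in this diff): if Utils.resolution() yielded 480x360 for a 4:3 training image and the scale were 0.8, the training screen would be 384x288.

    // hypothetical values, for illustration only
    const size = { width: 480, height: 360 };
    const scale = 0.8;
    size.width = Math.round(size.width * scale);   // 384
    size.height = Math.round(size.height * scale); // 288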
172 158
 
173
-
174
-        // convert keypoints from the training image space to AR screen space
175
-        // let's pretend that trained keypoints belong to the AR screen space,
176
-        // regardless of the size of the target image. This will make things
177
-        // easier when computing the homography.
178
-        /*
179
-        const sw = arScreenSize.width / screen.size.width;
180
-        const sh = arScreenSize.height / screen.size.height;
181
-        */
182
-        const sw = TRAIN_TARGET_NORMALIZED_SIZE / screen.size.width;
183
-        const sh = TRAIN_TARGET_NORMALIZED_SIZE / screen.size.height;
184
-        keypointScaler.transform = Speedy.Matrix(3, 3, [
185
-            sw, 0,  0,
186
-            0,  sh, 0,
187
-            0,  0,  1,
188
-        ]);
159
+        // convert keypoints to NIS
160
+        keypointScaler.transform = ImageTrackerUtils.rasterToNIS(screen.size);
189 161
 
190 162
         // log
191 163
         Utils.log(`Image Tracker: training using reference image "${referenceImage.name}" at ${screen.size.width}x${screen.size.height}...`);
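ImageTrackerUtils.rasterToNIS() is introduced elsewhere in this commit and its body is not shown here, but the deleted scaling code above suggests what it computes: a 3x3 matrix taking raster coordinates of the given size to Normalized Image Space, with NIS_SIZE playing the role of the old TRAIN_TARGET_NORMALIZED_SIZE. A plausible sketch, not necessarily the actual implementation:

    // inferred from the deleted keypointScaler transform above
    function rasterToNIS(size: SpeedySize): SpeedyMatrix
    {
        const sw = NIS_SIZE / size.width;
        const sh = NIS_SIZE / size.height;

        return Speedy.Matrix(3, 3, [
            sw, 0,  0,
            0,  sh, 0,
            0,  0,  1,
        ]);
    }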
@@ -201,13 +173,19 @@ export class ImageTrackerTrainingState extends ImageTrackerState
201 173
      */
202 174
     protected _afterUpdate(result: SpeedyPipelineOutput): SpeedyPromise<ImageTrackerStateOutput>
203 175
     {
204
-        const referenceImage = this._image[this._currentImageIndex];
176
+        const referenceImage = this._trainingMap.referenceImages[this._currentImageIndex];
205 177
         const keypoints = result.keypoints as SpeedyKeypoint[];
206 178
         const image = result.image as SpeedyMedia | undefined;
207 179
 
208 180
         // log
209 181
         Utils.log(`Image Tracker: found ${keypoints.length} keypoints in reference image "${referenceImage.name}"`);
210 182
 
183
+        // tracker output
184
+        const trackerOutput: ImageTrackerOutput = {
185
+            keypointsNIS: image !== undefined ? keypoints : undefined, // debug only
186
+            image: image,
187
+        };
188
+
211 189
         // set the training map, so that we can map all keypoints of the current image to the current image
212 190
         for(let i = 0; i < keypoints.length; i++) {
213 191
             this._trainingMap.keypoints.push(keypoints[i]);
@@ -217,31 +195,22 @@ export class ImageTrackerTrainingState extends ImageTrackerState
217 195
         // the current image has been processed!
218 196
         ++this._currentImageIndex;
219 197
 
220
-        // set output
221
-        if(this._currentImageIndex >= this._image.length) {
222
-
223
-            // finished training!
224
-            return Speedy.Promise.resolve({
225
-                //nextState: 'training',
226
-                nextState: 'scanning',
227
-                nextStateSettings: {
228
-                    keypoints: this._trainingMap.keypoints,
229
-                },
230
-                trackerOutput: { },
231
-                //trackerOutput: { image, keypoints, screenSize: this.screenSize },
232
-            });
233
-
234
-        }
235
-        else {
236
-
237
-            // we're not done yet
198
+        // we're not done yet
199
+        if(this._currentImageIndex < this._trainingMap.referenceImages.length) {
238 200
             return Speedy.Promise.resolve({
239 201
                 nextState: 'training',
240
-                trackerOutput: { },
241
-                //trackerOutput: { image, keypoints, screenSize: this.screenSize },
202
+                trackerOutput: trackerOutput
242 203
             });
243
-
244 204
         }
205
+
206
+        // finished training!
207
+        return Speedy.Promise.resolve({
208
+            nextState: 'scanning',
209
+            trackerOutput: trackerOutput,
210
+            nextStateSettings: {
211
+                database: this._trainingMap.keypoints,
212
+            }
213
+        });
245 214
     }
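The restructured _afterUpdate() reads more linearly than the old if/else: one reference image is processed per update, the state loops back to 'training' until every image is trained, and only then are the accumulated keypoints handed to 'scanning' (note that the settings key changed from keypoints to database). Condensed:

    // the two outcomes of _afterUpdate(), side by side
    if(this._currentImageIndex < this._trainingMap.referenceImages.length)
        return Speedy.Promise.resolve({ nextState: 'training', trackerOutput });

    return Speedy.Promise.resolve({
        nextState: 'scanning',
        trackerOutput,
        nextStateSettings: { database: this._trainingMap.keypoints }
    });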
246 215
 
247 216
     /**
@@ -267,7 +236,7 @@ export class ImageTrackerTrainingState extends ImageTrackerState
267 236
         const clipper = Speedy.Keypoint.Clipper();
268 237
         const keypointScaler = Speedy.Keypoint.Transformer('keypointScaler');
269 238
         const keypointSink = Speedy.Keypoint.Sink('keypoints');
270
-        const imageSink = Speedy.Image.Sink('image');
239
+        //const imageSink = Speedy.Image.Sink('image');
271 240
 
272 241
         source.media = null;
273 242
         screen.size = Speedy.Size(0,0);
@@ -317,7 +286,7 @@ export class ImageTrackerTrainingState extends ImageTrackerState
317 286
         // prepare output
318 287
         descriptor.output().connectTo(keypointScaler.input());
319 288
         keypointScaler.output().connectTo(keypointSink.input());
320
-        nightvisionMux.output().connectTo(imageSink.input());
289
+        //nightvisionMux.output().connectTo(imageSink.input());
321 290
 
322 291
         // done!
323 292
         pipeline.init(
@@ -326,13 +295,13 @@ export class ImageTrackerTrainingState extends ImageTrackerState
326 295
             pyramid, detector, blur, descriptor, clipper,
327 296
             denoiser, blurredPyramid, subpixel,
328 297
             keypointScaler, keypointSink,
329
-            imageSink
298
+            //imageSink
330 299
         );
331 300
         return pipeline;
332 301
     }
333 302
 
334 303
     /**
335
-     * Get reference image
304
+     * Get the reference image associated with a keypoint index in the training map
336 305
      * @param keypointIndex -1 if not found
337 306
      * @returns reference image
338 307
      */
@@ -342,11 +311,11 @@ export class ImageTrackerTrainingState extends ImageTrackerState
342 311
         if(imageIndex < 0)
343 312
             return null;
344 313
 
345
-        return this._image[imageIndex];
314
+        return this._trainingMap.referenceImages[imageIndex];
346 315
     }
347 316
 
348 317
     /**
349
-     * Get reference image index
318
+     * Get the reference image index associated with a keypoint index in the training map
350 319
      * @param keypointIndex -1 if not found
351 320
      * @returns reference image index, or -1 if not found
352 321
      */
@@ -357,14 +326,14 @@ export class ImageTrackerTrainingState extends ImageTrackerState
357 326
             return -1;
358 327
 
359 328
         const imageIndex = this._trainingMap.referenceImageIndex[keypointIndex];
360
-        if(imageIndex < 0 || imageIndex >= this._image.length)
329
+        if(imageIndex < 0 || imageIndex >= this._trainingMap.referenceImages.length)
361 330
             return -1;
362 331
 
363 332
         return imageIndex;
364 333
     }
365 334
 
366 335
     /**
367
-     * Get keypoint of the trained set
336
+     * Get a keypoint of the trained set
368 337
      * @param keypointIndex -1 if not found
369 338
      * @returns a keypoint
370 339
      */
