@@ -15,7 +15,7 @@ const DEFAULTS = {
15
15
filterBoxesThreshold : 0.01 ,
16
16
IOUThreshold : 0.4 ,
17
17
classProbThreshold : 0.4 ,
18
- URL : 'https://raw.githubusercontent.com/ml5js/ml5-library /master/src /YOLO/model.json' ,
18
+ URL : 'https://raw.githubusercontent.com/ml5js/ml5-data-and-models /master/models /YOLO/model.json' ,
19
19
imageSize : 416 ,
20
20
} ;
21
21
@@ -41,31 +41,9 @@ class YOLOBase {
41
41
}
42
42
43
43
init ( ) {
44
- // indices tensor to filter the elements later on
45
- this . indicesTensor = tf . range ( 1 , 846 , 1 , 'int32' ) ;
46
-
47
- // Grid To Split the raw predictions : Assumes Our Model output is 1 Tensor with 13x13x425
48
- // gonna hard code all this stuff see if it works
49
- // this can be done once at the initial phase
50
- // TODO : make this more modular
51
-
52
- [ this . ConvIndex , this . ConvDims , this . AnchorsTensor ] = tf . tidy ( ( ) => {
53
- let ConvIndex = tf . range ( 0 , 13 ) ;
54
- const ConvHeightIndex = tf . tile ( ConvIndex , [ 13 ] ) ;
55
-
56
- let ConvWidthindex = tf . tile ( tf . expandDims ( ConvIndex , 0 ) , [ 13 , 1 ] ) ;
57
- ConvWidthindex = tf . transpose ( ConvWidthindex ) . flatten ( ) ;
58
-
59
- ConvIndex = tf . transpose ( tf . stack ( [ ConvHeightIndex , ConvWidthindex ] ) ) ;
60
- ConvIndex = tf . reshape ( ConvIndex , [ 13 , 13 , 1 , 2 ] ) ;
61
-
62
- const ConvDims = tf . reshape ( tf . tensor1d ( [ 13 , 13 ] ) , [ 1 , 1 , 1 , 2 ] ) ;
63
- // AnchorsTensor
64
- const Aten = tf . tensor2d ( this . anchors ) ;
65
- const AnchorsTensor = tf . reshape ( Aten , [ 1 , 1 , this . anchorsLength , 2 ] ) ;
66
-
67
- return [ ConvIndex , ConvDims , AnchorsTensor ] ;
68
- } ) ;
44
+ const Aten = tf . tensor2d ( this . anchors ) ;
45
+ this . anchorsTensor = tf . reshape ( Aten , [ 1 , 1 , this . anchorsLength , 2 ] ) ;
46
+ Aten . dispose ( ) ;
69
47
}
70
48
71
49
/**
@@ -85,6 +63,7 @@ class YOLOBase {
85
63
return results ;
86
64
}
87
65
66
+
88
67
async loadModel ( ) {
89
68
try {
90
69
this . model = await tf . loadModel ( this . modelURL ) ;
@@ -137,68 +116,22 @@ class YOLOBase {
137
116
return batched ;
138
117
}
139
118
140
-
141
119
/**
142
120
* postProcessing for the yolo output
143
121
* TODO : make this more modular in preparation for yolov3-tiny
144
- * @param rawPrediction a 4D tensor 13*13*425
122
+ * @param rawPrediction a 4D tensor
145
123
*/
146
124
async postProcess ( rawPrediction ) {
147
- const [ boxes , boxScores , classes , Indices ] = tf . tidy ( ( ) => {
148
- const reshaped = tf . reshape ( rawPrediction , [ 13 , 13 , this . anchorsLength , this . classesLength + 5 ] ) ;
149
- // Box Coords
150
- const boxxy = tf . sigmoid ( reshaped . slice ( [ 0 , 0 , 0 , 0 ] , [ 13 , 13 , this . anchorsLength , 2 ] ) ) ;
151
- const boxwh = tf . exp ( reshaped . slice ( [ 0 , 0 , 0 , 2 ] , [ 13 , 13 , this . anchorsLength , 2 ] ) ) ;
152
- // ObjectnessScore
153
- const boxConfidence = tf . sigmoid ( reshaped . slice ( [ 0 , 0 , 0 , 4 ] , [ 13 , 13 , this . anchorsLength , 1 ] ) ) ;
154
- // ClassProb
155
- const boxClassProbs = tf . softmax ( reshaped . slice ( [ 0 , 0 , 0 , 5 ] , [ 13 , 13 , this . anchorsLength , this . classesLength ] ) ) ;
156
-
157
- // from boxes with xy wh to x1,y1 x2,y2
158
- // xy:bounding box center wh:width/Height
159
- // Mainly for NMS + rescaling
160
- // x1 = x + (h/2)
161
- // y1 = y - (w/2)
162
- // x2 = x - (h/2)
163
- // y2 = y + (w/2)
164
-
165
- const boxxy1 = tf . div ( tf . add ( boxxy , this . ConvIndex ) , this . ConvDims ) ;
166
- const boxwh1 = tf . div ( tf . mul ( boxwh , this . AnchorsTensor ) , this . ConvDims ) ;
167
- const div = tf . div ( boxwh1 , tf . scalar ( 2 ) ) ;
168
- const boxMins = tf . sub ( boxxy1 , div ) ;
169
- const boxMaxes = tf . add ( boxxy1 , div ) ;
170
- const size = [ boxMins . shape [ 0 ] , boxMins . shape [ 1 ] , boxMins . shape [ 2 ] , 1 ] ;
171
- // main box tensor
172
- const finalboxes = tf . concat ( [
173
- boxMins . slice ( [ 0 , 0 , 0 , 1 ] , size ) ,
174
- boxMins . slice ( [ 0 , 0 , 0 , 0 ] , size ) ,
175
- boxMaxes . slice ( [ 0 , 0 , 0 , 1 ] , size ) ,
176
- boxMaxes . slice ( [ 0 , 0 , 0 , 0 ] , size ) ,
177
- ] , 3 ) . reshape ( [ 845 , 4 ] ) ;
178
-
179
- // Filterboxes by objectness threshold
180
- // not filtering / getting a mask really
181
- const boxConfidence1 = boxConfidence . squeeze ( [ 3 ] ) ;
182
- const objectnessMask = tf . greaterEqual ( boxConfidence1 , tf . scalar ( this . filterBoxesThreshold ) ) ;
183
-
184
- // Filterboxes by class probability threshold
185
- const boxScores1 = tf . mul ( boxConfidence1 , tf . max ( boxClassProbs , 3 ) ) ;
186
- const boxClassProbMask = tf . greaterEqual ( boxScores1 , tf . scalar ( this . classProbThreshold ) ) ;
187
-
188
- // getting classes indices
189
- const classes1 = tf . argMax ( boxClassProbs , - 1 ) ;
190
-
191
- // Final Mask each elem that survived both filters (0x0 0x1 1x0 = fail ) 1x1 = survived
192
- const finalMask = boxClassProbMask . mul ( objectnessMask ) ;
193
-
194
- const indices = finalMask . flatten ( ) . toInt ( ) . mul ( this . indicesTensor ) ;
195
- return [ finalboxes , boxScores1 , classes1 , indices ] ;
196
- } ) ;
125
+ const [ boxes , boxScores , classes , Indices ] = tf . tidy ( ( ) => this . split ( rawPrediction . squeeze ( [ 0 ] ) , this . anchorsTensor ) ) ;
126
+ // for the case of yolov3 there are 2 output tensors
127
+ // v3
128
+ // this.split(rawPrediction[0], this.AnchorsTensorL1);
129
+ // this.split(rawPrediction[1], this.AnchorsTensorL2);
197
130
198
131
// we started at one in the range so we remove 1 now
199
132
// this is where a major bottleneck happens
200
133
// this can be replaced with tf.boolean_mask() if tfjs team implements it
201
- // thisis also why wehave 2 tf.tidy()'s
134
+ // this is also why we have 2 tf.tidy()'s
202
135
// more info : https://github.com/ModelDepot/tfjs-yolo-tiny/issues/6
203
136
204
137
const indicesArr = Array . from ( await Indices . data ( ) ) . filter ( i => i > 0 ) . map ( i => i - 1 ) ;
@@ -218,8 +151,11 @@ class YOLOBase {
218
151
// Image Rescale
219
152
const Height = tf . scalar ( this . imgHeight ) ;
220
153
const Width = tf . scalar ( this . imgWidth ) ;
221
- const ImageDims = tf . stack ( [ Height , Width , Height , Width ] ) . reshape ( [ 1 , 4 ] ) ;
154
+ // this for x1 y1 x2 y2
155
+ // const ImageDims = tf.stack([Height, Width, Height, Width]).reshape([1, 4]);
222
156
157
+ // this for x y w h
158
+ const ImageDims = tf . stack ( [ Width , Height , Width , Height ] ) . reshape ( [ 1 , 4 ] ) ;
223
159
const filteredBoxes2 = filteredBoxes1 . mul ( ImageDims ) ;
224
160
return [ filteredBoxes2 , filteredScores1 , filteredclasses1 ] ;
225
161
} ) ;
@@ -246,40 +182,96 @@ class YOLOBase {
246
182
let Push = true ;
247
183
for ( let i = 0 ; i < selectedBoxes . length ; i += 1 ) {
248
184
// Compare IoU of zipped[1], since that is the box coordinates arr
249
- const IOU = iou ( box [ 1 ] , selectedBoxes [ i ] [ 1 ] ) ;
185
+ // this is a quick fix that could be done much better: iou() currently takes x1 y1 x2 y2 but our boxes are x y w h
186
+ // this is dirty & needs to be directly edited in the iou func
187
+ // x1 = x - (w/2)
188
+ // y1 = y - (h/2)
189
+ // x2 = x + (w/2)
190
+ // y2 = y + (h/2)
191
+ const a = selectedBoxes [ i ] [ 1 ] ;
192
+ const b = box [ 1 ] ;
193
+ const box1 = [ b [ 0 ] - ( b [ 2 ] / 2 ) , b [ 1 ] - ( b [ 3 ] / 2 ) , b [ 0 ] + ( b [ 2 ] / 2 ) , b [ 1 ] + ( b [ 3 ] / 2 ) ] ;
194
+ const box2 = [ a [ 0 ] - ( a [ 2 ] / 2 ) , a [ 1 ] - ( a [ 3 ] / 2 ) , a [ 0 ] + ( a [ 2 ] / 2 ) , a [ 1 ] + ( a [ 3 ] / 2 ) ] ;
195
+ const IOU = iou ( box1 , box2 ) ;
250
196
if ( IOU > this . IOUThreshold ) {
251
197
Push = false ;
252
198
break ;
253
199
}
254
200
}
255
201
if ( Push ) selectedBoxes . push ( box ) ;
256
202
} ) ;
257
-
258
203
// final phase
259
204
const detections = [ ] ;
260
205
// add any output you want
261
206
for ( let id = 0 ; id < selectedBoxes . length ; id += 1 ) {
262
207
const classProb = selectedBoxes [ id ] [ 0 ] ;
263
- const classProbRounded = Math . round ( classProb * 1000 ) / 10 ;
264
208
const className = this . classNames [ selectedBoxes [ id ] [ 2 ] ] ;
209
+ const [ x , y , w , h ] = selectedBoxes [ id ] [ 1 ] ;
265
210
const classIndex = selectedBoxes [ id ] [ 2 ] ;
266
- const [ y1 , x1 , y2 , x2 ] = selectedBoxes [ id ] [ 1 ] ;
267
211
// TODO : add a hsla color for later visualization
268
212
const detection = {
269
213
id,
270
214
className,
271
- classIndex,
272
215
classProb,
273
- classProbRounded ,
274
- x1 ,
275
- y1 ,
276
- x2 ,
277
- y2 ,
216
+ classIndex ,
217
+ x ,
218
+ y ,
219
+ w ,
220
+ h ,
278
221
} ;
279
222
detections . push ( detection ) ;
280
223
}
281
224
return detections ;
282
225
}
226
+
227
+ split ( rawPrediction , AnchorsTensor ) {
228
+ const [ outputWidth , outputHeight ] = [ rawPrediction . shape [ 0 ] , rawPrediction . shape [ 1 ] ] ;
229
+ const reshaped = tf . reshape ( rawPrediction , [ outputWidth , outputHeight , this . anchorsLength , this . classesLength + 5 ] ) ;
230
+ // Box Coords
231
+ const boxxy = tf . sigmoid ( reshaped . slice ( [ 0 , 0 , 0 , 0 ] , [ outputWidth , outputHeight , this . anchorsLength , 2 ] ) ) ;
232
+ const boxwh = tf . exp ( reshaped . slice ( [ 0 , 0 , 0 , 2 ] , [ outputWidth , outputHeight , this . anchorsLength , 2 ] ) ) ;
233
+ // ObjectnessScore
234
+ const boxConfidence = tf . sigmoid ( reshaped . slice ( [ 0 , 0 , 0 , 4 ] , [ outputWidth , outputHeight , this . anchorsLength , 1 ] ) ) ;
235
+ // ClassProb
236
+ const boxClassProbs = tf . softmax ( reshaped . slice ( [ 0 , 0 , 0 , 5 ] , [ outputWidth , outputHeight , this . anchorsLength , this . classesLength ] ) ) ;
237
+
238
+ // this assumes that we have a square output tensor eg 13x13 // 26x26
239
+ // this is making an index map to add to the w y coordinates
240
+ // see this
241
+ let ConvIndex = tf . range ( 0 , outputWidth ) ;
242
+ const ConvHeightIndex = tf . tile ( ConvIndex , [ outputWidth ] ) ;
243
+ let ConvWidthindex = tf . tile ( tf . expandDims ( ConvIndex , 0 ) , [ outputWidth , 1 ] ) ;
244
+ ConvWidthindex = tf . transpose ( ConvWidthindex ) . flatten ( ) ;
245
+ ConvIndex = tf . transpose ( tf . stack ( [ ConvHeightIndex , ConvWidthindex ] ) ) ;
246
+
247
+ ConvIndex = tf . reshape ( ConvIndex , [ outputWidth , outputWidth , 1 , 2 ] ) ;
248
+ const ConvDims = tf . reshape ( tf . tensor1d ( [ outputWidth , outputWidth ] ) , [ 1 , 1 , 1 , 2 ] ) ;
249
+ // ConvIndex.print();
250
+ const boxxy1 = tf . div ( tf . add ( boxxy , ConvIndex ) , ConvDims ) ;
251
+ const boxwh1 = tf . div ( tf . mul ( boxwh , AnchorsTensor ) , ConvDims ) ;
252
+
253
+ // TODO : need to get the anchors size frome the input anchors tensor
254
+ const finalboxes = tf . concat ( [ boxxy1 , boxwh1 ] , 3 ) . reshape ( [ ( outputWidth * outputHeight * this . anchorsLength ) , 4 ] ) ;
255
+
256
+ // Filterboxes by objectness threshold
257
+ // not filtering / getting a mask really
258
+ const boxConfidence1 = boxConfidence . squeeze ( [ 3 ] ) ;
259
+ const objectnessMask = tf . greaterEqual ( boxConfidence1 , tf . scalar ( this . filterBoxesThreshold ) ) ;
260
+
261
+ // Filterboxes by class probability threshold
262
+ const boxScores1 = tf . mul ( boxConfidence1 , tf . max ( boxClassProbs , 3 ) ) ;
263
+ const boxClassProbMask = tf . greaterEqual ( boxScores1 , tf . scalar ( this . classProbThreshold ) ) ;
264
+
265
+ // getting classes indices
266
+ const classes1 = tf . argMax ( boxClassProbs , - 1 ) ;
267
+
268
+ // removed this from init as it semm to not be affecting perf very much
269
+ const indicesTensor = tf . range ( 1 , ( outputWidth * outputHeight * this . anchorsLength ) + 1 , 1 , 'int32' ) ;
270
+ // Final Mask each elem that survived both filters (0x0 0x1 1x0 = fail ) 1x1 = survived
271
+ const finalMask = boxClassProbMask . mul ( objectnessMask ) ;
272
+ const indices = finalMask . flatten ( ) . toInt ( ) . mul ( indicesTensor ) ;
273
+ return [ finalboxes , boxScores1 , classes1 , indices ] ;
274
+ }
283
275
}
284
276
285
277
/**
 * Factory wrapper: builds a YOLOBase instance from the given options so
 * callers do not need `new`.
 *
 * @param {Object} [options] forwarded unchanged to the YOLOBase constructor.
 * @returns {YOLOBase} the newly constructed instance.
 */
const YOLO = (options) => {
  const detector = new YOLOBase(options);
  return detector;
};
0 commit comments