Skip to content

Commit 95d31e6

Browse files
committed
yolov2 ready + fixed tests (i think)
1 parent b20f514 commit 95d31e6

File tree

2 files changed

+84
-91
lines changed

2 files changed

+84
-91
lines changed

src/YOLO/index.js

Lines changed: 82 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const DEFAULTS = {
1515
filterBoxesThreshold: 0.01,
1616
IOUThreshold: 0.4,
1717
classProbThreshold: 0.4,
18-
URL: 'https://raw.githubusercontent.com/ml5js/ml5-library/master/src/YOLO/model.json',
18+
URL: 'https://raw.githubusercontent.com/ml5js/ml5-data-and-models/master/models/YOLO/model.json',
1919
imageSize: 416,
2020
};
2121

@@ -41,31 +41,9 @@ class YOLOBase {
4141
}
4242

4343
init() {
44-
// indices tensor to filter the elements later on
45-
this.indicesTensor = tf.range(1, 846, 1, 'int32');
46-
47-
// Grid To Split the raw predictions : Assumes Our Model output is 1 Tensor with 13x13x425
48-
// gonna hard code all this stuff see if it works
49-
// this can be done once at the initial phase
50-
// TODO : make this more modular
51-
52-
[this.ConvIndex, this.ConvDims, this.AnchorsTensor] = tf.tidy(() => {
53-
let ConvIndex = tf.range(0, 13);
54-
const ConvHeightIndex = tf.tile(ConvIndex, [13]);
55-
56-
let ConvWidthindex = tf.tile(tf.expandDims(ConvIndex, 0), [13, 1]);
57-
ConvWidthindex = tf.transpose(ConvWidthindex).flatten();
58-
59-
ConvIndex = tf.transpose(tf.stack([ConvHeightIndex, ConvWidthindex]));
60-
ConvIndex = tf.reshape(ConvIndex, [13, 13, 1, 2]);
61-
62-
const ConvDims = tf.reshape(tf.tensor1d([13, 13]), [1, 1, 1, 2]);
63-
// AnchorsTensor
64-
const Aten = tf.tensor2d(this.anchors);
65-
const AnchorsTensor = tf.reshape(Aten, [1, 1, this.anchorsLength, 2]);
66-
67-
return [ConvIndex, ConvDims, AnchorsTensor];
68-
});
44+
const Aten = tf.tensor2d(this.anchors);
45+
this.anchorsTensor = tf.reshape(Aten, [1, 1, this.anchorsLength, 2]);
46+
Aten.dispose();
6947
}
7048

7149
/**
@@ -85,6 +63,7 @@ class YOLOBase {
8563
return results;
8664
}
8765

66+
8867
async loadModel() {
8968
try {
9069
this.model = await tf.loadModel(this.modelURL);
@@ -137,68 +116,22 @@ class YOLOBase {
137116
return batched;
138117
}
139118

140-
141119
/**
142120
* postProcessing for the yolo output
143121
* TODO : make this more modular in preparation for yolov3-tiny
144-
* @param rawPrediction a 4D tensor 13*13*425
122+
* @param rawPrediction a 4D tensor
145123
*/
146124
async postProcess(rawPrediction) {
147-
const [boxes, boxScores, classes, Indices] = tf.tidy(() => {
148-
const reshaped = tf.reshape(rawPrediction, [13, 13, this.anchorsLength, this.classesLength + 5]);
149-
// Box Coords
150-
const boxxy = tf.sigmoid(reshaped.slice([0, 0, 0, 0], [13, 13, this.anchorsLength, 2]));
151-
const boxwh = tf.exp(reshaped.slice([0, 0, 0, 2], [13, 13, this.anchorsLength, 2]));
152-
// ObjectnessScore
153-
const boxConfidence = tf.sigmoid(reshaped.slice([0, 0, 0, 4], [13, 13, this.anchorsLength, 1]));
154-
// ClassProb
155-
const boxClassProbs = tf.softmax(reshaped.slice([0, 0, 0, 5], [13, 13, this.anchorsLength, this.classesLength]));
156-
157-
// from boxes with xy wh to x1,y1 x2,y2
158-
// xy:bounding box center wh:width/Height
159-
// Mainly for NMS + rescaling
160-
// x1 = x + (h/2)
161-
// y1 = y - (w/2)
162-
// x2 = x - (h/2)
163-
// y2 = y + (w/2)
164-
165-
const boxxy1 = tf.div(tf.add(boxxy, this.ConvIndex), this.ConvDims);
166-
const boxwh1 = tf.div(tf.mul(boxwh, this.AnchorsTensor), this.ConvDims);
167-
const div = tf.div(boxwh1, tf.scalar(2));
168-
const boxMins = tf.sub(boxxy1, div);
169-
const boxMaxes = tf.add(boxxy1, div);
170-
const size = [boxMins.shape[0], boxMins.shape[1], boxMins.shape[2], 1];
171-
// main box tensor
172-
const finalboxes = tf.concat([
173-
boxMins.slice([0, 0, 0, 1], size),
174-
boxMins.slice([0, 0, 0, 0], size),
175-
boxMaxes.slice([0, 0, 0, 1], size),
176-
boxMaxes.slice([0, 0, 0, 0], size),
177-
], 3).reshape([845, 4]);
178-
179-
// Filterboxes by objectness threshold
180-
// not filtering / getting a mask really
181-
const boxConfidence1 = boxConfidence.squeeze([3]);
182-
const objectnessMask = tf.greaterEqual(boxConfidence1, tf.scalar(this.filterBoxesThreshold));
183-
184-
// Filterboxes by class probability threshold
185-
const boxScores1 = tf.mul(boxConfidence1, tf.max(boxClassProbs, 3));
186-
const boxClassProbMask = tf.greaterEqual(boxScores1, tf.scalar(this.classProbThreshold));
187-
188-
// getting classes indices
189-
const classes1 = tf.argMax(boxClassProbs, -1);
190-
191-
// Final Mask each elem that survived both filters (0x0 0x1 1x0 = fail ) 1x1 = survived
192-
const finalMask = boxClassProbMask.mul(objectnessMask);
193-
194-
const indices = finalMask.flatten().toInt().mul(this.indicesTensor);
195-
return [finalboxes, boxScores1, classes1, indices];
196-
});
125+
const [boxes, boxScores, classes, Indices] = tf.tidy(() => this.split(rawPrediction.squeeze([0]), this.anchorsTensor));
126+
// for the case of yolov3 there are 2 output tensors
127+
// v3
128+
// this.split(rawPrediction[0], this.AnchorsTensorL1);
129+
// this.split(rawPrediction[1], this.AnchorsTensorL2);
197130

198131
// we started at one in the range so we remove 1 now
199132
// this is where a major bottleneck happens
200133
// this can be replaced with tf.boolean_mask() if tfjs team implements it
201-
// thisis also why wehave 2 tf.tidy()'s
134+
// this is also why we have 2 tf.tidy()'s
202135
// more info : https://github.com/ModelDepot/tfjs-yolo-tiny/issues/6
203136

204137
const indicesArr = Array.from(await Indices.data()).filter(i => i > 0).map(i => i - 1);
@@ -218,8 +151,11 @@ class YOLOBase {
218151
// Image Rescale
219152
const Height = tf.scalar(this.imgHeight);
220153
const Width = tf.scalar(this.imgWidth);
221-
const ImageDims = tf.stack([Height, Width, Height, Width]).reshape([1, 4]);
154+
// this is for x1 y1 x2 y2
155+
// const ImageDims = tf.stack([Height, Width, Height, Width]).reshape([1, 4]);
222156

157+
// this is for x y w h
158+
const ImageDims = tf.stack([Width, Height, Width, Height]).reshape([1, 4]);
223159
const filteredBoxes2 = filteredBoxes1.mul(ImageDims);
224160
return [filteredBoxes2, filteredScores1, filteredclasses1];
225161
});
@@ -246,40 +182,96 @@ class YOLOBase {
246182
let Push = true;
247183
for (let i = 0; i < selectedBoxes.length; i += 1) {
248184
// Compare IoU of zipped[1], since that is the box coordinates arr
249-
const IOU = iou(box[1], selectedBoxes[i][1]);
185+
// this is a quick fix that could be done much better: iou currently takes x1 y1 x2 y2 but we have x y w h
186+
// this is dirty & needs to be directly edited in the iou func
187+
// x1 = x - (w/2)
188+
// y1 = y - (h/2)
189+
// x2 = x + (w/2)
190+
// y2 = y + (h/2)
191+
const a = selectedBoxes[i][1];
192+
const b = box[1];
193+
const box1 = [b[0] - (b[2] / 2), b[1] - (b[3] / 2), b[0] + (b[2] / 2), b[1] + (b[3] / 2)];
194+
const box2 = [a[0] - (a[2] / 2), a[1] - (a[3] / 2), a[0] + (a[2] / 2), a[1] + (a[3] / 2)];
195+
const IOU = iou(box1, box2);
250196
if (IOU > this.IOUThreshold) {
251197
Push = false;
252198
break;
253199
}
254200
}
255201
if (Push) selectedBoxes.push(box);
256202
});
257-
258203
// final phase
259204
const detections = [];
260205
// add any output you want
261206
for (let id = 0; id < selectedBoxes.length; id += 1) {
262207
const classProb = selectedBoxes[id][0];
263-
const classProbRounded = Math.round(classProb * 1000) / 10;
264208
const className = this.classNames[selectedBoxes[id][2]];
209+
const [x, y, w, h] = selectedBoxes[id][1];
265210
const classIndex = selectedBoxes[id][2];
266-
const [y1, x1, y2, x2] = selectedBoxes[id][1];
267211
// TODO : add a hsla color for later visualization
268212
const detection = {
269213
id,
270214
className,
271-
classIndex,
272215
classProb,
273-
classProbRounded,
274-
x1,
275-
y1,
276-
x2,
277-
y2,
216+
classIndex,
217+
x,
218+
y,
219+
w,
220+
h,
278221
};
279222
detections.push(detection);
280223
}
281224
return detections;
282225
}
226+
227+
split(rawPrediction, AnchorsTensor) {
228+
const [outputWidth, outputHeight] = [rawPrediction.shape[0], rawPrediction.shape[1]];
229+
const reshaped = tf.reshape(rawPrediction, [outputWidth, outputHeight, this.anchorsLength, this.classesLength + 5]);
230+
// Box Coords
231+
const boxxy = tf.sigmoid(reshaped.slice([0, 0, 0, 0], [outputWidth, outputHeight, this.anchorsLength, 2]));
232+
const boxwh = tf.exp(reshaped.slice([0, 0, 0, 2], [outputWidth, outputHeight, this.anchorsLength, 2]));
233+
// ObjectnessScore
234+
const boxConfidence = tf.sigmoid(reshaped.slice([0, 0, 0, 4], [outputWidth, outputHeight, this.anchorsLength, 1]));
235+
// ClassProb
236+
const boxClassProbs = tf.softmax(reshaped.slice([0, 0, 0, 5], [outputWidth, outputHeight, this.anchorsLength, this.classesLength]));
237+
238+
// this assumes that we have a square output tensor eg 13x13 // 26x26
239+
// this is making an index map to add to the x y coordinates
240+
// see this
241+
let ConvIndex = tf.range(0, outputWidth);
242+
const ConvHeightIndex = tf.tile(ConvIndex, [outputWidth]);
243+
let ConvWidthindex = tf.tile(tf.expandDims(ConvIndex, 0), [outputWidth, 1]);
244+
ConvWidthindex = tf.transpose(ConvWidthindex).flatten();
245+
ConvIndex = tf.transpose(tf.stack([ConvHeightIndex, ConvWidthindex]));
246+
247+
ConvIndex = tf.reshape(ConvIndex, [outputWidth, outputWidth, 1, 2]);
248+
const ConvDims = tf.reshape(tf.tensor1d([outputWidth, outputWidth]), [1, 1, 1, 2]);
249+
// ConvIndex.print();
250+
const boxxy1 = tf.div(tf.add(boxxy, ConvIndex), ConvDims);
251+
const boxwh1 = tf.div(tf.mul(boxwh, AnchorsTensor), ConvDims);
252+
253+
// TODO : need to get the anchors size from the input anchors tensor
254+
const finalboxes = tf.concat([boxxy1, boxwh1], 3).reshape([(outputWidth * outputHeight * this.anchorsLength), 4]);
255+
256+
// Filterboxes by objectness threshold
257+
// not filtering / getting a mask really
258+
const boxConfidence1 = boxConfidence.squeeze([3]);
259+
const objectnessMask = tf.greaterEqual(boxConfidence1, tf.scalar(this.filterBoxesThreshold));
260+
261+
// Filterboxes by class probability threshold
262+
const boxScores1 = tf.mul(boxConfidence1, tf.max(boxClassProbs, 3));
263+
const boxClassProbMask = tf.greaterEqual(boxScores1, tf.scalar(this.classProbThreshold));
264+
265+
// getting classes indices
266+
const classes1 = tf.argMax(boxClassProbs, -1);
267+
268+
// removed this from init as it seems to not be affecting perf very much
269+
const indicesTensor = tf.range(1, (outputWidth * outputHeight * this.anchorsLength) + 1, 1, 'int32');
270+
// Final Mask each elem that survived both filters (0x0 0x1 1x0 = fail ) 1x1 = survived
271+
const finalMask = boxClassProbMask.mul(objectnessMask);
272+
const indices = finalMask.flatten().toInt().mul(indicesTensor);
273+
return [finalboxes, boxScores1, classes1, indices];
274+
}
283275
}
284276

285277
const YOLO = options => new YOLOBase(options);

src/YOLO/index_test.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ describe('YOLO', () => {
2525

2626
beforeEach(async () => {
2727
jasmine.DEFAULT_TIMEOUT_INTERVAL = 100000;
28-
yolo = new ml5.YOLO();
28+
yolo = ml5.YOLO();
29+
await yolo.loadModel();
2930
});
3031

3132
it('instantiates the YOLO classifier with defaults', () => {

0 commit comments

Comments
 (0)