-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataprep.js
More file actions
87 lines (71 loc) · 2.99 KB
/
dataprep.js
File metadata and controls
87 lines (71 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
// This program is highly inspired by the Doodle Classifier made by
// Daniel Shiffman in "The coding train" as part of his toy neural
// network library. This is the Tensorflow version of his work with
// some tweaks and modifications.
// https://github.com/CodingTrain/Toy-Neural-Network-JS/tree/master/examples/doodle_classification
// This function create the xs and ys tensors from the data tables
function prepareData(data, labels){
// Local arrays that will contain the data to be converted to tensors
let doodles = [];
let labelsData = [];
// TODO: Return something that's less cancerous
let returnedTensors = [];
// For all doodles
for (let i = 0; i < data.length; i++){
// Let's create a pixel array to store each of them
let pixels = [];
// For all pixels in the Doodle
for (let j = 0; j < dataLength; j++){
// Let's add the pixel to the pixel array and normalize it (/255) to
// get a value between 0 and 1.
pixels[j] = data[i][j] / 255;
}
// Add the pixels of this doodle to the doodle array
doodles[i] = pixels;
// Let's look what's the doodleLabelList index of the label of this doodle
let doodleIndex = doodleLabelList.indexOf(labels[i]);
// Add this index to the labels array
labelsData[i] = doodleIndex;
}
// Let's create the xs tensor with the doodles array
xs = tf.tensor2d(doodles);
// We need to use the "oneHot" function of tensorflow to process the labels
let labelsTensor = tf.tensor1d(labelsData, 'int32');
ys = tf.oneHot(labelsTensor, doodleLabelList.length).cast('float32');
// Remove the labelsTensor (which was temporary) to avoid dataleaks
labelsTensor.dispose();
// Let's set the sourceXs and Ys for what we just created
returnedTensors[0] = xs;
returnedTensors[1] = ys;
return returnedTensors;
}
// Concatenates all the training and testing labels/data
function initializeData(){
// Log progress
console.log("Initializing data");
// For all doodles
for(let i = 0; i < doodleLabelList.length; i++){
// Create a dataObject for that doodle
dataObjectsArray[i] = new dataObject(doodleLabelList[i]);
dataObjectsArray[i].loadBytesData();
dataObjectsArray[i].splitData();
}
// Let's merge all the arrays of the data objects into single ones
// For all data objects
for(let i = 0; i < dataObjectsArray.length; i++){
// Concatenate the training labels
training_labels = training_labels.
concat(dataObjectsArray[i].getTrainingLabels());
// Same for the testing labels
testing_labels = testing_labels.
concat(dataObjectsArray[i].getTestingLabels());
// Same for the training data
training_data = training_data.
concat(dataObjectsArray[i].getTrainingData());
// Same for the testing data
testing_data = testing_data.
concat(dataObjectsArray[i].getTestingData());
}
// Log progress
console.log("Done");
}