@@ -17,13 +17,22 @@ dffml_model_tensorflow
1717 pip install dffml-model-tensorflow
1818
1919
20+ .. note ::
21+
22+ It's important to keep the hidden layer config and feature config the same
23+ across invocations of train, predict, and accuracy methods.
24+
25+ Models are saved under the ``directory`` parameter in subdirectories named
26+ after the hash of their feature names and hidden layer config. This means
27+ that if any of those parameters change between invocations, the model is
28+ being told to look for a different saved model.
29+
2030tfdnnc
2131~~~~~~
2232
2333*Core*
2434
25- Implemented using Tensorflow's DNNClassifier. Models are saved under the
26- ``directory `` in subdirectories named after the hash of their feature names.
35+ Implemented using Tensorflow's DNNClassifier.
2736
2837.. code-block :: console
2938
@@ -33,49 +42,49 @@ Implemented using Tensorflow's DNNClassifier. Models are saved under the
3342 $ sed -i 's/.*setosa,versicolor,virginica/SepalLength,SepalWidth,PetalLength,PetalWidth,classification/g' *.csv
3443 $ head iris_training.csv
3544 $ dffml train \
36- -model tfdnnc \
37- -model-epochs 3000 \
38- -model-steps 20000 \
39- -model-classification classification \
40- -model-classifications 0 1 2 \
41- -model-clstype int \
42- -sources iris=csv \
43- -source-filename iris_training.csv \
44- -features \
45- def:SepalLength:float:1 \
46- def:SepalWidth:float:1 \
47- def:PetalLength:float:1 \
48- def:PetalWidth:float:1 \
49- -log debug
45+ -model tfdnnc \
46+ -model-epochs 3000 \
47+ -model-steps 20000 \
48+ -model-classification classification \
49+ -model-classifications 0 1 2 \
50+ -model-clstype int \
51+ -sources iris=csv \
52+ -source-filename iris_training.csv \
53+ -features \
54+ def:SepalLength:float:1 \
55+ def:SepalWidth:float:1 \
56+ def:PetalLength:float:1 \
57+ def:PetalWidth:float:1 \
58+ -log debug
5059 ... lots of output ...
5160 $ dffml accuracy \
52- -model tfdnnc \
53- -model-classification classification \
54- -model-classifications 0 1 2 \
55- -model-clstype int \
56- -sources iris=csv \
57- -source-filename iris_test.csv \
58- -features \
59- def:SepalLength:float:1 \
60- def:SepalWidth:float:1 \
61- def:PetalLength:float:1 \
62- def:PetalWidth:float:1 \
63- -log critical
61+ -model tfdnnc \
62+ -model-classification classification \
63+ -model-classifications 0 1 2 \
64+ -model-clstype int \
65+ -sources iris=csv \
66+ -source-filename iris_test.csv \
67+ -features \
68+ def:SepalLength:float:1 \
69+ def:SepalWidth:float:1 \
70+ def:PetalLength:float:1 \
71+ def:PetalWidth:float:1 \
72+ -log critical
6473 0.99996233782
6574 $ dffml predict all \
66- -model tfdnnc \
67- -model-classification classification \
68- -model-classifications 0 1 2 \
69- -model-clstype int \
70- -sources iris=csv \
71- -source-filename iris_test.csv \
72- -features \
73- def:SepalLength:float:1 \
74- def:SepalWidth:float:1 \
75- def:PetalLength:float:1 \
76- def:PetalWidth:float:1 \
77- -caching \
78- -log critical \
75+ -model tfdnnc \
76+ -model-classification classification \
77+ -model-classifications 0 1 2 \
78+ -model-clstype int \
79+ -sources iris=csv \
80+ -source-filename iris_test.csv \
81+ -features \
82+ def:SepalLength:float:1 \
83+ def:SepalWidth:float:1 \
84+ def:PetalLength:float:1 \
85+ def:PetalWidth:float:1 \
86+ -caching \
87+ -log critical \
7988 > results.json
8089 $ head -n 33 results.json
8190 [
@@ -147,6 +156,124 @@ Implemented using Tensorflow's DNNClassifier. Models are saved under the
147156 - default: <class 'str'>
148157 - Data type of classifications values (default: str)
149158
159+ tfdnnr
160+ ~~~~~~
161+
162+ *Core*
163+
164+ Implemented using Tensorflow's DNNEstimator.
165+
166+ Usage:
167+
168+ * predict: Name of the feature we are trying to predict, which is also used as the target during training.
169+
170+ Generating train and test data
171+
172+ * This creates the files `train.csv` and `test.csv`. Make sure to back up
173+ any files with the same names in the directory where these commands are
174+ run, as they overwrite any existing files.
175+
176+ .. code-block :: console
177+
178+ $ cat > train.csv << EOF
179+ Feature1,Feature2,TARGET
180+ 0.93,0.68,3.89
181+ 0.24,0.42,1.75
182+ 0.36,0.68,2.75
183+ 0.53,0.31,2.00
184+ 0.29,0.25,1.32
185+ 0.29,0.52,2.14
186+ EOF
187+ $ cat > test.csv << EOF
188+ Feature1,Feature2,TARGET
189+ 0.57,0.84,3.65
190+ 0.95,0.19,2.46
191+ 0.23,0.15,0.93
192+ EOF
193+ $ dffml train \
194+ -model tfdnnr \
195+ -model-epochs 300 \
196+ -model-steps 2000 \
197+ -model-predict TARGET \
198+ -model-hidden 8 16 8 \
199+ -sources s=csv \
200+ -source-readonly \
201+ -source-filename train.csv \
202+ -features \
203+ def:Feature1:float:1 \
204+ def:Feature2:float:1 \
205+ -log debug
206+ Enabling debug log shows tensorflow losses...
207+ $ dffml accuracy \
208+ -model tfdnnr \
209+ -model-predict TARGET \
210+ -model-hidden 8 16 8 \
211+ -sources s=csv \
212+ -source-readonly \
213+ -source-filename test.csv \
214+ -features \
215+ def:Feature1:float:1 \
216+ def:Feature2:float:1 \
217+ -log critical
218+ 0.9468210011
219+ $ echo -e 'Feature1,Feature2,TARGET\n0.21,0.18,0.84\n' | \
220+ dffml predict all \
221+ -model tfdnnr \
222+ -model-predict TARGET \
223+ -model-hidden 8 16 8 \
224+ -sources s=csv \
225+ -source-readonly \
226+ -source-filename /dev/stdin \
227+ -features \
228+ def:Feature1:float:1 \
229+ def:Feature2:float:1 \
230+ -log critical
231+ [
232+ {
233+ "extra": {},
234+ "features": {
235+ "Feature1": 0.21,
236+ "Feature2": 0.18,
237+ "TARGET": 0.84
238+ },
239+ "last_updated": "2019-10-24T15:26:41Z",
240+ "prediction": {
241+ "confidence": NaN,
242+ "value": 1.1983429193496704
243+ },
244+ "src_url": 0
245+ }
246+ ]
247+
248+ The ``NaN`` in ``confidence`` is the expected behaviour (see the TODO in
249+ predict).
250+
251+ **Args**
252+
253+ - directory: String
254+
255+ - default: /home/user/.cache/dffml/tensorflow
256+ - Directory where state should be saved
257+
258+ - steps: Integer
259+
260+ - default: 3000
261+ - Number of steps to train the model
262+
263+ - epochs: Integer
264+
265+ - default: 30
266+ - Number of iterations to pass over all repos in a source
267+
268+ - hidden: List of integers
269+
270+ - default: [12, 40, 15]
271+ - List length is the number of hidden layers in the network. Each entry in the list is the number of nodes in that hidden layer
272+
273+ - predict: String
274+
275+ - Feature name holding truth value
276+
150277dffml_model_scratch
151278-------------------
152279
0 commit comments