docs: model: tensorflow: Python code for DNNClassifier

sk-ip · pdxjohnny · commit 38c09317ad72 · 2020-03-05T14:58:35.000-08:00
Related: #433
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Create a fresh archive of the git repo for release instead of cleaning
   existing repo with `git clean` for development service release command.
 - Simplified SLR tests for scratch model
+- Test tensorflow DNNClassifier documentation examples in CI
 
 ## [0.3.4] - 2020-02-28
 ### Added
diff --git a/docs/plugins/dffml_model.rst b/docs/plugins/dffml_model.rst
@@ -34,59 +34,34 @@ tfdnnc
 
 Implemented using Tensorflow's DNNClassifier.
 
-.. code-block:: console
+First we create the training and testing datasets
+
+.. literalinclude:: /../model/tensorflow/examples/tfdnnc/train_data.sh
+
+.. literalinclude:: /../model/tensorflow/examples/tfdnnc/test_data.sh
+
+Train the model
+
+.. literalinclude:: /../model/tensorflow/examples/tfdnnc/train.sh
+
+Assess the accuracy
+
+.. literalinclude:: /../model/tensorflow/examples/tfdnnc/accuracy.sh
+
+Output
+
+.. code-block::
 
-    $ wget http://download.tensorflow.org/data/iris_training.csv
-    $ wget http://download.tensorflow.org/data/iris_test.csv
-    $ head iris_training.csv
-    $ sed -i 's/.*setosa,versicolor,virginica/SepalLength,SepalWidth,PetalLength,PetalWidth,classification/g' *.csv
-    $ head iris_training.csv
-    $ dffml train \
-        -model tfdnnc \
-        -model-epochs 3000 \
-        -model-steps 20000 \
-        -model-predict classification:int:1 \
-        -model-classifications 0 1 2 \
-        -model-clstype int \
-        -sources iris=csv \
-        -source-filename iris_training.csv \
-        -model-features \
-          SepalLength:float:1 \
-          SepalWidth:float:1 \
-          PetalLength:float:1 \
-          PetalWidth:float:1 \
-        -log debug
-    ... lots of output ...
-    $ dffml accuracy \
-        -model tfdnnc \
-        -model-predict classification:int:1 \
-        -model-classifications 0 1 2 \
-        -model-clstype int \
-        -sources iris=csv \
-        -source-filename iris_test.csv \
-        -model-features \
-          SepalLength:float:1 \
-          SepalWidth:float:1 \
-          PetalLength:float:1 \
-          PetalWidth:float:1 \
-        -log critical
     0.99996233782
-    $ dffml predict all \
-        -model tfdnnc \
-        -model-predict classification:int:1 \
-        -model-classifications 0 1 2 \
-        -model-clstype int \
-        -sources iris=csv \
-        -source-filename iris_test.csv \
-        -model-features \
-          SepalLength:float:1 \
-          SepalWidth:float:1 \
-          PetalLength:float:1 \
-          PetalWidth:float:1 \
-        -caching \
-        -log critical \
-      > results.json
-    $ head -n 33 results.json
+
+Make a prediction
+
+.. literalinclude:: /../model/tensorflow/examples/tfdnnc/predict.sh
+
+Output
+
+.. code-block:: json
+
     [
         {
             "extra": {},
@@ -107,25 +82,11 @@ Implemented using Tensorflow's DNNClassifier.
             },
             "key": "0"
         },
-        {
-            "extra": {},
-            "features": {
-                "PetalLength": 5.4,
-                "PetalWidth": 2.1,
-                "SepalLength": 6.9,
-                "SepalWidth": 3.1,
-                "classification": 2
-            },
-            "last_updated": "2019-07-31T02:00:12Z",
-            "prediction": {
-                "classification":
-                {
-                    "confidence": 0.9999984502792358,
-                    "value": 2
-                }
-            },
-            "key": "1"
-        },
+    ]
+
+Example usage of the Tensorflow DNNClassifier model using the Python API
+
+.. literalinclude:: /../model/tensorflow/examples/tfdnnc/tfdnnc.py
 
 **Args**
 
diff --git a/model/tensorflow/examples/tfdnnc/__init__.py b/model/tensorflow/examples/tfdnnc/__init__.py
diff --git a/model/tensorflow/examples/tfdnnc/accuracy.sh b/model/tensorflow/examples/tfdnnc/accuracy.sh
@@ -0,0 +1,13 @@
+dffml accuracy \
+  -model tfdnnc \
+  -model-predict classification:int:1 \
+  -model-classifications 0 1 2 \
+  -model-clstype int \
+  -sources iris=csv \
+  -source-filename iris_test.csv \
+  -model-features \
+    SepalLength:float:1 \
+    SepalWidth:float:1 \
+    PetalLength:float:1 \
+    PetalWidth:float:1 \
+  -log critical
diff --git a/model/tensorflow/examples/tfdnnc/predict.sh b/model/tensorflow/examples/tfdnnc/predict.sh
@@ -0,0 +1,13 @@
+echo -e 'SepalLength,SepalWidth,PetalLength,PetalWidth\n5.9,3.0,4.2,1.5\n' | \
+dffml predict all \
+  -model tfdnnc \
+  -model-predict classification:int:1 \
+  -model-classifications 0 1 2 \
+  -model-clstype int \
+  -sources iris=csv \
+  -model-features \
+    SepalLength:float:1 \
+    SepalWidth:float:1 \
+    PetalLength:float:1 \
+    PetalWidth:float:1 \
+  -source-filename /dev/stdin
diff --git a/model/tensorflow/examples/tfdnnc/test_data.sh b/model/tensorflow/examples/tfdnnc/test_data.sh
@@ -0,0 +1,2 @@
+wget http://download.tensorflow.org/data/iris_test.csv
+sed -i 's/.*setosa,versicolor,virginica/SepalLength,SepalWidth,PetalLength,PetalWidth,classification/g' *.csv
diff --git a/model/tensorflow/examples/tfdnnc/test_tfdnnc.py b/model/tensorflow/examples/tfdnnc/test_tfdnnc.py
@@ -0,0 +1,66 @@
+import os
+import ast
+import sys
+import json
+import tempfile
+import contextlib
+import subprocess
+import unittest.mock
+
+from dffml.util.os import chdir
+
+
+def sh_filepath(filename):
+    return os.path.join(os.path.dirname(__file__), filename)
+
+
+@contextlib.contextmanager
+def directory_with_csv_files():
+    with tempfile.TemporaryDirectory() as tempdir:
+        with chdir(tempdir):
+            subprocess.check_output(["bash", sh_filepath("train_data.sh")])
+            subprocess.check_output(["bash", sh_filepath("test_data.sh")])
+            yield tempdir
+
+
+class TestExample(unittest.TestCase):
+    def python_test(self, filename):
+        # Path to target file
+        filepath = os.path.join(os.path.dirname(__file__), filename)
+        # Capture output
+        stdout = subprocess.check_output([sys.executable, filepath])
+        lines = stdout.decode().split("\n")
+        # Check the Accuracy
+        self.assertIn("Accuracy: 0.9", lines[0])
+        # Check the classification
+        self.assertEqual(
+            round(ast.literal_eval(lines[1])["classification"]), 1
+        )
+        self.assertEqual(
+            round(ast.literal_eval(lines[2])["classification"]), 2
+        )
+
+    def test_python_filenames(self):
+        with directory_with_csv_files() as tempdir:
+            self.python_test("tfdnnc.py")
+
+    def test_shell(self):
+        with directory_with_csv_files() as tempdir:
+            # Run training
+            subprocess.check_output(["bash", sh_filepath("train.sh")])
+            # Check the Accuracy
+            stdout = subprocess.check_output(
+                ["bash", sh_filepath("accuracy.sh")]
+            )
+            self.assertAlmostEqual(
+                float(stdout.decode().strip()), 0.9, places=0
+            )
+            # Make the prediction
+            stdout = subprocess.check_output(
+                ["bash", sh_filepath("predict.sh")]
+            )
+            records = json.loads(stdout.decode())
+            # Check the classification
+            self.assertAlmostEqual(
+                round(records[0]["prediction"]["classification"]["value"]), 1
+            )
diff --git a/model/tensorflow/examples/tfdnnc/tfdnnc.py b/model/tensorflow/examples/tfdnnc/tfdnnc.py
@@ -0,0 +1,42 @@
+from dffml import CSVSource, Features, DefFeature
+from dffml.noasync import train, accuracy, predict
+from dffml_model_tensorflow.dnnc import DNNClassifierModel
+
+model = DNNClassifierModel(
+    features=Features(
+        DefFeature("SepalLength", float, 1),
+        DefFeature("SepalWidth", float, 1),
+        DefFeature("PetalLength", float, 1),
+        DefFeature("PetalWidth", float, 1),
+    ),
+    predict=DefFeature("classification", int, 1),
+    epochs=3000,
+    steps=20000,
+    classifications=[0, 1, 2],
+    clstype=int,
+)
+
+# Train the model
+train(model, "iris_training.csv")
+
+# Assess accuracy (alternate way of specifying data source)
+print("Accuracy:", accuracy(model, CSVSource(filename="iris_test.csv")))
+
+# Make prediction
+for i, features, prediction in predict(
+    model,
+    {
+        "PetalLength": 4.2,
+        "PetalWidth": 1.5,
+        "SepalLength": 5.9,
+        "SepalWidth": 3.0,
+    },
+    {
+        "PetalLength": 5.4,
+        "PetalWidth": 2.1,
+        "SepalLength": 6.9,
+        "SepalWidth": 3.1,
+    },
+):
+    features["classification"] = prediction["classification"]["value"]
+    print(features)
diff --git a/model/tensorflow/examples/tfdnnc/train.sh b/model/tensorflow/examples/tfdnnc/train.sh
@@ -0,0 +1,15 @@
+dffml train \
+  -model tfdnnc \
+  -model-epochs 3000 \
+  -model-steps 20000 \
+  -model-predict classification:int:1 \
+  -model-classifications 0 1 2 \
+  -model-clstype int \
+  -sources iris=csv \
+  -source-filename iris_training.csv \
+  -model-features \
+    SepalLength:float:1 \
+    SepalWidth:float:1 \
+    PetalLength:float:1 \
+    PetalWidth:float:1 \
+  -log debug
diff --git a/model/tensorflow/examples/tfdnnc/train_data.sh b/model/tensorflow/examples/tfdnnc/train_data.sh
@@ -0,0 +1,2 @@
+wget http://download.tensorflow.org/data/iris_training.csv
+sed -i 's/.*setosa,versicolor,virginica/SepalLength,SepalWidth,PetalLength,PetalWidth,classification/g' *.csv

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+wget http://download.tensorflow.org/data/iris_test.csv`
	`2`	`+sed -i 's/.*setosa,versicolor,virginica/SepalLength,SepalWidth,PetalLength,PetalWidth,classification/g' *.csv`