intel
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 1 addition & 0 deletions
diff --git a/docs/plugins/dffml_model.rst b/docs/plugins/dffml_model.rst
Lines changed: 2 additions & 2 deletions
diff --git a/model/scratch/dffml_model_scratch/slr.py b/model/scratch/dffml_model_scratch/slr.py
Lines changed: 112 additions & 150 deletions
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Test scikit LR documentation examples in CI
 - Create a fresh archive of the git repo for release instead of cleaning
   existing repo with `git clean` for development service release command.
+- Simplified SLR tests for scratch model
 
 ## [0.3.4] - 2020-02-28
 ### Added
 
@@ -548,9 +548,9 @@ hash of their feature names.
 
   - Features to train on
 
-- directory: String
+- directory: Path
 
-  - default: /home/user/.cache/dffml/scratch
+  - default: ~/.cache/dffml/scratch
   - Directory where state should be saved
 
 dffml_model_scikit
 
@@ -1,93 +1,140 @@
-# SPDX-License-Identifier: MIT
-# Copyright (c) 2019 Intel Corporation
-"""
-Description of what this model does
-"""
-import os
-import json
-import hashlib
+import pathlib
 from typing import AsyncIterator, Tuple, Any
 
 import numpy as np
 
-from dffml.record import Record
-from dffml.base import config, field
-from dffml.source.source import Sources
-from dffml.model.accuracy import Accuracy
-from dffml.model.model import ModelContext, Model, ModelNotTrained
-from dffml.util.entrypoint import entrypoint
-from dffml.feature.feature import Feature, Features
+from dffml import (
+    config,
+    field,
+    entrypoint,
+    SimpleModel,
+    ModelNotTrained,
+    Accuracy,
+    Feature,
+    Features,
+    Sources,
+    Record,
+)
 
 
 @config
 class SLRConfig:
     predict: Feature = field("Label or the value to be predicted")
     features: Features = field("Features to train on")
-    directory: str = field(
+    directory: pathlib.Path = field(
         "Directory where state should be saved",
-        default=os.path.join(
-            os.path.expanduser("~"), ".cache", "dffml", "scratch"
-        ),
+        default=pathlib.Path("~", ".cache", "dffml", "scratch"),
     )
 
 
-class SLRContext(ModelContext):
-    def __init__(self, parent):
-        super().__init__(parent)
+@entrypoint("scratchslr")
+class SLR(SimpleModel):
+    r"""
+    Simple Linear Regression Model for 2 variables implemented from scratch.
+    Models are saved under the ``directory`` in subdirectories named after the
+    hash of their feature names.
+
+    .. code-block:: console
+
+        $ cat > dataset.csv << EOF
+        Years,Salary
+        1,40
+        2,50
+        3,60
+        4,70
+        5,80
+        EOF
+        $ dffml train \
+            -model scratchslr \
+            -model-features Years:int:1 \
+            -model-predict Salary:float:1 \
+            -sources f=csv \
+            -source-filename dataset.csv \
+            -log debug
+        $ dffml accuracy \
+            -model scratchslr \
+            -model-features Years:int:1 \
+            -model-predict Salary:float:1 \
+            -sources f=csv \
+            -source-filename dataset.csv \
+            -log debug
+        1.0
+        $ echo -e 'Years,Salary\n6,0\n' | \
+          dffml predict all \
+            -model scratchslr \
+            -model-features Years:int:1 \
+            -model-predict Salary:float:1 \
+            -sources f=csv \
+            -source-filename /dev/stdin \
+            -log debug
+        [
+            {
+                "extra": {},
+                "features": {
+                    "Salary": 0,
+                    "Years": 6
+                },
+                "last_updated": "2019-07-19T09:46:45Z",
+                "prediction": {
+                    "Salary": {
+                        "confidence": 1.0,
+                        "value": 90.0
+                    }
+                },
+                "key": "0"
+            }
+        ]
+
+    """
+
+    # The configuration class needs to be set as the CONFIG property
+    CONFIG = SLRConfig
+    # Simple Linear Regression only supports training on a single feature
+    NUM_SUPPORTED_FEATURES = 1
+    # We only support single dimensional values, non-matrix / array
+    SUPPORTED_LENGTHS = [1]
+
+    def __init__(self, config):
+        super().__init__(config)
         self.xData = np.array([])
         self.yData = np.array([])
-        self.features = self.applicable_features(self.parent.config.features)
-        self._features_hash_ = hashlib.sha384(
-            ("".join(sorted(self.features))).encode()
-        ).hexdigest()
 
     @property
     def regression_line(self):
-        return self.parent.saved.get(self._features_hash_, None)
+        """
+        Return regression_line from self.storage, or None if it is not set yet
+        """
+        return self.storage.get("regression_line", None)
 
     @regression_line.setter
     def regression_line(self, rline):
-        self.parent.saved[self._features_hash_] = rline
-
-    def applicable_features(self, features):
-        usable = []
-        if len(features) != 1:
-            raise ValueError(
-                "Simple Linear Regression doesn't support features other than 1"
-            )
-        for feature in features:
-            if feature.dtype() != int and feature.dtype() != float:
-                raise ValueError(
-                    "Simple Linear Regression only supports int or float feature"
-                )
-            if feature.length() != 1:
-                raise ValueError(
-                    "Simple LR only supports single values (non-matrix / array)"
-                )
-            usable.append(feature.NAME)
-        return sorted(usable)
-
-    async def predict_input(self, x):
+        """
+        Set regression_line in self.storage so it will be saved to disk
+        """
+        self.storage["regression_line"] = rline
+
+    def predict_input(self, x):
+        """
+        Use the regression line to make a prediction by returning ``m * x + b``.
+        """
         prediction = self.regression_line[0] * x + self.regression_line[1]
         self.logger.debug(
             "Predicted Value of {} {}:".format(
-                self.parent.config.predict.NAME, prediction
+                self.config.predict.NAME, prediction
             )
         )
         return prediction
 
-    async def squared_error(self, ys, yline):
+    def squared_error(self, ys, yline):
         return sum((ys - yline) ** 2)
 
-    async def coeff_of_deter(self, ys, regression_line):
+    def coeff_of_deter(self, ys, regression_line):
         y_mean_line = [np.mean(ys) for y in ys]
-        squared_error_mean = await self.squared_error(ys, y_mean_line)
-        squared_error_regression = await self.squared_error(
-            ys, regression_line
-        )
+        squared_error_mean = self.squared_error(ys, y_mean_line)
+        squared_error_regression = self.squared_error(ys, regression_line)
         return 1 - (squared_error_regression / squared_error_mean)
 
-    async def best_fit_line(self):
+    def best_fit_line(self):
         self.logger.debug(
             "Number of input records: {}".format(len(self.xData))
         )
@@ -100,23 +147,24 @@ async def best_fit_line(self):
         )
         b = mean_y - (m * mean_x)
         regression_line = [m * x + b for x in x]
-        accuracy = await self.coeff_of_deter(y, regression_line)
+        accuracy = self.coeff_of_deter(y, regression_line)
         return (m, b, accuracy)
 
     async def train(self, sources: Sources):
         async for record in sources.with_features(
-            self.features + [self.parent.config.predict.NAME]
+            self.features + [self.config.predict.NAME]
         ):
             feature_data = record.features(
-                self.features + [self.parent.config.predict.NAME]
+                self.features + [self.config.predict.NAME]
             )
             self.xData = np.append(self.xData, feature_data[self.features[0]])
             self.yData = np.append(
-                self.yData, feature_data[self.parent.config.predict.NAME]
+                self.yData, feature_data[self.config.predict.NAME]
             )
-        self.regression_line = await self.best_fit_line()
+        self.regression_line = self.best_fit_line()
 
     async def accuracy(self, sources: Sources) -> Accuracy:
+        # Ensure the model has been trained before we assess its accuracy
         if self.regression_line is None:
             raise ModelNotTrained("Train model before assessing for accuracy.")
         accuracy_value = self.regression_line[2]
@@ -125,101 +173,15 @@ async def accuracy(self, sources: Sources) -> Accuracy:
     async def predict(
         self, records: AsyncIterator[Record]
     ) -> AsyncIterator[Tuple[Record, Any, float]]:
+        # Ensure the model has been trained before we try to make a prediction
         if self.regression_line is None:
             raise ModelNotTrained("Train model before prediction.")
-        target = self.parent.config.predict.NAME
+        target = self.config.predict.NAME
         async for record in records:
             feature_data = record.features(self.features)
             record.predicted(
                 target,
-                await self.predict_input(feature_data[self.features[0]]),
+                self.predict_input(feature_data[self.features[0]]),
                 self.regression_line[2],
             )
             yield record
-
-
-@entrypoint("slr")
-class SLR(Model):
-    """
-    Simple Linear Regression Model for 2 variables implemented from scratch.
-    Models are saved under the ``directory`` in subdirectories named after the
-    hash of their feature names.
-
-    .. code-block:: console
-
-        $ cat > dataset.csv << EOF
-        Years,Salary
-        1,40
-        2,50
-        3,60
-        4,70
-        5,80
-        EOF
-        $ dffml train \\
-            -model scratchslr \\
-            -model-features Years:int:1 \\
-            -model-predict Salary:float:1 \\
-            -sources f=csv \\
-            -source-filename dataset.csv \\
-            -log debug
-        $ dffml accuracy \\
-            -model scratchslr \\
-            -model-features Years:int:1 \\
-            -model-predict Salary:float:1 \\
-            -sources f=csv \\
-            -source-filename dataset.csv \\
-            -log debug
-        1.0
-        $ echo -e 'Years,Salary\\n6,0\\n' | \\
-          dffml predict all \\
-            -model scratchslr \\
-            -model-features Years:int:1 \\
-            -model-predict Salary:float:1 \\
-            -sources f=csv \\
-            -source-filename /dev/stdin \\
-            -log debug
-        [
-            {
-                "extra": {},
-                "features": {
-                    "Salary": 0,
-                    "Years": 6
-                },
-                "last_updated": "2019-07-19T09:46:45Z",
-                "prediction": {
-                    "Salary": {
-                        "confidence": 1.0,
-                        "value": 90.0
-                    }
-                },
-                "key": "0"
-            }
-        ]
-
-    """
-
-    CONTEXT = SLRContext
-    CONFIG = SLRConfig
-
-    def __init__(self, config: SLRConfig) -> None:
-        super().__init__(config)
-        self.saved = {}
-
-    def _filename(self):
-        return os.path.join(
-            self.config.directory,
-            hashlib.sha384(self.config.predict.NAME.encode()).hexdigest()
-            + ".json",
-        )
-
-    async def __aenter__(self) -> SLRContext:
-        filename = self._filename()
-        if os.path.isfile(filename):
-            with open(filename, "r") as read:
-                self.saved = json.load(read)
-        return self
-
-    async def __aexit__(self, exc_type, exc_value, traceback):
-        filename = self._filename()
-        with open(filename, "w") as write:
-            json.dump(self.saved, write)