Skip to content
This repository was archived by the owner on Aug 25, 2024. It is now read-only.

Commit 9f48560

Browse files
authored
docs: model: daal4py: Add example usage for Linear Regression
Fixes: #692 Signed-off-by: <[email protected]>
1 parent a94a5d2 commit 9f48560

File tree

11 files changed

+205
-8
lines changed

11 files changed

+205
-8
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6363
core plugins
6464
- HTTP service got a `-redirect` flag which allows for URL redirection via a
6565
HTTP 307 response
66+
- Daal4py example usage.
6667
### Changed
6768
- Renamed `-seed` to `-inputs` in `dataflow create` command
6869
- Renamed configloader/png to configloader/image and added support for loading JPEG and TIFF file formats

model/daal4py/dffml_model_daal4py/daal4pylr.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,65 @@ class DAAL4PyLRModelConfig:
2828

2929
@entrypoint("daal4pylr")
3030
class DAAL4PyLRModel(SimpleModel):
31+
"""
32+
Implemented using daal4py.
33+
34+
First we create the training and testing datasets
35+
36+
.. literalinclude:: /../model/daal4py/examples/lr/train_data.sh
37+
38+
.. literalinclude:: /../model/daal4py/examples/lr/test_data.sh
39+
40+
Train the model
41+
42+
.. literalinclude:: /../model/daal4py/examples/lr/train.sh
43+
44+
Assess the accuracy
45+
46+
.. literalinclude:: /../model/daal4py/examples/lr/accuracy.sh
47+
48+
Output
49+
50+
.. code-block::
51+
52+
0.6666666666666666
53+
54+
55+
Make a prediction
56+
57+
.. literalinclude:: /../model/daal4py/examples/lr/predict.sh
58+
59+
Output
60+
61+
.. code-block:: json
62+
63+
[
64+
{
65+
"extra": {},
66+
"features": {
67+
"ans": 1,
68+
"f1": 0.8
69+
},
70+
"key": "0",
71+
"last_updated": "2020-07-22T02:53:11Z",
72+
"prediction": {
73+
"ans": {
74+
"confidence": NaN,
75+
"value": 1.1907472649730522
76+
}
77+
}
78+
}
79+
]
80+
81+
82+
83+
84+
85+
Example usage of the daal4py Linear Regression model using the Python API
86+
87+
.. literalinclude:: /../model/daal4py/examples/lr/lr.py
88+
"""
89+
3190
CONFIG = DAAL4PyLRModelConfig
3291

3392
def __init__(self, config) -> None:
@@ -114,6 +173,10 @@ async def predict(
114173
predict = self.pd.DataFrame(feature_data, index=[0])
115174
preds = self.lm_predictor.compute(predict, self.lm_trained)
116175
target = self.parent.config.predict.name
117-
record.predicted(target, preds.prediction, float("nan"))
176+
if preds.prediction.size == 1:
177+
prediction = preds.prediction.flat[0]
178+
else:
179+
prediction = preds.prediction
180+
record.predicted(target, prediction, float("nan"))
118181
# Yield the record to the caller
119182
yield record

model/daal4py/examples/lr/__init__.py

Whitespace-only changes.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
dffml accuracy \
2+
-model daal4pylr \
3+
-model-features f1:float:1 \
4+
-model-predict ans:int:1 \
5+
-model-directory tempdir \
6+
-sources f=csv \
7+
-source-filename test.csv

model/daal4py/examples/lr/lr.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from dffml import CSVSource, Features, Feature
2+
from dffml.noasync import train, accuracy, predict
3+
from dffml_model_daal4py.daal4pylr import DAAL4PyLRModel
4+
5+
# Describe the data: one float input column "f1" and the column "ans" to
# predict. The model is configured to use the "tempdir" directory.
input_features = Features(Feature("f1", float, 1))
target_feature = Feature("ans", int, 1)
model = DAAL4PyLRModel(
    features=input_features, predict=target_feature, directory="tempdir",
)

# Fit the model on the records in train.csv
train(model, "train.csv")

# Score the model on test.csv, this time passing an explicit CSVSource
# instead of a bare filename
test_source = CSVSource(filename="test.csv")
print("Accuracy:", accuracy(model, test_source))

# Predict "ans" for a single in-memory record and print the record with the
# predicted value filled in
for _, features, prediction in predict(model, {"f1": 0.8, "ans": 0}):
    features["ans"] = prediction["ans"]["value"]
    print(features)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
echo -e 'f1,ans\n0.8,1\n' | \
2+
dffml predict all \
3+
-model daal4pylr \
4+
-model-features f1:float:1 \
5+
-model-predict ans:int:1 \
6+
-model-directory tempdir \
7+
-sources f=csv \
8+
-source-filename /dev/stdin
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
cat > test.csv << EOF
2+
f1,ans
3+
18.8,16.4
4+
20.3,17.7
5+
22.4,19.6
6+
19.4,16.9
7+
15.5,14.0
8+
16.7,14.6
9+
EOF
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import os
2+
import ast
3+
import sys
4+
import json
5+
import tempfile
6+
import contextlib
7+
import subprocess
8+
import unittest.mock
9+
import logging, sys
10+
11+
import numpy as np
12+
13+
from dffml.util.os import chdir
14+
15+
16+
def sh_filepath(filename):
    """
    Return the path of ``filename`` inside the directory that holds this
    module, so example scripts can be located regardless of the current
    working directory.
    """
    examples_dir = os.path.dirname(__file__)
    return os.path.join(examples_dir, filename)
18+
19+
20+
@contextlib.contextmanager
def directory_with_csv_files():
    """
    Context manager which creates a temporary directory, enters it via
    ``chdir``, runs ``train_data.sh`` and ``test_data.sh`` there with bash,
    and yields the directory path. The directory is removed by
    ``tempfile.TemporaryDirectory`` when the context exits.
    """
    with tempfile.TemporaryDirectory() as tempdir, chdir(tempdir):
        # Generate the training data first, then the test data
        for script in ("train_data.sh", "test_data.sh"):
            subprocess.check_output(["bash", sh_filepath(script)])
        yield tempdir
27+
28+
29+
class TestExample(unittest.TestCase):
    """
    Run the daal4py Linear Regression examples (the Python script and the
    shell scripts) as subprocesses and check the shape of their output.
    """

    # One signed decimal number, optionally in scientific notation. It is
    # always embedded between anchors below; the previous patterns ended in a
    # top-level ``|\d+`` alternative and were unanchored, so any line that
    # merely contained a digit satisfied them.
    NUMBER_REGEX = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    def python_test(self, filename):
        """
        Run the example script ``filename`` (resolved next to this module)
        with the current interpreter and validate what it prints.

        The first output line must be ``Accuracy: <number>`` and the second
        must be a Python dict literal whose ``"ans"`` value is a float.
        Raises ``subprocess.CalledProcessError`` if the script exits non-zero.
        """
        # Path to target file
        filepath = sh_filepath(filename)
        # Capture output
        stdout = subprocess.check_output([sys.executable, filepath])
        lines = stdout.decode().split("\n")
        # Check the Accuracy
        self.assertRegex(
            lines[0], r"^Accuracy: " + self.NUMBER_REGEX + r"\s*$"
        )
        # Check the ans
        self.assertIsInstance(ast.literal_eval(lines[1])["ans"], float)

    def test_python_filenames(self):
        """Exercise ``lr.py`` inside a directory holding the CSV files."""
        with directory_with_csv_files():
            self.python_test("lr.py")

    def test_shell(self):
        """
        Exercise ``train.sh``, ``accuracy.sh`` and ``predict.sh`` in order
        inside a directory holding the CSV files.
        """
        with directory_with_csv_files():
            # Run training
            subprocess.check_output(["bash", sh_filepath("train.sh")])
            # Check the Accuracy
            stdout = subprocess.check_output(
                ["bash", sh_filepath("accuracy.sh")]
            )
            lines = stdout.decode().split("\n")
            self.assertRegex(
                lines[0], r"^\s*" + self.NUMBER_REGEX + r"\s*$"
            )
            # Make the prediction
            stdout = subprocess.check_output(
                ["bash", sh_filepath("predict.sh")]
            )
            records = json.loads(stdout.decode())
            # Check the ans
            self.assertIsInstance(
                records[0]["prediction"]["ans"]["value"], float
            )

model/daal4py/examples/lr/train.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
dffml train \
2+
-model daal4pylr \
3+
-model-features f1:float:1 \
4+
-model-predict ans:int:1 \
5+
-model-directory tempdir \
6+
-sources f=csv \
7+
-source-filename train.csv
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
cat >train.csv << EOF
2+
f1,ans
3+
12.4,11.2
4+
14.3,12.5
5+
14.5,12.7
6+
14.9,13.1
7+
16.1,14.1
8+
16.9,14.8
9+
16.5,14.4
10+
15.4,13.4
11+
17.0,14.9
12+
17.9,15.6
13+
18.8,16.4
14+
20.3,17.7
15+
22.4,19.6
16+
19.4,16.9
17+
15.5,14.0
18+
16.7,14.6
19+
EOF

0 commit comments

Comments
 (0)