
Commit ef28bed

model: scratch: SAG LR documentation
1 parent 0f30866 commit ef28bed

File tree

4 files changed: +134 -22 lines changed

docs/plugins/dffml_model.rst

Lines changed: 55 additions & 3 deletions
@@ -417,12 +417,64 @@ dffml_model_scratch

     pip install dffml-model-scratch


-scratchlgr
-~~~~~~~~~~
+scratchlgrsag
+~~~~~~~~~~~~~

 *Official*

-No description
+Logistic Regression using stochastic average gradient descent optimizer
+
+.. code-block:: console
+
+    $ cat > dataset.csv << EOF
+    f1,ans
+    0.1,0
+    0.7,1
+    0.6,1
+    0.2,0
+    0.8,1
+    EOF
+    $ dffml train \
+        -model scratchlgrsag \
+        -model-features f1:float:1 \
+        -model-predict ans:int:1 \
+        -sources f=csv \
+        -source-filename dataset.csv \
+        -log debug
+    $ dffml accuracy \
+        -model scratchlgrsag \
+        -model-features f1:float:1 \
+        -model-predict ans:int:1 \
+        -sources f=csv \
+        -source-filename dataset.csv \
+        -log debug
+    1.0
+    $ echo -e 'f1,ans\n0.8,0\n' | \
+      dffml predict all \
+        -model scratchlgrsag \
+        -model-features f1:float:1 \
+        -model-predict ans:int:1 \
+        -sources f=csv \
+        -source-filename /dev/stdin \
+        -log debug
+    [
+        {
+            "extra": {},
+            "features": {
+                "ans": 0,
+                "f1": 0.8
+            },
+            "last_updated": "2020-03-19T13:41:08Z",
+            "prediction": {
+                "ans": {
+                    "confidence": 1.0,
+                    "value": 1
+                }
+            },
+            "key": "0"
+        }
+    ]

 **Args**
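
The example above exercises the model's decision rule: after training, a record is classified as 1 when ``w * x + b`` crosses the 0.5 threshold, which is why the row with f1 = 0.8 comes back with "value": 1 despite its label of 0 in the input CSV. A minimal standalone sketch of that rule, assuming a hypothetical learned weight of 1.5 and zero bias (the actual weight depends on the training run):

    # Sketch only: w is a made-up illustrative weight, not necessarily the
    # value the scratch model learns on this dataset.
    w, b = 1.5, 0.0

    def predict_input(x: float) -> int:
        # Classify as 1 when the separating line pushes x past the 0.5 threshold
        return int(w * x + b > 0.5)

    print(predict_input(0.8))  # 1, matching the "value": 1 in the JSON above
    print(predict_input(0.2))  # 0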

model/scratch/dffml_model_scratch/logisticregression.py

Lines changed: 77 additions & 17 deletions
@@ -27,9 +27,64 @@ class LogisticRegressionConfig:
     )


-@entrypoint("scratchlgr")
+@entrypoint("scratchlgrsag")
 class LogisticRegression(SimpleModel):
+    r"""
+    Logistic Regression using stochastic average gradient descent optimizer
+
+    .. code-block:: console
+
+        $ cat > dataset.csv << EOF
+        f1,ans
+        0.1,0
+        0.7,1
+        0.6,1
+        0.2,0
+        0.8,1
+        EOF
+        $ dffml train \
+            -model scratchlgrsag \
+            -model-features f1:float:1 \
+            -model-predict ans:int:1 \
+            -sources f=csv \
+            -source-filename dataset.csv \
+            -log debug
+        $ dffml accuracy \
+            -model scratchlgrsag \
+            -model-features f1:float:1 \
+            -model-predict ans:int:1 \
+            -sources f=csv \
+            -source-filename dataset.csv \
+            -log debug
+        1.0
+        $ echo -e 'f1,ans\n0.8,0\n' | \
+          dffml predict all \
+            -model scratchlgrsag \
+            -model-features f1:float:1 \
+            -model-predict ans:int:1 \
+            -sources f=csv \
+            -source-filename /dev/stdin \
+            -log debug
+        [
+            {
+                "extra": {},
+                "features": {
+                    "ans": 0,
+                    "f1": 0.8
+                },
+                "last_updated": "2020-03-19T13:41:08Z",
+                "prediction": {
+                    "ans": {
+                        "confidence": 1.0,
+                        "value": 1
+                    }
+                },
+                "key": "0"
+            }
+        ]
+
+    """
     # The configuration class needs to be set as the CONFIG property
     CONFIG = LogisticRegressionConfig
     # Logistic Regression only supports training on a single feature
@@ -50,16 +105,15 @@ def separating_line(self):
         return self.storage.get("separating_line", None)

     @separating_line.setter
-    def separating_line(self, rline):
+    def separating_line(self, sline):
         """
         Set separating_line in self.storage so it will be saved to disk
         """
-        self.storage["separating_line"] = rline
+        self.storage["separating_line"] = sline

     def predict_input(self, x):
         """
-        Use the regression
-        line to make a prediction by returning ``m * x + b``.
+        Use the SAG-trained separating line to predict via ``w * x + b > 0.5``
         """
         prediction = self.separating_line[0] * x + self.separating_line[1]
         if prediction > 0.5:
@@ -73,24 +127,30 @@ def predict_input(self, x):
         )
         return prediction

-    def best_fit_line(self):
+    def best_separating_line(self):
+        """
+        Determine the best separating line (here, a single weight ``w``) such
+        that ``w * x + b`` is well separated from the 0.5 threshold.
+        """
         self.logger.debug(
             "Number of input records: {}".format(len(self.xData))
         )
-        x = self.xData
-        y = self.yData
-        learning_rate = 0.01
-        w = 0.01
-        b = 0.0
-        for _ in range(1, 1500):
+        x = self.xData  # feature array
+        y = self.yData  # class array
+        learning_rate = 0.01  # step size for the update: w -= lr * gradient
+        w = 0.01  # initial weight
+        b = 0.0  # the data is treated as unbiased, so b stays 0
+        # Run 1500 epochs of gradient descent
+        for _ in range(1500):
             z = w * x + b
             val = -np.multiply(y, z)
             num = -np.multiply(y, np.exp(val))
             den = 1 + np.exp(val)
-            f = num / den
-            gradJ = np.sum(x * f)
-            w = w - learning_rate * gradJ / len(x)
-            error = 0
+            f = num / den  # per-record gradient contribution
+            gradJ = np.sum(x * f)  # total gradient
+            w = w - learning_rate * gradJ / len(x)  # averaged gradient step
+            # Accuracy calculation
+            error = 0  # count of misclassified records
             for x_id in range(len(x)):
                 yhat = x[x_id] * w + b > 0.5
                 if yhat:
@@ -113,7 +173,7 @@ async def train(self, sources: Sources):
             self.yData = np.append(
                 self.yData, feature_data[self.config.predict.NAME]
             )
-        self.separating_line = self.best_fit_line()
+        self.separating_line = self.best_separating_line()

     async def accuracy(self, sources: Sources) -> Accuracy:
         # Ensure the model has been trained before we try to make a prediction
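
The core of the change is the renamed ``best_separating_line`` routine, which runs plain gradient descent on the logistic loss over a single feature. A self-contained sketch of the same loop on the toy dataset from the docstring, assuming only numpy (no DFFML imports), so the update rule can be inspected in isolation:

    import numpy as np

    # Toy dataset from the docstring example
    x = np.array([0.1, 0.7, 0.6, 0.2, 0.8])
    y = np.array([0, 1, 1, 0, 1])

    learning_rate = 0.01  # step size
    w, b = 0.01, 0.0      # single weight, bias kept at zero

    for _ in range(1500):
        z = w * x + b
        val = -np.multiply(y, z)
        num = -np.multiply(y, np.exp(val))
        den = 1 + np.exp(val)
        f = num / den                        # per-record gradient contribution
        gradJ = np.sum(x * f)                # total gradient
        w -= learning_rate * gradJ / len(x)  # averaged gradient step

    print(w)  # learned weight
    print((w * x + b > 0.5).astype(int))  # should match y, as the docs' 1.0 accuracy suggests

Note that only records with y = 1 contribute to this gradient (for y = 0 the numerator is zero), so the loop effectively grows w until the positive examples clear the 0.5 cutoff used by ``predict_input``.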

model/scratch/setup.py

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@
     entry_points={
         "dffml.model": [
             "scratchslr = dffml_model_scratch.slr:SLR",
-            "scratchlgr = dffml_model_scratch.logisticregression:LogisticRegression",
+            "scratchlgrsag = dffml_model_scratch.logisticregression:LogisticRegression",
         ]
     },
 )
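
The setup.py change is what makes the new name visible to the DFFML CLI: the "dffml.model" entry point group now maps "scratchlgrsag" to the same LogisticRegression class. One way to check the registration after reinstalling the package is to list that group; a minimal sketch using pkg_resources (any entry-point API would do):

    import pkg_resources

    # Print every model name registered under the "dffml.model" group;
    # "scratchlgrsag" should appear here once the package is reinstalled.
    for ep in pkg_resources.iter_entry_points("dffml.model"):
        print(ep.name, "->", ep.module_name)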

model/scratch/tests/test_lgr_integration.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ async def test_run(self):
         # Arguments for the model
         model_args = [
             "-model",
-            "scratchlgr",
+            "scratchlgrsag",
             "-model-features",
             "f1:int:1",
             "-model-predict",
