Skip to content

Commit faeeef9

Browse files
bjcmitdtzar
authored and committed
Added notebooks for training and scoring a Diabetes Ridge regression model (#145)
* Added notebooks for training and scoring a Ridge regression model with the Diabetes dataset * Fixed joblib import and remove request headers Co-authored-by: David Tesar <[email protected]>
1 parent e4a931a commit faeeef9

File tree

2 files changed

+285
-0
lines changed

2 files changed

+285
-0
lines changed
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Score Data with a Ridge Regression Model Trained on the Diabetes Dataset"
8+
]
9+
},
10+
{
11+
"cell_type": "markdown",
12+
"metadata": {},
13+
"source": [
14+
"This notebook loads the model trained in the Diabetes Ridge Regression Training notebook, prepares the data, and scores the data."
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": 1,
20+
"metadata": {},
21+
"outputs": [],
22+
"source": [
23+
"import json\n",
24+
"import numpy\n",
25+
"from azureml.core.model import Model\n",
26+
"import joblib"
27+
]
28+
},
29+
{
30+
"cell_type": "markdown",
31+
"metadata": {},
32+
"source": [
33+
"## Load Model"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": 2,
39+
"metadata": {},
40+
"outputs": [],
41+
"source": [
42+
"model_path = Model.get_model_path(model_name=\"sklearn_regression_model.pkl\")\n",
43+
"model = joblib.load(model_path)"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"## Prepare Data"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 3,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"raw_data = '{\"data\":[[1,2,3,4,5,6,7,8,9,10],[10,9,8,7,6,5,4,3,2,1]]}'\n",
60+
"\n",
61+
"records = json.loads(raw_data)[\"data\"]\n",
62+
"data = numpy.array(records)"
63+
]
64+
},
65+
{
66+
"cell_type": "markdown",
67+
"metadata": {},
68+
"source": [
69+
"## Score Data"
70+
]
71+
},
72+
{
73+
"cell_type": "code",
74+
"execution_count": 4,
75+
"metadata": {},
76+
"outputs": [
77+
{
78+
"name": "stdout",
79+
"output_type": "stream",
80+
"text": [
81+
"Test result: {'result': [5113.099642122813, 3713.6329271385353]}\n"
82+
]
83+
}
84+
],
85+
"source": [
86+
"result = model.predict(data)\n",
87+
"print(\"Test result: \", {\"result\": result.tolist()})"
90+
]
91+
}
92+
],
93+
"metadata": {
94+
"kernelspec": {
95+
"display_name": "Python (storedna)",
96+
"language": "python",
97+
"name": "storedna"
98+
},
99+
"language_info": {
100+
"codemirror_mode": {
101+
"name": "ipython",
102+
"version": 3
103+
},
104+
"file_extension": ".py",
105+
"mimetype": "text/x-python",
106+
"name": "python",
107+
"nbconvert_exporter": "python",
108+
"pygments_lexer": "ipython3",
109+
"version": "3.6.9"
110+
}
111+
},
112+
"nbformat": 4,
113+
"nbformat_minor": 2
114+
}
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Train a Ridge Regression Model on the Diabetes Dataset"
8+
]
9+
},
10+
{
11+
"cell_type": "markdown",
12+
"metadata": {},
13+
"source": [
14+
"This notebook loads the Diabetes dataset from sklearn, splits the data into training and validation sets, trains a Ridge regression model, validates the model on the validation set, and saves the model."
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": 1,
20+
"metadata": {},
21+
"outputs": [],
22+
"source": [
23+
"from sklearn.datasets import load_diabetes\n",
24+
"from sklearn.linear_model import Ridge\n",
25+
"from sklearn.metrics import mean_squared_error\n",
26+
"from sklearn.model_selection import train_test_split\n",
27+
"import joblib"
28+
]
29+
},
30+
{
31+
"cell_type": "markdown",
32+
"metadata": {},
33+
"source": [
34+
"## Load Data"
35+
]
36+
},
37+
{
38+
"cell_type": "code",
39+
"execution_count": 2,
40+
"metadata": {},
41+
"outputs": [],
42+
"source": [
43+
"X, y = load_diabetes(return_X_y=True)"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"## Split Data into Training and Validation Sets"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 3,
56+
"metadata": {},
57+
"outputs": [],
58+
"source": [
59+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
60+
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
61+
" \"test\": {\"X\": X_test, \"y\": y_test}}"
62+
]
63+
},
64+
{
65+
"cell_type": "markdown",
66+
"metadata": {},
67+
"source": [
68+
"## Train Model on Training Set"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": 4,
74+
"metadata": {},
75+
"outputs": [
76+
{
77+
"data": {
78+
"text/plain": [
79+
"Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,\n",
80+
" normalize=False, random_state=None, solver='auto', tol=0.001)"
81+
]
82+
},
83+
"execution_count": 4,
84+
"metadata": {},
85+
"output_type": "execute_result"
86+
}
87+
],
88+
"source": [
89+
"alpha = 0.5\n",
90+
"\n",
91+
"reg = Ridge(alpha=alpha)\n",
92+
"reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])"
93+
]
94+
},
95+
{
96+
"cell_type": "markdown",
97+
"metadata": {},
98+
"source": [
99+
"## Validate Model on Validation Set"
100+
]
101+
},
102+
{
103+
"cell_type": "code",
104+
"execution_count": 6,
105+
"metadata": {},
106+
"outputs": [
107+
{
108+
"name": "stdout",
109+
"output_type": "stream",
110+
"text": [
111+
"mse: 3298.9096058070622\n"
112+
]
113+
}
114+
],
115+
"source": [
116+
"preds = reg.predict(data[\"test\"][\"X\"])\n",
117+
"print(\"mse: \", mean_squared_error(data[\"test\"][\"y\"], preds))"
118+
]
119+
},
120+
{
121+
"cell_type": "markdown",
122+
"metadata": {},
123+
"source": [
124+
"## Save Model"
125+
]
126+
},
127+
{
128+
"cell_type": "code",
129+
"execution_count": 7,
130+
"metadata": {},
131+
"outputs": [
132+
{
133+
"data": {
134+
"text/plain": [
135+
"['sklearn_regression_model.pkl']"
136+
]
137+
},
138+
"execution_count": 7,
139+
"metadata": {},
140+
"output_type": "execute_result"
141+
}
142+
],
143+
"source": [
144+
"model_name = \"sklearn_regression_model.pkl\"\n",
145+
"\n",
146+
"joblib.dump(value=reg, filename=model_name)"
147+
]
148+
}
149+
],
150+
"metadata": {
151+
"kernelspec": {
152+
"display_name": "Python (storedna)",
153+
"language": "python",
154+
"name": "storedna"
155+
},
156+
"language_info": {
157+
"codemirror_mode": {
158+
"name": "ipython",
159+
"version": 3
160+
},
161+
"file_extension": ".py",
162+
"mimetype": "text/x-python",
163+
"name": "python",
164+
"nbconvert_exporter": "python",
165+
"pygments_lexer": "ipython3",
166+
"version": "3.6.9"
167+
}
168+
},
169+
"nbformat": 4,
170+
"nbformat_minor": 2
171+
}

0 commit comments

Comments
 (0)