Skip to content

Commit d2df6d3

Browse files
Updating scripts, ReadMe and adding app folder
1 parent 0af9d0a commit d2df6d3

34 files changed

+1556
-47
lines changed

samples/features/r-services/Getting-Started/Predictive-Modeling/Predictive Model.R

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
#Connection string to connect to SQL Server
2-
connStr <- paste("Driver=SQL Server; Server=", "MyServer",
3-
";Database=", "tutorialdb", ";Trusted_Connection=true;", sep = "");
1+
2+
##################### STEP1 - Connect to DB and read data ####################
3+
4+
#Connection string to connect to SQL Server named instance
5+
connStr <- paste("Driver=SQL Server; Server=", "NELLIELAPTOP\\VNEXTCTP14",
6+
";Database=", "Tutorialdb", ";Trusted_Connection=true;", sep = "");
47

58
#Get the data from SQL Server Table
69
SQL_rentaldata <- RxSqlServerData(table = "dbo.rental_data",
@@ -10,8 +13,11 @@ SQL_rentaldata <- RxSqlServerData(table = "dbo.rental_data",
1013
rentaldata <- rxImport(SQL_rentaldata);
1114

1215
#Let's see the structure of the data and the top rows
16+
# Ski rental data, giving the number of ski rentals on a given date
1317
head(rentaldata);
14-
str(rentaldata);
18+
19+
20+
##################### STEP2 - Clean and prepare the data ####################
1521

1622
#Changing the three factor columns to factor types
1723
#This helps when building the model because we are explicitly saying that these values are categorical
@@ -22,6 +28,8 @@ rentaldata$WeekDay <- factor(rentaldata$WeekDay);
2228
#Visualize the dataset after the change
2329
str(rentaldata);
2430

31+
##################### STEP3 - train model ####################
32+
2533
#Now let's split the dataset into 2 different sets
2634
#One set for training the model and the other for validating it
2735
train_data = rentaldata[rentaldata$Year < 2015,];
@@ -31,21 +39,25 @@ test_data = rentaldata[rentaldata$Year == 2015,];
3139
actual_counts <- test_data$RentalCount;
3240

3341
#Model 1: Use rxLinMod to create a linear regression model. We are training the data using the training data set
34-
model_linmod <- rxLinMod(RentalCount ~ Month + Day + WeekDay + Snow + Holiday, data = train_data);
42+
model_linmod <- rxLinMod(RentalCount ~ Month + Day + WeekDay + Snow + Holiday, data = train_data);
3543

3644
#Model 2: Use rxDTree to create a decision tree model. We are training the data using the training data set
3745
model_dtree <- rxDTree(RentalCount ~ Month + Day + WeekDay + Snow + Holiday, data = train_data);
3846

47+
48+
#################### STEP4 - Predict using the models ########################
49+
3950
#Use the models we just created to predict using the test data set.
4051
#That enables us to compare the predicted values of RentalCount from the two models against the actual values in the test data set
41-
predict_linmod <- rxPredict(model_linmod, test_data, writeModelVars = TRUE);
52+
predict_linmod <- rxPredict(model_linmod, test_data, writeModelVars = TRUE, extraVarsToWrite = c("Year"));
4253

43-
predict_dtree <- rxPredict(model_dtree, test_data, writeModelVars = TRUE);
54+
predict_dtree <- rxPredict(model_dtree, test_data, writeModelVars = TRUE, extraVarsToWrite = c("Year"));
4455

4556
#Look at the top rows of the two prediction data sets.
4657
head(predict_linmod);
4758
head(predict_dtree);
4859

60+
#################### STEP5 - Compare models ########################
4961
#Now we will use the plotting functionality in R to visualize the results from the predictions
5062
#We are plotting the difference between actual and predicted values for both models to compare accuracy
5163
par(mfrow = c(2, 1));
Lines changed: 107 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,12 @@
1-
--Before we start, we need to restore the DB for this tutorial.
2-
--Step1:Download the compressed backup file
3-
--Save the file on a location where SQL Server can access it. For example:C:\Program Files \Microsoft SQL Server \MSSQL13.MSSQLSERVER\MSSQL\Backup\
4-
--In a new query window in SSMS, execute the following restore statement, but REMEMBER TO CHANGE THE FILE PATHS
5-
--to match the directories of your installation!
6-
USE master;
7-
GO
8-
RESTORE DATABASE TutorialDB
9-
FROM DISK = 'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\Backup\TutorialDB.bak'
10-
WITH
11-
MOVE 'TutorialDB' TO 'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\TutorialDB.mdf'
12-
, MOVE 'TutorialDB_log' TO 'C:\Program Files\Microsoft SQL Server\MSSQL13.MSSQLSERVER\MSSQL\DATA\TutorialDB.ldf';
13-
GO
141

15-
USE tutorialdb;
2+
USE TutorialDB;
3+
4+
-- Table containing ski rental data
165
SELECT * FROM [dbo].[rental_data];
176

187

19-
-- Operationalize
20-
USE tutorialdb;
21-
GO
22-
-- Setup model table
8+
9+
-------------------------- STEP 1 - Setup model table ----------------------------------------
2310
DROP TABLE IF EXISTS rental_rx_models;
2411
GO
2512
CREATE TABLE rental_rx_models (
@@ -28,7 +15,11 @@ CREATE TABLE rental_rx_models (
2815
);
2916
GO
3017

31-
-- Stored procedure that trains and generates a model using the rental_data and a decision tree algorithm
18+
19+
20+
21+
-------------------------- STEP 2 - Train model ----------------------------------------
22+
-- Stored procedure that trains and generates an R model using the rental_data and a decision tree algorithm
3223
DROP PROCEDURE IF EXISTS generate_rental_rx_model;
3324
go
3425
CREATE PROCEDURE generate_rental_rx_model (@trained_model varbinary(max) OUTPUT)
@@ -39,7 +30,7 @@ BEGIN
3930
, @script = N'
4031
require("RevoScaleR");
4132
42-
rental_train_data$Holiday = factor(rental_train_data$Holiday);
33+
rental_train_data$Holiday = factor(rental_train_data$Holiday);
4334
rental_train_data$Snow = factor(rental_train_data$Snow);
4435
rental_train_data$WeekDay = factor(rental_train_data$WeekDay);
4536
@@ -48,35 +39,96 @@ BEGIN
4839
#Before saving the model to the DB table, we need to serialize it
4940
trained_model <- as.raw(serialize(model_dtree, connection=NULL));'
5041

51-
, @input_data_1 = N'select "RentalCount", "Month", "Day", "WeekDay", "Snow", "Holiday" from dbo.rental_data where Year < 2015'
42+
, @input_data_1 = N'select "RentalCount", "Year", "Month", "Day", "WeekDay", "Snow", "Holiday" from dbo.rental_data where Year < 2015'
5243
, @input_data_1_name = N'rental_train_data'
5344
, @params = N'@trained_model varbinary(max) OUTPUT'
5445
, @trained_model = @trained_model OUTPUT;
5546
END;
5647
GO
48+
49+
------------------- STEP 3 - Save model to table -------------------------------------
5750
-- Start from an empty model table so that only the freshly trained model is stored
TRUNCATE TABLE rental_rx_models;

-- Train the model through the stored procedure and capture its serialized form
DECLARE @serialized_model VARBINARY(MAX);
EXECUTE generate_rental_rx_model @trained_model = @serialized_model OUTPUT;

-- Persist the serialized model under the name used later to look it up
INSERT INTO rental_rx_models (model_name, model)
VALUES ('rxDTree', @serialized_model);

-- Show what is now stored in the model table
SELECT * FROM rental_rx_models;
58+
59+
60+
61+
------------------ STEP 4 - Use the model to predict number of rentals --------------------------
62+
DROP PROCEDURE IF EXISTS predict_rentalcount;
63+
GO
64+
-- Scores the 2015 rows of dbo.rental_data with a serialized R model stored in
-- rental_rx_models and returns the predictions together with the input columns.
--   @model: value of rental_rx_models.model_name identifying the model to use (e.g. 'rxDTree').
-- Result set: one row per scored input row, shaped by the WITH RESULT SETS clause below.
CREATE PROCEDURE predict_rentalcount (@model varchar(100))
AS
BEGIN
    DECLARE @rx_model varbinary(max) = (select model from rental_rx_models where model_name = @model);

    -- Fail early with a clear message instead of letting unserialize() fail inside R
    -- when no model (or an empty model) is stored under the requested name.
    IF @rx_model IS NULL
    BEGIN
        RAISERROR(N'No trained model named ''%s'' was found in rental_rx_models.', 16, 1, @model);
        RETURN;
    END;

    EXEC sp_execute_external_script
          @language = N'R'
        , @script = N'
            require("RevoScaleR");

            #Before using the model to predict, we need to unserialize it
            rental_model<-unserialize(rx_model);

            rental_predictions <-rxPredict(rental_model, rental_score_data, writeModelVars = TRUE, extraVarsToWrite = c("Year"));

            #The first eight columns are returned; they are mapped by position onto the WITH RESULT SETS definition
            OutputDataSet <- cbind(rental_predictions[1],rental_predictions[2], rental_predictions[3], rental_predictions[4], rental_predictions[5], rental_predictions[6], rental_predictions[7], rental_predictions[8])
        '
        -- Bracketed identifiers do not depend on the QUOTED_IDENTIFIER setting;
        -- the table is schema-qualified like in the training procedure.
        , @input_data_1 = N'SELECT [RentalCount], [Year], [Month], [Day], [WeekDay], [Snow], [Holiday] FROM dbo.rental_data WHERE [Year] = 2015'
        , @input_data_1_name = N'rental_score_data'
        , @params = N'@rx_model varbinary(max)'
        , @rx_model = @rx_model
    WITH RESULT SETS ((
          [RentalCount_Predicted] float
        , [RentalCount_Actual] float
        , [Month] float
        , [Day] float
        , [WeekDay] float
        , [Snow] float
        , [Holiday] float
        , [Year] float
    ));
END;
6388
GO
6489

65-
--Stored procedure that takes model name and new data as inout parameters and predicts the rental count for the new data
66-
DROP PROCEDURE IF EXISTS predict_rentals;
90+
---------------- STEP 5 - Create DB table to store predictions -----------------------
91+
DROP TABLE IF EXISTS [dbo].[rental_predictions];
92+
GO
93+
--Create a table to store the predictions in
94+
-- Holds one row per prediction: the predicted and actual rental counts
-- plus the feature values and year they belong to. All columns are nullable integers.
CREATE TABLE dbo.rental_predictions
(
      RentalCount_Predicted int NULL
    , RentalCount_Actual    int NULL
    , [Month]               int NULL
    , [Day]                 int NULL
    , [WeekDay]             int NULL
    , [Snow]                int NULL
    , [Holiday]             int NULL
    , [Year]                int NULL
) ON [PRIMARY]
104+
GO
105+
106+
107+
---------------- STEP 6 - Save the predictions in a DB table -----------------------
108+
-- Empty the table first so that re-running this step does not accumulate duplicate predictions
TRUNCATE TABLE rental_predictions;

--Insert the results of the predictions for test set into a table
-- The explicit column list ties each result-set column of predict_rentalcount to its
-- target column instead of relying on the physical column order of the table.
INSERT INTO rental_predictions
    (RentalCount_Predicted, RentalCount_Actual, [Month], [Day], [WeekDay], [Snow], [Holiday], [Year])
EXEC predict_rentalcount 'rxDTree';

-- Select contents of the table
SELECT
      RentalCount_Predicted
    , RentalCount_Actual
    , [Month]
    , [Day]
    , [WeekDay]
    , [Snow]
    , [Holiday]
    , [Year]
FROM rental_predictions;
115+
116+
------------- STEP 7 - Alternative to the previous stored procedure - Uses new data to predict future rental counts
117+
--Stored procedure that takes model name and new data as input parameters and predicts the rental count for the new data
118+
DROP PROCEDURE IF EXISTS predict_rentalcount_new;
67119
GO
68-
CREATE PROCEDURE predict_rentals (@model VARCHAR(100),@q NVARCHAR(MAX))
120+
CREATE PROCEDURE predict_rentalcount_new (@model VARCHAR(100),@q NVARCHAR(MAX))
69121
AS
70122
BEGIN
71123
DECLARE @rx_model VARBINARY(MAX) = (SELECT model FROM rental_rx_models WHERE model_name = @model);
72-
EXECUTE sp_execute_external_script
124+
EXECUTE sp_execute_external_script
73125
@language = N'R'
74126
, @script = N'
75127
require("RevoScaleR");
76128
77129
#The InputDataSet contains the new data passed to this stored proc. We will use this data to predict.
78130
rentals = InputDataSet;
79-
131+
80132
#Convert types to factors
81133
rentals$Holiday = factor(rentals$Holiday);
82134
rentals$Snow = factor(rentals$Snow);
@@ -92,12 +144,38 @@ BEGIN
92144
, @params = N'@rx_model varbinary(max)'
93145
, @rx_model = @rx_model
94146
WITH RESULT SETS (("RentalCount_Predicted" FLOAT));
95-
147+
96148
END;
97149
GO
98150

99151
--Execute the predict_rentalcount_new stored proc and pass the model name and a query string with a set of features we want to use to predict the rental count
100-
EXEC dbo.predict_rentals @model = 'rxDTree',
152+
EXEC dbo.predict_rentalcount_new @model = 'rxDTree',
101153
@q ='SELECT CONVERT(INT, 3) AS Month, CONVERT(INT, 24) AS Day, CONVERT(INT, 4) AS WeekDay, CONVERT(INT, 1) AS Snow, CONVERT(INT, 1) AS Holiday';
102154
GO
103155

156+
157+
-------------- STEP 8 - Getting predictions from an Application ----------------------------------
158+
-- Create stored procedure that returns predictions as JSON
159+
-- This stored procedure is going to be called from our application
160+
DROP PROCEDURE IF EXISTS get_rental_predictions;
161+
GO
162+
-- Returns the stored predictions for one year as a single JSON document with a
-- top-level "data" array (FOR JSON PATH, ROOT('data')). Called from the sample application.
--   @year: value matched against rental_predictions.[Year].
CREATE PROCEDURE get_rental_predictions (@year int)
AS
BEGIN
    -- Bracketed identifiers keep these as column references even when the procedure
    -- is created with QUOTED_IDENTIFIER OFF (double quotes would then be string literals).
    SELECT
          [Year]
        , RentalCount_Predicted
        , RentalCount_Actual
        , [Month]
        , [Day]
        , [WeekDay]
        , [Snow]
        , [Holiday]
    FROM dbo.rental_predictions
    WHERE [Year] = @year
    -- Without ORDER BY the row order in the JSON array is unspecified; order by date parts
    ORDER BY [Month], [Day]
    FOR JSON PATH, ROOT('data');
END;
178+
GO
179+
180+
-- Executing stored procedure with year = 2015
181+
EXEC get_rental_predictions 2015;

samples/features/r-services/Getting-Started/Predictive-Modeling/README.md

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Build a predictive model with SQL Server R Services
22

3-
This sample shows how to create a predictive model in R and operationalize it with SQL Server 2016.
3+
This sample shows how to create a predictive model in R and operationalize it with SQL Server 2016 or vNext.
44

55
### Contents
66

@@ -24,9 +24,9 @@ Follow the step by step tutorial [here](http://aka.ms/sqldev/R) to walk through
2424

2525
<!-- Delete the ones that don't apply -->
2626
- **Applies to:** SQL Server 2016 (or higher)
27-
- **Key features:**
27+
- **Key features:** SQL Server R Services
2828
- **Workload:** SQL Server R Services
29-
- **Programming Language:** T-SQL, R
29+
- **Programming Language:** T-SQL, R, JavaScript (NodeJS)
3030
- **Authors:** Nellie Gustafsson
3131
- **Update history:** Getting started tutorial for R Services
3232

@@ -43,20 +43,62 @@ After that, you can download a DB backup file and restore it using Setup.sql. [D
4343
<!-- Examples -->
4444
1. SQL Server 2016 (or higher) with R Services installed
4545
2. SQL Server Management Studio
46-
3. R IDE Tool like Visual Studio
47-
46+
3. R IDE Tool like Visual Studio RTVS
47+
48+
## Run this sample app
49+
1. From SQL Server Management Studio or SQL Server Data Tools connect to your SQL Server 2016 or vNext SQL database and execute setup.sql to restore the sample DB
50+
2. From SQL Server Management Studio or SQL Server Data Tools, execute Predictive Model.sql script to set up tables, train model, predict using that model etc.
51+
This is all covered step by step in the [tutorial](http://aka.ms/sqldev/R)
52+
53+
3. Navigate to the folder where you have downloaded the sample and run **npm install** in a command window, or run setup.bat if you are on a Windows operating system. This command will install the necessary npm packages defined in package.json.
54+
55+
4. Locate the db.js file in the project and change the database connection info in the createConnection() method to reference your database. The following tokens should be replaced:
56+
1. SERVERNAME - name of the database server.
57+
2. DATABASE - Name of the database where the rental_predictions table is stored.
58+
3. USERNAME - SQL Server login that can access table data and execute stored procedures.
59+
4. PASSWORD - Password associated to SQL Server login.
60+
61+
```
62+
var config = {
63+
server : "SERVER.database.windows.net",
64+
userName: "USER",
65+
password: "PASSWORD",
66+
// If you have a named instance, you can put the instance name here:
67+
options: { encrypt: true, database: 'DATABASE' }
68+
};
69+
```
70+
71+
5. Run the sample app by opening a command window, navigating to the location where you have downloaded the sample, and running **node bin\www**
72+
6. Go to a browser and navigate to the following [link](http://localhost:3000/client.html). You should now see an HTML table containing the predictions generated using R in SQL Server.
4873

4974
<a name=sample-details></a>
50-
## Sample Details
5175

52-
### PredictiveModel.R
76+
## Sample details
77+
78+
This sample application shows how to create a predictive model and generate predictions using the model. It also shows how to build a simple REST API service that gets data from the DB.
79+
NodeJS REST API is used to implement REST Service in the example.
5380

81+
### Predictive Model.R
5482
The R script that generates a predictive model and uses it to predict rental counts
5583

56-
### PredictiveModel.SQL
84+
### Predictive Model.SQL
85+
Takes the R code in Predictive Model.R and deploys it inside SQL Server. It creates the tables and stored procedures used for training, storing models, and prediction.
86+
87+
### app.js
88+
File that contains startup code.
89+
### db.js
90+
File that contains functions that wrap Tedious library
91+
### predictions.js
92+
File that contains action that will be called to get the predictions
5793

58-
Takes the R code in PredictiveModel.R and uses it inside SQL Server. Creating stored procedures for training and prediction.
94+
Service uses Tedious library for data access and built-in JSON functionalities that are available in SQL Server 2016 and Azure SQL Database.
5995

96+
<a name=disclaimers></a>
97+
98+
## Disclaimers
99+
The code included in this sample is not intended to demonstrate general guidance and architectural patterns for web development.
100+
It contains minimal code required to create a REST API.
101+
You can easily modify this code to fit the architecture of your application.
60102

61103

62104
<a name=related-links></a>
@@ -67,5 +109,5 @@ Takes the R code in PredictiveModel.R and uses it inside SQL Server. Creating st
67109
For additional content, see these articles:
68110

69111
[SQL Server R Services - Upgrade and Installation FAQ](https://msdn.microsoft.com/en-us/library/mt653951.aspx)
70-
71-
[Other SQL Server R Services Tutorials](https://msdn.microsoft.com/en-us/library/mt591993.aspx)
112+
[Other SQL Server R Services Tutorials](https://msdn.microsoft.com/en-us/library/mt591993.aspx)
113+
[Watch a presentation about predictive modeling in SQL Server, that also goes through this sample](https://www.youtube.com/watch?v=YCyj9cdi4Nk&feature=youtu.be)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
node_modules/*
2+
bin/*.dll
3+
obj/*
4+
*.sln
5+
*.log
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
{
2+
// Use IntelliSense to learn about possible Node.js debug attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"type": "node",
9+
"request": "launch",
10+
"name": "Launch Program",
11+
"program": "${workspaceRoot}\\bin\\www.js",
12+
"cwd": "${workspaceRoot}"
13+
},
14+
{
15+
"type": "node",
16+
"request": "attach",
17+
"name": "Attach to Process",
18+
"port": 5858
19+
}
20+
]
21+
}

0 commit comments

Comments
 (0)