1
-- Sample database backup used by this script (restore it first):
-- https://sqlchoice.blob.core.windows.net/sqlchoice/static/tpcxbb_1gb.bak
USE tpcxbb_1gb;
GO
4
+
5
-- **************************************************************
-- STEP 1 Create a table for storing the machine learning model
--
-- One row per ([language], [model_name]) pair; [model] holds the
-- serialized model bytes. [create_time] and [created_by] are filled
-- in by their defaults when the INSERT does not supply them.
-- All constraints are named explicitly so that their names are the
-- same on every deployment instead of being system-generated.
-- NOTE: this step drops any existing dbo.models table and its rows.
-- **************************************************************
DROP TABLE IF EXISTS [dbo].[models];
GO
CREATE TABLE [dbo].[models] (
    [language]    VARCHAR(30)    NOT NULL,
    [model_name]  VARCHAR(30)    NOT NULL,
    [model]       VARBINARY(MAX) NOT NULL,
    [create_time] DATETIME2(7)   NULL
        CONSTRAINT [models_create_time_df] DEFAULT (SYSDATETIME()),
    [created_by]  NVARCHAR(500)  NULL
        CONSTRAINT [models_created_by_df] DEFAULT (SUSER_SNAME()),
    CONSTRAINT [models_pk] PRIMARY KEY CLUSTERED ([language], [model_name])
);
GO
23
+
24
-- *************************************************************************************************************
-- STEP 2 Look at the dataset we will use in this sample
-- Tag is a label indicating the sentiment of a review. These are actual values we will use to train the model:
--     rating < 3 -> 1, rating = 3 -> 2, anything else -> 3
-- For training purposes, we will use roughly 90% of the data.
-- For testing / scoring purposes, we will use the remaining roughly 10% of the data.
--
-- The split is done by hashing the review text into 10 buckets (0-9): buckets 0-8 go to the training view,
-- bucket 9 goes to the test view. Unlike TOP (n) without ORDER BY, this is deterministic and guarantees that
-- no review text appears in both views, so the model is never scored on rows it was trained on.
-- CHECKSUM is cast to BIGINT before ABS because ABS of the smallest INT value would overflow.
-- *************************************************************************************************************
CREATE OR ALTER VIEW product_reviews_training_data
AS
SELECT
    CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
    CASE
        WHEN pr_review_rating < 3 THEN 1
        WHEN pr_review_rating = 3 THEN 2
        ELSE 3
    END AS tag
FROM product_reviews
WHERE ABS(CAST(CHECKSUM(CAST(pr_review_content AS NVARCHAR(4000))) AS BIGINT)) % 10 < 9;
GO

-- Hold-out set: the single hash bucket (9) that the training view excludes.
CREATE OR ALTER VIEW product_reviews_test_data
AS
SELECT
    CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
    CASE
        WHEN pr_review_rating < 3 THEN 1
        WHEN pr_review_rating = 3 THEN 2
        ELSE 3
    END AS tag
FROM product_reviews
WHERE ABS(CAST(CHECKSUM(CAST(pr_review_content AS NVARCHAR(4000))) AS BIGINT)) % 10 = 9;
GO
53
+
54
-- Peek at the first 100 rows of the training view (review text and its tag).
SELECT TOP (100)
    pr_review_content,
    tag
FROM product_reviews_training_data;
GO
57
+
58
-- ***************************************************************************************************
-- STEP 3 Create a stored procedure for training a
-- text classifier model for product review sentiment classification (Positive, Negative, Neutral)
-- 1 = Negative, 2 = Neutral, 3 = Positive
--
-- What it does:
--   1. Runs a Python script through sp_execute_external_script, feeding it the rows of
--      product_reviews_training_data as the data frame "training_data".
--   2. The script fits a multiclass rx_logistic_regression on 2-gram TF-IDF text features and
--      returns the pickled model through the @modelbin OUTPUT parameter.
--   3. Replaces the ('Python', 'rx_logistic_regression') row in dbo.models with the new model.
--
-- Takes no parameters and returns no result set.
-- If the script returns no model, an error is raised and the stored model is left untouched.
-- ***************************************************************************************************
CREATE OR ALTER PROCEDURE [dbo].[create_text_classification_model]
AS
BEGIN
    SET NOCOUNT ON;
    -- Any run-time error inside the replace transaction below rolls the whole transaction back.
    SET XACT_ABORT ON;

    DECLARE @model        VARBINARY(MAX),
            @train_script NVARCHAR(MAX);

    -- The Python script we want to execute.
    -- Its statements must start at column 0: Python is indentation-sensitive.
    SET @train_script = N'
##Import necessary packages
from microsoftml import rx_logistic_regression,featurize_text, n_gram
import pickle

## Defining the tag column as a categorical type
training_data["tag"] = training_data["tag"].astype("category")

## Create a machine learning model for multiclass text classification.
## We are using a text featurizer function to split the text in features of 2-word chunks
model = rx_logistic_regression(formula = "tag ~ features", data = training_data, method = "multiClass", ml_transforms=[
                        featurize_text(language="English",
                                     cols=dict(features="pr_review_content"),
                                    word_feature_extractor=n_gram(2, weighting="TfIdf"))])

## Serialize the model so that we can store it in a table
modelbin = pickle.dumps(model)
';

    EXECUTE sp_execute_external_script
          @language = N'Python'
        , @script = @train_script
        , @input_data_1 = N'SELECT pr_review_content, tag FROM product_reviews_training_data'
        , @input_data_1_name = N'training_data'
        , @params = N'@modelbin varbinary(max) OUTPUT'
        , @modelbin = @model OUTPUT;

    -- dbo.models.model is NOT NULL: stop here rather than deleting the current model
    -- and then failing on the INSERT.
    IF @model IS NULL
    BEGIN
        RAISERROR(N'Model training returned no model; the stored model was left unchanged.', 16, 1);
        RETURN;
    END;

    -- Save model to DB table. DELETE + INSERT run as one unit so a failed INSERT
    -- cannot leave the table without a model.
    BEGIN TRANSACTION;

    DELETE FROM dbo.models
    WHERE model_name = 'rx_logistic_regression'
      AND [language] = 'Python';

    INSERT INTO dbo.models ([language], model_name, model)
    VALUES ('Python', 'rx_logistic_regression', @model);

    COMMIT TRANSACTION;
END;
GO
102
+
103
-- ***************************************************************************************************
-- STEP 4 Execute the stored procedure that creates and saves the machine learning model in a table
-- ***************************************************************************************************

EXEC dbo.create_text_classification_model;

-- Take a look at the model object saved in the model table
SELECT
    [language],
    model_name,
    model,
    create_time,
    created_by
FROM dbo.models;
GO
111
+
112
-- ******************************************************************************************************************
-- STEP 5 --Stored procedure that uses the model we just created to predict/classify the sentiment of product reviews
--
-- What it does:
--   1. Loads the ('Python', 'rx_logistic_regression') model bytes from dbo.models.
--   2. Runs a Python script through sp_execute_external_script, feeding it the rows of
--      product_reviews_test_data as "test_data" and the model bytes as "model_bin".
--   3. Returns the script's "result" data frame as one result set with the columns
--      Review, Tag, Predicted_Score_Negative, Predicted_Score_Neutral, Predicted_Score_Positive.
--
-- Takes no parameters. Raises an error (and returns no result set) when no stored model is found.
-- ******************************************************************************************************************
CREATE OR ALTER PROCEDURE [dbo].[predict_review_sentiment]
AS
BEGIN
    -- text classifier for online review sentiment classification (Positive, Negative, Neutral)
    SET NOCOUNT ON;

    DECLARE @model_bin         VARBINARY(MAX),
            @prediction_script NVARCHAR(MAX);

    -- Select the model binary object from the model table
    SET @model_bin = (
        SELECT model
        FROM dbo.models
        WHERE model_name = 'rx_logistic_regression'
          AND [language] = 'Python'
    );

    -- Without a model the Python script would fail inside pickle.loads with an obscure error;
    -- fail early with a message that says what to do.
    IF @model_bin IS NULL
    BEGIN
        RAISERROR(N'No stored model found for (Python, rx_logistic_regression); run dbo.create_text_classification_model first.', 16, 1);
        RETURN;
    END;

    -- The Python script we want to execute.
    -- Its statements must start at column 0: Python is indentation-sensitive.
    -- NOTE(review): pickle.loads executes whatever the stored bytes describe, so write access
    -- to dbo.models must be limited to trusted principals.
    SET @prediction_script = N'
from microsoftml import rx_predict
from revoscalepy import rx_data_step
import pickle

## The input data from the query in @input_data_1 is populated in test_data
## We are selecting 10% of the entire dataset for testing the model

## Unserialize the model
model = pickle.loads(model_bin)

## Use the rx_logistic_regression model
predictions = rx_predict(model = model, data = test_data, extra_vars_to_write = ["tag", "pr_review_content"], overwrite = True)

## Converting to output data set
result = rx_data_step(predictions)
';

    -- Result-set column names are bracket-quoted so they do not depend on the
    -- QUOTED_IDENTIFIER setting in effect when the procedure is created.
    EXECUTE sp_execute_external_script
          @language = N'Python'
        , @script = @prediction_script
        , @input_data_1 = N'SELECT pr_review_content, tag FROM product_reviews_test_data'
        , @input_data_1_name = N'test_data'
        , @output_data_1_name = N'result'
        , @params = N'@model_bin varbinary(max)'
        , @model_bin = @model_bin
    WITH RESULT SETS ((
          [Review] NVARCHAR(MAX)
        , [Tag] FLOAT
        , [Predicted_Score_Negative] FLOAT
        , [Predicted_Score_Neutral] FLOAT
        , [Predicted_Score_Positive] FLOAT
    ));
END;
GO
157
+
158
+
159
-- ***************************************************************************************************
-- STEP 6 Execute the multi class prediction using the model we trained earlier
-- ***************************************************************************************************
EXEC dbo.predict_review_sentiment;
GO
164
+
165
+
166
+
167
+
168
+
0 commit comments