USE [tpcxbb_1gb];
GO

-- **************************************************************
-- STEP 1 Create a table for storing the machine learning model
-- **************************************************************
-- One row per (language, model_name) pair; [model] holds the serialized model bytes.
-- The table is dropped and recreated, so any models stored by an earlier run
-- of this script are discarded.
DROP TABLE IF EXISTS [dbo].[models];
GO
CREATE TABLE [dbo].[models] (
    [language]    VARCHAR(30)    NOT NULL,
    [model_name]  VARCHAR(30)    NOT NULL,
    [model]       VARBINARY(MAX) NOT NULL,
    -- Audit columns: populated by their defaults when an INSERT omits them.
    [create_time] DATETIME2(7)   NULL
        CONSTRAINT [models_create_time_df] DEFAULT (SYSDATETIME()),
    [created_by]  NVARCHAR(500)  NULL
        CONSTRAINT [models_created_by_df] DEFAULT (SUSER_SNAME()),
    -- Constraints are named explicitly so errors and later migrations can refer to them.
    CONSTRAINT [models_pk] PRIMARY KEY CLUSTERED ([language], [model_name])
);
GO

-- *************************************************************************************************************
-- STEP 2 Look at the dataset we will use in this sample
-- Tag is a label indicating the sentiment of a review. These are actual values we will use to train the model
--   (rating < 3 -> 1, rating = 3 -> 2, anything else -> 3).
-- For training purposes, we will use roughly 90% of the data.
-- For testing / scoring purposes, we will use the remaining roughly 10% of the data.
--
-- The split is made by hashing the (truncated) review text into ten buckets:
-- buckets 0-8 are training data, bucket 9 is test data. Unlike TOP (n) without
-- ORDER BY, this is repeatable between runs and guarantees that the two views
-- never return the same row, so the model is not scored on rows it was trained on.
-- The 90/10 proportion is approximate because it depends on how the hash values
-- are distributed; identical review texts always land in the same view.
-- *************************************************************************************************************
CREATE OR ALTER VIEW product_reviews_training_data
AS
SELECT
    r.pr_review_content,
    CASE
        WHEN r.pr_review_rating < 3 THEN 1
        WHEN r.pr_review_rating = 3 THEN 2
        ELSE 3
    END AS tag
FROM (
    SELECT
        CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
        pr_review_rating
    FROM product_reviews
) AS r
-- CHECKSUM returns an INT; widen to BIGINT before ABS so the minimum INT value cannot overflow.
WHERE ABS(CAST(CHECKSUM(r.pr_review_content) AS BIGINT)) % 10 < 9;
GO

CREATE OR ALTER VIEW product_reviews_test_data
AS
SELECT
    r.pr_review_content,
    CASE
        WHEN r.pr_review_rating < 3 THEN 1
        WHEN r.pr_review_rating = 3 THEN 2
        ELSE 3
    END AS tag
FROM (
    SELECT
        CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
        pr_review_rating
    FROM product_reviews
) AS r
-- Complement of the training predicate: the one bucket the training view leaves out.
WHERE ABS(CAST(CHECKSUM(r.pr_review_content) AS BIGINT)) % 10 = 9;
GO

-- Look at the dataset we will use in this sample.
-- No ORDER BY: this is only a peek, so which 100 rows come back is not defined.
SELECT TOP (100)
    pr_review_content,
    tag
FROM product_reviews_training_data;
GO

-- ***************************************************************************************************
-- STEP 3 Create a stored procedure for training a
-- text classifier model for product review sentiment classification (Positive, Negative, Neutral)
-- 1 = Negative, 2 = Neutral, 3 = Positive
--
-- The procedure takes no parameters. It runs a Python training script over
-- product_reviews_training_data, receives the pickled model back through the
-- @modelbin OUTPUT parameter, and stores it in dbo.models under
-- (language = 'Python', model_name = 'rx_logistic_regression'), replacing any
-- existing row with that key. It raises error 50000 if the script returns no model.
-- ***************************************************************************************************
CREATE OR ALTER PROCEDURE [dbo].[create_text_classification_model]
AS
BEGIN
    -- Make any run-time error inside the model swap below roll the whole swap back.
    SET XACT_ABORT ON;

    DECLARE @model VARBINARY(MAX)
          , @train_script NVARCHAR(MAX);

    -- The Python script we want to execute.
    -- Python is indentation-sensitive, so its top-level statements start in column 1.
    SET @train_script = N'
##Import necessary packages
from microsoftml import rx_logistic_regression,featurize_text, n_gram
import pickle

## Defining the tag column as a categorical type
training_data["tag"] = training_data["tag"].astype("category")

## Create a machine learning model for multiclass text classification.
## We are using a text featurizer function to split the text in features of 2-word chunks
model = rx_logistic_regression(formula = "tag ~ features", data = training_data, method = "multiClass", ml_transforms=[
                        featurize_text(language="English",
                                     cols=dict(features="pr_review_content"),
                                    word_feature_extractor=n_gram(2, weighting="TfIdf"))])

## Serialize the model so that we can store it in a table
modelbin = pickle.dumps(model)
';

    EXECUTE sp_execute_external_script
          @language = N'Python'
        , @script = @train_script
        , @input_data_1 = N'SELECT pr_review_content, tag FROM product_reviews_training_data'
        , @input_data_1_name = N'training_data'
        , @params = N'@modelbin varbinary(max) OUTPUT'
        , @modelbin = @model OUTPUT;

    -- Refuse to touch the stored model when training produced nothing:
    -- dbo.models.model is NOT NULL, so the INSERT below could not succeed anyway.
    IF @model IS NULL
    BEGIN
        THROW 50000, N'Training did not return a model; dbo.models was left unchanged.', 1;
    END;

    -- Save model to DB Table.
    -- DELETE + INSERT run as one unit so a failed INSERT cannot leave the table
    -- without the previously stored model.
    BEGIN TRANSACTION;

    DELETE FROM dbo.models
    WHERE model_name = 'rx_logistic_regression'
      AND [language] = 'Python';

    INSERT INTO dbo.models ([language], model_name, model)
    VALUES ('Python', 'rx_logistic_regression', @model);

    COMMIT TRANSACTION;
END;
GO

-- ***************************************************************************************************
-- STEP 4 Execute the stored procedure that creates and saves the machine learning model in a table
-- ***************************************************************************************************

EXECUTE [dbo].[create_text_classification_model];
-- Take a look at the model object saved in the model table
SELECT
    [language],
    model_name,
    model,
    create_time,
    created_by
FROM dbo.models;
GO

-- ******************************************************************************************************************
-- STEP 5 --Stored procedure that uses the model we just created to predict/classify the sentiment of product reviews
--
-- The procedure takes no parameters. It loads the model stored in dbo.models under
-- (language = 'Python', model_name = 'rx_logistic_regression'), scores every row of
-- product_reviews_test_data with it, and returns one result set with the columns
-- Review, Tag, Predicted_Score_Negative, Predicted_Score_Neutral, Predicted_Score_Positive.
-- It raises error 50000 when no such model is stored.
-- ******************************************************************************************************************
CREATE OR ALTER PROCEDURE [dbo].[predict_review_sentiment]
AS
BEGIN
    -- text classifier for online review sentiment classification (Positive, Negative, Neutral)
    DECLARE
          @model_bin VARBINARY(MAX)
        , @prediction_script NVARCHAR(MAX);

    -- Select the model binary object from the model table.
    -- (language, model_name) is the table's primary key, so at most one row matches.
    SET @model_bin = (
        SELECT model
        FROM dbo.models
        WHERE model_name = 'rx_logistic_regression'
          AND [language] = 'Python'
    );

    -- Fail with a clear message here instead of an opaque Python error from pickle.loads.
    IF @model_bin IS NULL
    BEGIN
        THROW 50000, N'No stored model found for (Python, rx_logistic_regression); run dbo.create_text_classification_model first.', 1;
    END;

    -- The Python script we want to execute.
    -- Python is indentation-sensitive, so its top-level statements start in column 1.
    -- NOTE(review): pickle.loads can execute code embedded in the bytes it is given,
    -- so write access to dbo.models should be limited to trusted principals.
    SET @prediction_script = N'
from microsoftml import rx_predict
from revoscalepy import rx_data_step
import pickle

## The input data from the query in @input_data_1 is populated in test_data
## We are selecting 10% of the entire dataset for testing the model

## Unserialize the model
model = pickle.loads(model_bin)

## Use the rx_logistic_regression model
predictions = rx_predict(model = model, data = test_data, extra_vars_to_write = ["tag", "pr_review_content"], overwrite = True)

## Converting to output data set
result = rx_data_step(predictions)
';

    -- WITH RESULT SETS names and types the returned columns by position.
    -- NOTE(review): this assumes the script emits the review text first, then the tag,
    -- then the three class scores - confirm against the actual rx_predict output.
    EXECUTE sp_execute_external_script
          @language = N'Python'
        , @script = @prediction_script
        , @input_data_1 = N'SELECT pr_review_content, tag FROM product_reviews_test_data'
        , @input_data_1_name = N'test_data'
        , @output_data_1_name = N'result'
        , @params = N'@model_bin varbinary(max)'
        , @model_bin = @model_bin
    WITH RESULT SETS ((
          [Review] NVARCHAR(MAX)
        , [Tag] FLOAT
        , [Predicted_Score_Negative] FLOAT
        , [Predicted_Score_Neutral] FLOAT
        , [Predicted_Score_Positive] FLOAT
    ));
END;
GO

-- ***************************************************************************************************
-- STEP 6 Execute the multi class prediction using the model we trained earlier
-- ***************************************************************************************************
EXECUTE [dbo].[predict_review_sentiment];
GO