-- Sample database backup (restore it before running this script):
-- https://sqlchoice.blob.core.windows.net/sqlchoice/static/tpcxbb_1gb.bak
USE tpcxbb_1gb;
GO

-- **************************************************************
-- STEP 1 Create a table for storing the machine learning model
-- **************************************************************
-- Drop any previous copy so the script can be re-run from the top.
DROP TABLE IF EXISTS dbo.models;
GO

-- One row per (language, model_name); [model] holds the serialized model bytes.
CREATE TABLE dbo.models
(
    [language]    VARCHAR(30)    NOT NULL,
    [model_name]  VARCHAR(30)    NOT NULL,
    [model]       VARBINARY(MAX) NOT NULL,
    [create_time] DATETIME2(7)   NULL DEFAULT (SYSDATETIME()),  -- filled with the insert time when omitted
    [created_by]  NVARCHAR(500)  NULL DEFAULT (SUSER_SNAME()),  -- filled with the inserting login when omitted
    PRIMARY KEY CLUSTERED ([language], [model_name])
);
GO

-- *************************************************************************************************************
-- STEP 2 Look at the dataset we will use in this sample
-- Tag is a label indicating the sentiment of a review. These are the actual values we will use to train the model.
-- For training purposes, we will use 90 percent of the data.
-- For testing / scoring purposes, we will use 10 percent of the data.
-- *************************************************************************************************************
-- Training set: the first 90% of product_reviews in ascending pr_review_sk order.
-- Without an ORDER BY, TOP returns an arbitrary subset, so the rows chosen could change
-- between runs and overlap with the rows used for testing. Ordering by the key makes the
-- split deterministic and takes the rows from the low end of the key range.
-- NOTE(review): pr_review_sk is assumed to be the unique review key of product_reviews
-- (TPCx-BB schema) -- confirm against the restored database.
CREATE OR ALTER VIEW product_reviews_training_data
AS
SELECT TOP (CAST((SELECT COUNT(*) FROM product_reviews) * .9 AS INT))
    CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
    CASE
        WHEN pr_review_rating < 3 THEN 1  -- rating below 3
        WHEN pr_review_rating = 3 THEN 2  -- rating exactly 3
        ELSE 3                            -- everything else (ratings above 3, and NULL ratings)
    END AS tag
FROM product_reviews
ORDER BY pr_review_sk ASC;
GO

-- Test / scoring set: the last 10% of product_reviews, i.e. the first 10% in descending
-- pr_review_sk order.
-- Without an ORDER BY, TOP returns an arbitrary subset, which may be rows the model was
-- trained on. Ordering by the key makes the selection deterministic and takes the rows from
-- the high end of the key range, so a training set drawn from the low end does not overlap.
-- NOTE(review): pr_review_sk is assumed to be the unique review key of product_reviews
-- (TPCx-BB schema) -- confirm against the restored database.
CREATE OR ALTER VIEW product_reviews_test_data
AS
SELECT TOP (CAST((SELECT COUNT(*) FROM product_reviews) * .1 AS INT))
    CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
    CASE
        WHEN pr_review_rating < 3 THEN 1  -- rating below 3
        WHEN pr_review_rating = 3 THEN 2  -- rating exactly 3
        ELSE 3                            -- everything else (ratings above 3, and NULL ratings)
    END AS tag
FROM product_reviews
ORDER BY pr_review_sk DESC;
GO

-- Preview 100 rows of the training view (review text plus its sentiment tag).
SELECT TOP (100)
    pr_review_content,
    tag
FROM product_reviews_training_data;
GO

-- ***************************************************************************************************
-- STEP 3 Create a stored procedure for training a
-- text classifier model for product review sentiment classification (Positive, Negative, Neutral)
-- 1 = Negative, 2 = Neutral, 3 = Positive
-- ***************************************************************************************************
-- dbo.create_text_classification_model
--   Parameters : none.
--   Reads      : product_reviews_training_data (columns pr_review_content, tag).
--   Writes     : replaces the ('Python', 'rx_logistic_regression') row in dbo.models with the
--                newly trained, pickled model.
--   Errors     : raises error 50001 if the Python script returns no model; in that case the
--                previously stored model is left untouched.
CREATE OR ALTER PROCEDURE [dbo].[create_text_classification_model]
AS
BEGIN
    -- Make any runtime error roll back the open transaction, so the DELETE below
    -- can never be committed without the matching INSERT.
    SET XACT_ABORT ON;

    DECLARE @model        VARBINARY(MAX),
            @train_script NVARCHAR(MAX);

    -- The Python script we want to execute.
    -- It is kept at column 0 inside the literal because Python is indentation-sensitive.
    SET @train_script = N'
##Import necessary packages
from microsoftml import rx_logistic_regression,featurize_text, n_gram
import pickle

## Defining the tag column as a categorical type
training_data["tag"] = training_data["tag"].astype("category")

## Create a machine learning model for multiclass text classification.
## We are using a text featurizer function to split the text in features of 2-word chunks
model = rx_logistic_regression(formula = "tag ~ features", data = training_data, method = "multiClass", ml_transforms=[
                        featurize_text(language="English",
                                     cols=dict(features="pr_review_content"),
                                     word_feature_extractor=n_gram(2, weighting="TfIdf"))])

## Serialize the model so that we can store it in a table
modelbin = pickle.dumps(model)
';

    -- training_data is the data frame built from @input_data_1;
    -- modelbin is handed back to T-SQL through the OUTPUT parameter.
    EXECUTE sp_execute_external_script
          @language          = N'Python'
        , @script            = @train_script
        , @input_data_1      = N'SELECT pr_review_content, tag FROM product_reviews_training_data'
        , @input_data_1_name = N'training_data'
        , @params            = N'@modelbin varbinary(max) OUTPUT'
        , @modelbin          = @model OUTPUT;

    -- Refuse to touch the stored model when training produced nothing.
    IF @model IS NULL
    BEGIN
        THROW 50001, N'Model training did not return a serialized model; dbo.models was not changed.', 1;
    END;

    -- Save model to DB table. Delete + insert run as one unit so readers never
    -- observe a moment where the model row is missing.
    BEGIN TRANSACTION;

    DELETE FROM dbo.models
    WHERE model_name = 'rx_logistic_regression'
      AND language = 'Python';

    INSERT INTO dbo.models (language, model_name, model)
    VALUES ('Python', 'rx_logistic_regression', @model);

    COMMIT TRANSACTION;
END;
GO

-- ***************************************************************************************************
-- STEP 4 Execute the stored procedure that creates and saves the machine learning model in a table
-- ***************************************************************************************************

-- Train the classifier and persist it.
EXEC dbo.create_text_classification_model;

-- Take a look at the model object saved in the model table.
SELECT
    [language],
    [model_name],
    [model],
    [create_time],
    [created_by]
FROM dbo.models;
GO

-- ******************************************************************************************************************
-- STEP 5 Stored procedure that uses the model we just created to predict/classify the sentiment of product reviews
-- ******************************************************************************************************************
-- dbo.predict_review_sentiment
--   Parameters : none.
--   Reads      : the ('Python', 'rx_logistic_regression') row of dbo.models and the
--                product_reviews_test_data view (columns pr_review_content, tag).
--   Returns    : one result set with columns Review, Tag, Predicted_Score_Negative,
--                Predicted_Score_Neutral, Predicted_Score_Positive.
--   Errors     : raises error 50002 when no stored model is found.
CREATE OR ALTER PROCEDURE [dbo].[predict_review_sentiment]
AS
BEGIN
    -- Text classifier for online review sentiment classification (Positive, Negative, Neutral).
    DECLARE @model_bin         VARBINARY(MAX),
            @prediction_script NVARCHAR(MAX);

    -- Select the model binary object from the model table.
    -- The primary key on (language, model_name) guarantees at most one row.
    -- NOTE(review): the script below unpickles this value, and unpickling can execute code
    -- embedded in the payload -- write access to dbo.models should be limited to trusted principals.
    SET @model_bin = (
        SELECT model
        FROM dbo.models
        WHERE model_name = 'rx_logistic_regression'
          AND language = 'Python'
    );

    -- Fail with a clear message instead of letting pickle.loads choke on a NULL model.
    IF @model_bin IS NULL
    BEGIN
        THROW 50002, N'No Python model named rx_logistic_regression found in dbo.models; run dbo.create_text_classification_model first.', 1;
    END;

    -- The Python script we want to execute.
    -- It is kept at column 0 inside the literal because Python is indentation-sensitive.
    SET @prediction_script = N'
from microsoftml import rx_predict
from revoscalepy import rx_data_step
import pickle

## The input data from the query in @input_data_1 is populated in test_data
## We are selecting 10% of the entire dataset for testing the model

## Unserialize the model
model = pickle.loads(model_bin)

## Use the rx_logistic_regression model
predictions = rx_predict(model = model, data = test_data, extra_vars_to_write = ["tag", "pr_review_content"], overwrite = True)

## Converting to output data set
result = rx_data_step(predictions)
';

    -- test_data is the data frame built from @input_data_1; the "result" data frame is
    -- returned as the result set described by WITH RESULT SETS.
    -- Bracketed column names do not depend on the session QUOTED_IDENTIFIER setting.
    EXECUTE sp_execute_external_script
          @language           = N'Python'
        , @script             = @prediction_script
        , @input_data_1       = N'SELECT pr_review_content, tag FROM product_reviews_test_data'
        , @input_data_1_name  = N'test_data'
        , @output_data_1_name = N'result'
        , @params             = N'@model_bin varbinary(max)'
        , @model_bin          = @model_bin
    WITH RESULT SETS ((
          [Review]                   NVARCHAR(MAX)
        , [Tag]                      FLOAT
        , [Predicted_Score_Negative] FLOAT
        , [Predicted_Score_Neutral]  FLOAT
        , [Predicted_Score_Positive] FLOAT
    ));
END;
GO


-- ***************************************************************************************************
-- STEP 6 Execute the multi class prediction using the model we trained earlier
-- ***************************************************************************************************
-- Score the test reviews with the stored model and return the per-class scores.
EXEC dbo.predict_review_sentiment;
GO
164+
165+
166+
167+
168+
-- End of sample script.