Skip to content

Commit 0d8ee70

Browse files
authored
Merge pull request #320 from NelGson/master
Added sentiment analysis script - ML services Python
2 parents e2a931b + a851aa7 commit 0d8ee70

File tree

3 files changed

+292
-0
lines changed

3 files changed

+292
-0
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
To install the pretrained model in SQL Server, open an elevated CMD prompt:
3+
1. Navigate to the SQL Server installation path:
4+
C:\<SQL SERVER Installation path>\Microsoft SQL Server\140\Setup Bootstrap\SQL2017\x64
5+
2. Run the following command:
6+
RSetup.exe /install /component MLM /version <version> /language 1033 /destdir <SQL_DB_instance_folder>\PYTHON_SERVICES\Lib\site-packages\microsoftml\mxLibs
7+
Example:
8+
RSetup.exe /install /component MLM /version 9.2.0.24 /language 1033 /destdir "C:\Program Files\Microsoft SQL Server\MSSQL14.MSSQLSERVER\PYTHON_SERVICES\Lib\site-packages\microsoftml\mxLibs"
9+
The models will be downloaded and extracted.
10+
*/
11+
12+
13+
USE [tpcxbb_1gb]
14+
GO
15+
16+
--******************************************************************************************************************
17+
-- STEP 1 Stored procedure that uses a pretrained model to determine sentiment of a text, such as a product review
18+
--******************************************************************************************************************
19+
-- Scores the sentiment of a single piece of text with the pretrained
-- microsoftml sentiment model.
--
-- Parameters:
--   @text  The text to analyze. NULL or empty input is rejected.
--
-- Result set (one row):
--   Text       The analyzed text.
--   Score      The value produced by get_sentiment for that text.
--   Sentiment  'Positive' when the score is above 0.6, otherwise 'Negative'.
--
-- Raises:
--   50001 when @text is NULL or empty.
CREATE OR ALTER PROCEDURE [dbo].[get_sentiment]
(@text NVARCHAR(MAX))
AS
BEGIN
    DECLARE @script NVARCHAR(MAX);

    -- Check that text is not empty. NULLIF maps '' to NULL so a single
    -- IS NULL test covers both the NULL and the empty-string case.
    IF NULLIF(@text, '') IS NULL
    BEGIN
        -- THROW leaves the procedure on its own; no RETURN is needed after it.
        THROW 50001, 'Please specify a text value to be analyzed.', 1;
    END

    -- The Python script we want to execute. The @text parameter is exposed
    -- to the script as the Python variable "text".
    SET @script = N'
import pandas as p
from microsoftml import rx_featurize, get_sentiment

analyze_this = text

# Create the data
text_to_analyze = p.DataFrame(data=dict(Text=[analyze_this]))

# Get the sentiment scores
sentiment_scores = rx_featurize(data=text_to_analyze,ml_transforms=[get_sentiment(cols=dict(scores="Text"))])

# Lets translate the score to something more meaningful
sentiment_scores["Sentiment"] = sentiment_scores.scores.apply(lambda score: "Positive" if score > 0.6 else "Negative")
';

    -- The data frame named in @output_data_1_name is returned as the result set.
    EXECUTE sp_execute_external_script
          @language = N'Python'
        , @script = @script
        , @output_data_1_name = N'sentiment_scores'
        , @params = N'@text nvarchar(max)'
        , @text = @text
    WITH RESULT SETS (("Text" NVARCHAR(MAX), "Score" FLOAT, "Sentiment" NVARCHAR(30)));

END
59+
60+
GO
61+
62+
--******************************************************************************************************************
63+
-- STEP 2 Execute the stored procedure to get sentiment of your own text
64+
--The below examples test a negative and a positive review text
65+
--******************************************************************************************************************
66+
-- Negative review
67+
EXECUTE [dbo].[get_sentiment] N'These are not a normal stress reliever. First of all, they got sticky, hairy and dirty on the first day I received them. Second, they arrived with tiny wrinkles in their bodies and they were cold. Third, their paint started coming off. Fourth when they finally warmed up they started to stick together. Last, I thought they would be foam but, they are a sticky rubber. If these were not rubber, this review would not be so bad.';
68+
GO
69+
70+
--Positive review
71+
EXECUTE [dbo].[get_sentiment] N'These are the cutest things ever!! Super fun to play with and the best part is that it lasts for a really long time. So far these have been thrown all over the place with so many of my friends asking to borrow them because they are so fun to play with. Super soft and squishy just the perfect toy for all ages.'
72+
GO
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
To install the pretrained model in SQL Server, open an elevated CMD prompt:
3+
1. Navigate to the SQL Server installation path:
4+
C:\<SQL SERVER Installation path>\Microsoft SQL Server\140\Setup Bootstrap\SQL2017\x64
5+
2. Run the following command:
6+
RSetup.exe /install /component MLM /version <version> /language 1033 /destdir <SQL_DB_instance_folder>\PYTHON_SERVICES\Lib\site-packages\microsoftml\mxLibs
7+
Example:
8+
RSetup.exe /install /component MLM /version 9.2.0.24 /language 1033 /destdir "C:\Program Files\Microsoft SQL Server\MSSQL14.MSSQLSERVER\PYTHON_SERVICES\Lib\site-packages\microsoftml\mxLibs"
9+
The models will be downloaded and extracted.
10+
The database used for this sample can be downloaded here: https://sqlchoice.blob.core.windows.net/sqlchoice/static/tpcxbb_1gb.bak
11+
*/
12+
13+
14+
USE [tpcxbb_1gb]
15+
GO
16+
17+
--******************************************************************************************************************
18+
-- STEP 1 Stored procedure that uses a pretrained model to determine sentiment of a text, such as a product review
19+
--******************************************************************************************************************
20+
-- Scores every row of product_reviews with the pretrained microsoftml
-- sentiment model and returns one result row per review:
--   Review     The review text (cast to NVARCHAR(4000) by the input query).
--   Score      The value produced by get_sentiment for that review.
--   Sentiment  'Positive' when the score is above 0.6, otherwise 'Negative'.
CREATE OR ALTER PROCEDURE [dbo].[get_review_sentiment]
AS
BEGIN
    -- Python source handed to the external runtime. The rows of the input
    -- query arrive in the data frame "reviews"; the data frame
    -- "sentiment_scores" is what gets returned to SQL Server.
    DECLARE @python_source NVARCHAR(MAX) = N'
from microsoftml import rx_featurize, get_sentiment

# Get the sentiment scores
sentiment_scores = rx_featurize(data=reviews, ml_transforms=[get_sentiment(cols=dict(scores="review"))])

# Lets translate the score to something more meaningful
sentiment_scores["Sentiment"] = sentiment_scores.scores.apply(lambda score: "Positive" if score > 0.6 else "Negative")
';

    EXECUTE sp_execute_external_script
        @language           = N'Python',
        @script             = @python_source,
        @input_data_1       = N'SELECT CAST(pr_review_content AS NVARCHAR(4000)) AS review FROM product_reviews',
        @input_data_1_name  = N'reviews',
        @output_data_1_name = N'sentiment_scores'
    WITH RESULT SETS
    ((
        "Review"    NVARCHAR(MAX),
        "Score"     FLOAT,
        "Sentiment" NVARCHAR(30)
    ));
END
45+
46+
GO
47+
48+
--******************************************************************************************************************
49+
-- STEP 2 Execute the stored procedure
50+
--******************************************************************************************************************
51+
EXECUTE [dbo].[get_review_sentiment];
52+
GO
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
--The database used for this sample can be downloaded here: https://sqlchoice.blob.core.windows.net/sqlchoice/static/tpcxbb_1gb.bak
2+
USE [tpcxbb_1gb]
3+
GO
4+
5+
--**************************************************************
6+
-- STEP 1 Create a table for storing the machine learning model
7+
--**************************************************************
8+
-- Recreate the model store from scratch. Any previously saved models are
-- dropped together with the table.
DROP TABLE IF EXISTS dbo.models;
GO

-- One row per (language, model_name); the serialized model is kept in "model".
-- create_time and created_by are filled in by their defaults when omitted.
CREATE TABLE dbo.models
(
    [language]    VARCHAR(30)    NOT NULL,
    [model_name]  VARCHAR(30)    NOT NULL,
    [model]       VARBINARY(MAX) NOT NULL,
    [create_time] DATETIME2(7)   NULL DEFAULT (SYSDATETIME()),
    [created_by]  NVARCHAR(500)  NULL DEFAULT (SUSER_SNAME()),
    PRIMARY KEY CLUSTERED ([language], [model_name])
);
GO
23+
24+
--*************************************************************************************************************
25+
-- STEP 2 Look at the dataset we will use in this sample
26+
-- Tag is a label indicating the sentiment of a review. These are actual values we will use to train the model
27+
-- For training purposes, we will use 90% of the data.
28+
-- For testing / scoring purposes, we will use 10% of the data.
29+
--*************************************************************************************************************
30+
-- Training split: the first 90% of product_reviews in ascending review-key
-- order. Each row carries the review text and a sentiment tag derived from
-- the rating: 1 for ratings below 3, 2 for a rating of 3, 3 otherwise.
--
-- The ORDER BY makes TOP deterministic. Together with the descending order
-- used by product_reviews_test_data it keeps the two splits disjoint:
-- floor(0.9 * n) rows from one end plus floor(0.1 * n) rows from the other
-- end can never exceed n rows.
-- NOTE(review): assumes pr_review_sk is a unique key of product_reviews;
-- confirm against the table definition.
CREATE OR ALTER VIEW product_reviews_training_data
AS
SELECT TOP (CAST((SELECT COUNT(*) FROM product_reviews) * .9 AS INT))
    CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
    CASE
        WHEN pr_review_rating < 3 THEN 1
        WHEN pr_review_rating = 3 THEN 2
        ELSE 3
    END AS tag
FROM product_reviews
ORDER BY pr_review_sk ASC;
GO

-- Test split: the last 10% of product_reviews, taken from the opposite end
-- of the same review-key ordering so no test row is also a training row.
-- Columns and tag derivation match product_reviews_training_data.
-- NOTE(review): assumes pr_review_sk is a unique key of product_reviews;
-- confirm against the table definition.
CREATE OR ALTER VIEW product_reviews_test_data
AS
SELECT TOP (CAST((SELECT COUNT(*) FROM product_reviews) * .1 AS INT))
    CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
    CASE
        WHEN pr_review_rating < 3 THEN 1
        WHEN pr_review_rating = 3 THEN 2
        ELSE 3
    END AS tag
FROM product_reviews
ORDER BY pr_review_sk DESC;
GO
53+
54+
-- Look at the dataset we will use in this sample
55+
SELECT TOP(100) * FROM product_reviews_training_data;
56+
GO
57+
58+
--***************************************************************************************************
59+
-- STEP 3 Create a stored procedure for training a
60+
-- text classifier model for product review sentiment classification (Positive, Negative, Neutral)
61+
-- 1 = Negative, 2 = Neutral, 3 = Positive
62+
--***************************************************************************************************
63+
-- Trains a multiclass text classifier on product_reviews_training_data and
-- stores the pickled model in dbo.models under
-- (language = 'Python', model_name = 'rx_logistic_regression'),
-- replacing any model previously stored under that key.
--
-- Raises:
--   50003 when the training script hands back no model. The previously
--   stored model, if any, is left untouched in that case.
CREATE OR ALTER PROCEDURE [dbo].[create_text_classification_model]
AS
BEGIN
    DECLARE @model VARBINARY(MAX)
          , @train_script NVARCHAR(MAX);

    -- The Python script we want to execute. The input query rows arrive in
    -- the data frame "training_data"; the pickled model is handed back
    -- through the "modelbin" output parameter.
    SET @train_script = N'
##Import necessary packages
from microsoftml import rx_logistic_regression,featurize_text, n_gram
import pickle

## Defining the tag column as a categorical type
training_data["tag"] = training_data["tag"].astype("category")

## Create a machine learning model for multiclass text classification.
## We are using a text featurizer function to split the text in features of 2-word chunks
model = rx_logistic_regression(formula = "tag ~ features", data = training_data, method = "multiClass", ml_transforms=[
    featurize_text(language="English",
                   cols=dict(features="pr_review_content"),
                   word_feature_extractor=n_gram(2, weighting="TfIdf"))])

## Serialize the model so that we can store it in a table
modelbin = pickle.dumps(model)
';

    -- Explicit column list: the script only reads pr_review_content and tag.
    EXECUTE sp_execute_external_script
          @language = N'Python'
        , @script = @train_script
        , @input_data_1 = N'SELECT pr_review_content, tag FROM product_reviews_training_data'
        , @input_data_1_name = N'training_data'
        , @params = N'@modelbin varbinary(max) OUTPUT'
        , @modelbin = @model OUTPUT;

    -- dbo.models.model is NOT NULL, so a NULL model could never be saved.
    -- Fail before the DELETE so the existing model is not lost.
    IF @model IS NULL
    BEGIN
        THROW 50003, 'Model training returned no model; the stored model was left unchanged.', 1;
    END

    -- Save model to DB table. DELETE and INSERT run in one transaction so a
    -- failed INSERT rolls back the DELETE instead of leaving no model behind.
    SET XACT_ABORT ON;
    BEGIN TRANSACTION;
        DELETE FROM dbo.models
        WHERE model_name = 'rx_logistic_regression'
          AND language = 'Python';

        INSERT INTO dbo.models (language, model_name, model)
        VALUES ('Python', 'rx_logistic_regression', @model);
    COMMIT TRANSACTION;
END;
101+
GO
102+
103+
--***************************************************************************************************
104+
-- STEP 4 Execute the stored procedure that creates and saves the machine learning model in a table
105+
--***************************************************************************************************
106+
107+
EXECUTE [dbo].[create_text_classification_model];
108+
--Take a look at the model object saved in the model table
109+
SELECT * FROM dbo.models;
110+
GO
111+
112+
--******************************************************************************************************************
113+
-- STEP 5 --Stored procedure that uses the model we just created to predict/classify the sentiment of product reviews
114+
--******************************************************************************************************************
115+
-- Classifies the reviews in product_reviews_test_data with the model stored
-- in dbo.models under (language = 'Python', model_name = 'rx_logistic_regression').
--
-- Result set (one row per test review):
--   Review, Tag, Predicted_Score_Negative, Predicted_Score_Neutral,
--   Predicted_Score_Positive
--
-- Raises:
--   50002 when no model is stored under that key.
CREATE OR ALTER PROCEDURE [dbo].[predict_review_sentiment]
AS
BEGIN
    -- text classifier for online review sentiment classification (Positive, Negative, Neutral)
    DECLARE
          @model_bin VARBINARY(MAX)
        , @prediction_script NVARCHAR(MAX);

    -- Select the model binary object from the model table.
    SET @model_bin = (
        SELECT model
        FROM dbo.models
        WHERE model_name = 'rx_logistic_regression'
          AND language = 'Python'
    );

    -- Without a stored model the script would call pickle.loads on a NULL
    -- value and fail with an opaque Python error; fail early and clearly.
    IF @model_bin IS NULL
    BEGIN
        THROW 50002, 'No trained model found in dbo.models. Run dbo.create_text_classification_model first.', 1;
    END

    -- The Python script we want to execute.
    -- NOTE(security): the script unpickles @model_bin, and unpickling can
    -- execute arbitrary code. Only trusted principals should be able to
    -- write to dbo.models.
    SET @prediction_script = N'
from microsoftml import rx_predict
from revoscalepy import rx_data_step
import pickle

## The input data from the query in @input_data_1 is populated in test_data
## We are selecting 10% of the entire dataset for testing the model

## Unserialize the model
model = pickle.loads(model_bin)

## Use the rx_logistic_regression model
predictions = rx_predict(model = model, data = test_data, extra_vars_to_write = ["tag", "pr_review_content"], overwrite = True)

## Converting to output data set
result = rx_data_step(predictions)
';

    -- Explicit column list: the script only reads pr_review_content and tag.
    EXECUTE sp_execute_external_script
          @language = N'Python'
        , @script = @prediction_script
        , @input_data_1 = N'SELECT pr_review_content, tag FROM product_reviews_test_data'
        , @input_data_1_name = N'test_data'
        , @output_data_1_name = N'result'
        , @params = N'@model_bin varbinary(max)'
        , @model_bin = @model_bin
    WITH RESULT SETS (("Review" NVARCHAR(MAX), "Tag" FLOAT, "Predicted_Score_Negative" FLOAT, "Predicted_Score_Neutral" FLOAT, "Predicted_Score_Positive" FLOAT));
END
156+
GO
157+
158+
159+
--***************************************************************************************************
160+
-- STEP 6 Execute the multi class prediction using the model we trained earlier
161+
--***************************************************************************************************
162+
EXECUTE [dbo].[predict_review_sentiment]
163+
GO
164+
165+
166+
167+
168+

0 commit comments

Comments
 (0)