Merge pull request #320 from NelGson/master

uc-msft · web-flow · commit 0d8ee70374f5 · 2017-10-30T22:05:20.000-07:00
Added sentiment analysis script - ML services Python
diff --git a/samples/features/machine-learning-services/python/sentiment-analysis/pretrainedmodel_sentiment_analysis.sql b/samples/features/machine-learning-services/python/sentiment-analysis/pretrainedmodel_sentiment_analysis.sql
@@ -0,0 +1,72 @@
+/*
+To install the pretrained model in SQL Server, open an elevated CMD promtp:
+1. Navigate to the SQL Server installation path: 
+C:\<SQL SERVER Installation path>\Microsoft SQL Server\140\Setup Bootstrap\SQL2017\x64
+2. Run the following command: 
+RSetup.exe /install /component MLM /<version>/language 1033 /destdir <SQL_DB_instance_folder>\PYTHON_SERVICES\Lib\site-packages\microsoftml\mxLibs
+Example:
+RSetup.exe /install /component MLM /version 9.2.0.24 /language 1033 /destdir "C:\Program Files\Microsoft SQL Server\MSSQL14.MSSQLSERVER\PYTHON_SERVICES\Lib\site-packages\microsoftml\mxLibs"
+The models will be downloaded and extracted.
+*/
+
+
+USE [tpcxbb_1gb]
+GO
+
+--******************************************************************************************************************
+-- STEP 1 Stored procedure that uses a pretrained model to determine sentiment of a text, such as a product review
+--******************************************************************************************************************
+CREATE OR ALTER PROCEDURE [dbo].[get_sentiment] 
+(@text NVARCHAR(MAX))
+AS
+BEGIN
+	DECLARE	@script nvarchar(max);
+
+--Check that text is not empty
+IF NULLIF(@text, '') is null 
+BEGIN
+THROW 50001, 'Please specify a text value to be analyzed.', 1; 
+RETURN
+END	
+
+
+	--The Python script we want to execute
+	SET @script = N'
+import pandas as p
+from microsoftml import rx_featurize, get_sentiment
+
+analyze_this = text
+
+# Create the data
+text_to_analyze = p.DataFrame(data=dict(Text=[analyze_this]))
+
+# Get the sentiment scores
+sentiment_scores = rx_featurize(data=text_to_analyze,ml_transforms=[get_sentiment(cols=dict(scores="Text"))])
+
+# Lets translate the score to something more meaningful
+sentiment_scores["Sentiment"] = sentiment_scores.scores.apply(lambda score: "Positive" if score > 0.6 else "Negative")
+';
+	
+	EXECUTE sp_execute_external_script
+				@language = N'Python'
+				, @script = @script
+				, @output_data_1_name = N'sentiment_scores'
+				, @params = N'@text nvarchar(max)'
+				, @text = @text
+				WITH RESULT SETS (("Text" NVARCHAR(MAX),"Score" FLOAT, "Sentiment" NVARCHAR(30)));			
+
+END
+				  
+GO
+
+--******************************************************************************************************************
+-- STEP 2 Execute the stored procedure to get sentiment of your own text
+--The below examples test a negative and a positive review text
+--******************************************************************************************************************
+-- Negative review
+EXECUTE [dbo].[get_sentiment] N'These are not a normal stress reliever. First of all, they got sticky, hairy and dirty on the first day I received them. Second, they arrived with tiny wrinkles in their bodies and they were cold. Third, their paint started coming off. Fourth when they finally warmed up they started to stick together. Last, I thought they would be foam but, they are a sticky rubber. If these were not rubber, this review would not be so bad.';
+GO
+
+--Positive review
+EXECUTE [dbo].[get_sentiment] N'These are the cutest things ever!! Super fun to play with and the best part is that it lasts for a really long time. So far these have been thrown all over the place with so many of my friends asking to borrow them because they are so fun to play with. Super soft and squishy just the perfect toy for all ages.'
+GO
diff --git a/samples/features/machine-learning-services/python/sentiment-analysis/product-review-pretrained-model-sentiment-analysis.sql b/samples/features/machine-learning-services/python/sentiment-analysis/product-review-pretrained-model-sentiment-analysis.sql
@@ -0,0 +1,52 @@
+/*
+To install the pretrained model in SQL Server, open an elevated CMD promtp:
+1. Navigate to the SQL Server installation path: 
+C:\<SQL SERVER Installation path>\Microsoft SQL Server\140\Setup Bootstrap\SQL2017\x64
+2. Run the following command: 
+RSetup.exe /install /component MLM /<version>/language 1033 /destdir <SQL_DB_instance_folder>\PYTHON_SERVICES\Lib\site-packages\microsoftml\mxLibs
+Example:
+RSetup.exe /install /component MLM /version 9.2.0.24 /language 1033 /destdir "C:\Program Files\Microsoft SQL Server\MSSQL14.MSSQLSERVER\PYTHON_SERVICES\Lib\site-packages\microsoftml\mxLibs"
+The models will be downloaded and extracted.
+The database used for this sample can be downloaded here: https://sqlchoice.blob.core.windows.net/sqlchoice/static/tpcxbb_1gb.bak
+*/
+
+
+USE [tpcxbb_1gb]
+GO
+
+--******************************************************************************************************************
+-- STEP 1 Stored procedure that uses a pretrained model to determine sentiment of a text, such as a product review
+--******************************************************************************************************************
+CREATE OR ALTER PROCEDURE [dbo].[get_review_sentiment]
+AS
+BEGIN
+	DECLARE	@script nvarchar(max);
+	
+	--The Python script we want to execute
+	SET @script = N'
+from microsoftml import rx_featurize, get_sentiment
+
+# Get the sentiment scores
+sentiment_scores = rx_featurize(data=reviews, ml_transforms=[get_sentiment(cols=dict(scores="review"))])
+
+# Lets translate the score to something more meaningful
+sentiment_scores["Sentiment"] = sentiment_scores.scores.apply(lambda score: "Positive" if score > 0.6 else "Negative")
+';
+	
+	EXECUTE sp_execute_external_script
+				@language = N'Python'
+				, @script = @script
+				, @input_data_1 = N'SELECT CAST(pr_review_content AS NVARCHAR(4000)) AS review FROM product_reviews'
+				, @input_data_1_name = N'reviews'
+				, @output_data_1_name = N'sentiment_scores'
+				WITH RESULT SETS (("Review" NVARCHAR(MAX),"Score" FLOAT, "Sentiment" NVARCHAR(30)));			
+
+END
+				  
+GO
+
+--******************************************************************************************************************
+-- STEP 2 Execute the stored procedure
+--******************************************************************************************************************
+EXECUTE [dbo].[get_review_sentiment];
+GO
diff --git a/samples/features/machine-learning-services/python/sentiment-analysis/product-review-text-classification.sql b/samples/features/machine-learning-services/python/sentiment-analysis/product-review-text-classification.sql
@@ -0,0 +1,168 @@
+--The database used for this sample can be downloaded here: https://sqlchoice.blob.core.windows.net/sqlchoice/static/tpcxbb_1gb.bak
+USE [tpcxbb_1gb]
+GO
+
+--**************************************************************
+-- STEP 1 Create a table for storing the machine learning model
+--**************************************************************
+DROP TABLE IF EXISTS [dbo].[models]
+GO
+CREATE TABLE [dbo].[models](
+	[language] [varchar](30) NOT NULL,
+	[model_name] [varchar](30) NOT NULL,
+	[model] [varbinary](max) NOT NULL,
+	[create_time] [datetime2](7) NULL DEFAULT (sysdatetime()),
+	[created_by] [nvarchar](500) NULL DEFAULT (suser_sname()),
+	PRIMARY KEY CLUSTERED 
+	(
+	[language],
+	[model_name]
+	)
+)
+GO
+
+--*************************************************************************************************************
+-- STEP 2 Look at the dataset we will use in this sample
+-- Tag is a label indicating the sentiment of a review. These are actual values we will use to train the model
+-- For training purposes, we will use 90% percent of the data.
+-- For testing / scoring purposes, we will use 10% percent of the data.
+--*************************************************************************************************************
+CREATE OR ALTER VIEW product_reviews_training_data
+AS
+SELECT TOP(CAST( ( SELECT COUNT(*) FROM   product_reviews)*.9 AS INT))
+		CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
+		CASE 
+			WHEN pr_review_rating <3 THEN 1 
+			WHEN pr_review_rating =3 THEN 2 
+			ELSE 3 
+		END AS tag 
+FROM   product_reviews;
+GO
+
+CREATE OR ALTER VIEW product_reviews_test_data
+AS
+SELECT TOP(CAST( ( SELECT COUNT(*) FROM   product_reviews)*.1 AS INT))
+		CAST(pr_review_content AS NVARCHAR(4000)) AS pr_review_content,
+		CASE 
+			WHEN pr_review_rating <3 THEN 1 
+			WHEN pr_review_rating =3 THEN 2 
+			ELSE 3 
+		END AS tag 
+FROM   product_reviews;
+GO
+
+-- Look at the dataset we will use in this sample
+SELECT TOP(100) * FROM product_reviews_training_data;
+GO
+
+--***************************************************************************************************
+-- STEP 3 Create a stored procedure for training a
+-- text classifier model for product review sentiment classification (Positive, Negative, Neutral)
+-- 1 = Negative, 2 = Neutral, 3 = Positive
+--***************************************************************************************************
+CREATE OR ALTER PROCEDURE [dbo].[create_text_classification_model]
+AS
+BEGIN
+	DECLARE	  @model varbinary(max)
+			, @train_script nvarchar(max);
+	
+--The Python script we want to execute
+	SET @train_script = N'
+##Import necessary packages
+from microsoftml import rx_logistic_regression,featurize_text, n_gram
+import pickle
+
+## Defining the tag column as a categorical type
+training_data["tag"] = training_data["tag"].astype("category")
+
+## Create a machine learning model for multiclass text classification. 
+## We are using a text featurizer function to split the text in features of 2-word chunks
+model = rx_logistic_regression(formula = "tag ~ features", data = training_data, method = "multiClass", ml_transforms=[
+                        featurize_text(language="English",
+                                     cols=dict(features="pr_review_content"),
+                                      word_feature_extractor=n_gram(2, weighting="TfIdf"))])
+
+## Serialize the model so that we can store it in a table
+modelbin = pickle.dumps(model)
+';
+	
+	EXECUTE sp_execute_external_script
+						@language = N'Python'
+					  , @script = @train_script
+					  , @input_data_1 = N'SELECT * FROM product_reviews_training_data'
+					  , @input_data_1_name = N'training_data'
+					  , @params  = N'@modelbin varbinary(max) OUTPUT' 
+					  , @modelbin = @model OUTPUT;
+
+	--Save model to DB Table				  
+	DELETE FROM dbo.models WHERE model_name = 'rx_logistic_regression' and language = 'Python';
+	INSERT INTO dbo.models (language, model_name, model) VALUES('Python', 'rx_logistic_regression', @model);
+END;
+GO
+
+--***************************************************************************************************
+-- STEP 4 Execute the stored procedure that creates and saves the machine learning model in a table
+--***************************************************************************************************
+
+EXECUTE [dbo].[create_text_classification_model];
+--Take a look at the model object saved in the model table
+SELECT * FROM dbo.models;
+GO
+
+--******************************************************************************************************************
+-- STEP 5 --Stored procedure that uses the model we just created to predict/classify the sentiment of product reviews
+--******************************************************************************************************************
+CREATE OR ALTER PROCEDURE [dbo].[predict_review_sentiment]
+AS
+BEGIN
+	-- text classifier for online review sentiment classification (Positive, Negative, Neutral)
+	DECLARE	
+			 @model_bin varbinary(max)
+			, @prediction_script nvarchar(max);
+	
+	-- Select the model binary object from the model table
+	SET @model_bin = (select model from dbo.models WHERE model_name = 'rx_logistic_regression' and language = 'Python');
+	
+
+	--The Python script we want to execute
+	SET @prediction_script = N'
+from microsoftml import rx_predict
+from revoscalepy import rx_data_step 
+import pickle
+
+## The input data from the query in  @input_data_1 is populated in test_data
+## We are selecting 10% of the entire dataset for testing the model
+
+## Unserialize the model
+model = pickle.loads(model_bin)
+
+## Use the rx_logistic_regression model 
+predictions = rx_predict(model = model, data = test_data, extra_vars_to_write = ["tag", "pr_review_content"], overwrite = True)
+
+## Converting to output data set
+result = rx_data_step(predictions)
+';
+	
+	EXECUTE sp_execute_external_script
+				@language = N'Python'
+				, @script = @prediction_script
+				, @input_data_1 = N'SELECT * FROM product_reviews_test_data'
+				, @input_data_1_name = N'test_data'
+				, @output_data_1_name = N'result'
+				, @params  = N'@model_bin varbinary(max)'
+				, @model_bin = @model_bin
+		WITH RESULT SETS (("Review" NVARCHAR(MAX),"Tag" FLOAT, "Predicted_Score_Negative" FLOAT, "Predicted_Score_Neutral" FLOAT, "Predicted_Score_Positive" FLOAT));			
+END
+GO
+
+
+--***************************************************************************************************
+-- STEP 6 Execute the multi class prediction using the model we trained earlier
+--***************************************************************************************************
+EXECUTE [dbo].[predict_review_sentiment] 
+GO
+
+
+
+
+