Fix linter errors

bzaczynski · bzaczynski · commit 99cd4674d9ce · 2020-10-26T13:49:36.000+01:00
diff --git a/nlp-sentiment-analysis/README.md b/nlp-sentiment-analysis/README.md
@@ -1,4 +1,38 @@
 # Use Sentiment Analysis With Python to Classify Reviews
-This folder contains resources and materials for Real Python's Use Sentiment Analysis With Python to Classify Reviews tutorial.
 
-`sentiment_analyzer.py` contains a fully built sentiment analyzer as used in the project section of the tutorial. To get the sentiment of a review of your choosing, replace the text in the `TEST_REVIEW` variable.
+Resources and materials for Real Python's [Use Sentiment Analysis With Python to Classify Reviews](https://realpython.com/use-sentiment-analysis-python-classify-movie-reviews/) tutorial.
+
+## Installation
+
+Create and activate a new virtual environment:
+
+```shell
+$ python -m venv .venv
+$ source .venv/bin/activate
+```
+
+Install Python dependencies into the active virtual environment:
+
+```shell
+(.venv) $ python -m pip install -r requirements.txt
+```
+
+Download the English model for spaCy:
+
+```shell
+(.venv) $ python -m spacy download en_core_web_sm
+```
+
+Download and extract the [Large Movie Review Dataset](https://ai.stanford.edu/~amaas/data/sentiment/) compiled by [Andrew Maas](http://www.andrew-maas.net/):
+
+```shell
+$ curl -s https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz | tar xvz
+```
+
+## Usage
+
+Get the sentiment of a movie review stored in the `TEST_REVIEW` variable:
+
+```shell
+(.venv) $ python sentiment_analyzer.py
+```
diff --git a/nlp-sentiment-analysis/sentiment_analyzer.py b/nlp-sentiment-analysis/sentiment_analyzer.py
@@ -19,9 +19,7 @@
 
 
 def train_model(
-    training_data: list,
-    test_data: list,
-    iterations: int = 20
+    training_data: list, test_data: list, iterations: int = 20
 ) -> None:
     # Build pipeline
     nlp = spacy.load("en_core_web_sm")
@@ -60,7 +58,7 @@ def train_model(
                 evaluation_results = evaluate_model(
                     tokenizer=nlp.tokenizer,
                     textcat=textcat,
-                    test_data=test_data
+                    test_data=test_data,
                 )
                 print(
                     f"{loss['textcat']}\t{evaluation_results['precision']}"
@@ -73,9 +71,7 @@ def train_model(
         nlp.to_disk("model_artifacts")
 
 
-def evaluate_model(
-    tokenizer, textcat, test_data: list
-) -> dict:
+def evaluate_model(tokenizer, textcat, test_data: list) -> dict:
     reviews, labels = zip(*test_data)
     reviews = (tokenizer(review) for review in reviews)
     true_positives = 0
@@ -87,9 +83,7 @@ def evaluate_model(
         for predicted_label, score in review.cats.items():
             # Every cats dictionary includes both labels, you can get all
             # the info you need with just the pos label
-            if (
-                predicted_label == "neg"
-            ):
+            if predicted_label == "neg":
                 continue
             if score >= 0.5 and true_label == "pos":
                 true_positives += 1
@@ -128,9 +122,7 @@ def test_model(input_data: str = TEST_REVIEW):
 
 
 def load_training_data(
-    data_directory: str = "aclImdb/train",
-    split: float = 0.8,
-    limit: int = 0
+    data_directory: str = "aclImdb/train", split: float = 0.8, limit: int = 0
 ) -> tuple:
     # Load from files
     reviews = []
@@ -145,7 +137,8 @@ def load_training_data(
                         spacy_label = {
                             "cats": {
                                 "pos": "pos" == label,
-                                "neg": "neg" == label}
+                                "neg": "neg" == label,
+                            }
                         }
                         reviews.append((text, spacy_label))
     random.shuffle(reviews)