Skip to content

Commit 99cd467

Browse files
committed
Fix linter errors
1 parent a4d5a40 commit 99cd467

File tree

2 files changed

+43
-16
lines changed

2 files changed

+43
-16
lines changed

nlp-sentiment-analysis/README.md

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,38 @@
11
# Use Sentiment Analysis With Python to Classify Reviews
2-
This folder contains resources and materials for Real Python's Use Sentiment Analysis With Python to Classify Reviews tutorial.
32

4-
`sentiment_analyzer.py` contains a fully built sentiment analyzer as used in the project section of the tutorial. To get the sentiment of a review of your choosing, replace the text in the `TEST_REVIEW` variable.
3+
Resources and materials for Real Python's [Use Sentiment Analysis With Python to Classify Reviews](https://realpython.com/use-sentiment-analysis-python-classify-movie-reviews/) tutorial.
4+
5+
## Installation
6+
7+
Create and activate a new virtual environment:
8+
9+
```shell
10+
$ python -m venv .venv
11+
$ source .venv/bin/activate
12+
```
13+
14+
Install Python dependencies into the active virtual environment:
15+
16+
```shell
17+
(.venv) $ python -m pip install -r requirements.txt
18+
```
19+
20+
Download the English model for spaCy:
21+
22+
```shell
23+
(.venv) $ python -m spacy download en_core_web_sm
24+
```
25+
26+
Download and extract the [Large Movie Review Dataset](https://ai.stanford.edu/~amaas/data/sentiment/) compiled by [Andrew Maas](http://www.andrew-maas.net/):
27+
28+
```shell
29+
$ curl -s https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz | tar xvz
30+
```
31+
32+
## Usage
33+
34+
Get the sentiment of a movie review stored in the `TEST_REVIEW` variable:
35+
36+
```shell
37+
(.venv) $ python sentiment_analyzer.py
38+
```

nlp-sentiment-analysis/sentiment_analyzer.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@
1919

2020

2121
def train_model(
22-
training_data: list,
23-
test_data: list,
24-
iterations: int = 20
22+
training_data: list, test_data: list, iterations: int = 20
2523
) -> None:
2624
# Build pipeline
2725
nlp = spacy.load("en_core_web_sm")
@@ -60,7 +58,7 @@ def train_model(
6058
evaluation_results = evaluate_model(
6159
tokenizer=nlp.tokenizer,
6260
textcat=textcat,
63-
test_data=test_data
61+
test_data=test_data,
6462
)
6563
print(
6664
f"{loss['textcat']}\t{evaluation_results['precision']}"
@@ -73,9 +71,7 @@ def train_model(
7371
nlp.to_disk("model_artifacts")
7472

7573

76-
def evaluate_model(
77-
tokenizer, textcat, test_data: list
78-
) -> dict:
74+
def evaluate_model(tokenizer, textcat, test_data: list) -> dict:
7975
reviews, labels = zip(*test_data)
8076
reviews = (tokenizer(review) for review in reviews)
8177
true_positives = 0
@@ -87,9 +83,7 @@ def evaluate_model(
8783
for predicted_label, score in review.cats.items():
8884
# Every cats dictionary includes both labels; you can get all
8985
# the info you need with just the pos label
90-
if (
91-
predicted_label == "neg"
92-
):
86+
if predicted_label == "neg":
9387
continue
9488
if score >= 0.5 and true_label == "pos":
9589
true_positives += 1
@@ -128,9 +122,7 @@ def test_model(input_data: str = TEST_REVIEW):
128122

129123

130124
def load_training_data(
131-
data_directory: str = "aclImdb/train",
132-
split: float = 0.8,
133-
limit: int = 0
125+
data_directory: str = "aclImdb/train", split: float = 0.8, limit: int = 0
134126
) -> tuple:
135127
# Load from files
136128
reviews = []
@@ -145,7 +137,8 @@ def load_training_data(
145137
spacy_label = {
146138
"cats": {
147139
"pos": "pos" == label,
148-
"neg": "neg" == label}
140+
"neg": "neg" == label,
141+
}
149142
}
150143
reviews.append((text, spacy_label))
151144
random.shuffle(reviews)

0 commit comments

Comments
 (0)