Skip to content

Commit e9fb32c

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent d25ccd8 commit e9fb32c

File tree

13 files changed

+212
-173
lines changed

13 files changed

+212
-173
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from summarize_gutenberg.cli import app
22

3-
app()
3+
app()

src/summarize_gutenberg/api.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from summarize_gutenberg.db import DB
66

7+
78
@dataclass
89
class Book:
910
id: int = field(default=None)
@@ -15,11 +16,11 @@ class Book:
1516
@classmethod
def from_dict(cls, d):
    """
    Build an instance from a dictionary of field values.

    The entries of `d` are passed to the constructor as keyword arguments.
    The instance is created through `cls` rather than a hard-coded `Book`,
    so calling this on a subclass returns an instance of that subclass.
    """
    return cls(**d)
19+
1820
def to_dict(self):
1921
return asdict(self)
2022

2123

22-
2324
class BooksDB:
2425
def __init__(self, db_path):
2526
self._db_path = db_path
@@ -37,7 +38,7 @@ def get_book(self, book_id: int) -> Book:
3738
if db_item is not None:
3839
return Book.from_dict(db_item)
3940
# else:
40-
# raise InvalidBookId(book_id)
41+
# raise InvalidBookId(book_id)
4142

4243
def list_books(self):
4344
"""Return a list of books."""
@@ -59,7 +60,7 @@ def delete_book(self, book_id: int) -> None:
5960
"""Remove a book from db with given book_id."""
6061
self._db.delete(book_id)
6162
# except KeyError as exc:
62-
# raise InvalidBookId(book_id) from exc
63+
# raise InvalidBookId(book_id) from exc
6364

6465
def delete_all(self) -> None:
6566
"""Remove all books from db."""
@@ -69,4 +70,4 @@ def close(self):
6970
self._db.close()
7071

7172
def path(self):
72-
return self._db_path
73+
return self._db_path

src/summarize_gutenberg/cli.py

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
import typer
33
from rich import print, box
4-
from rich.prompt import Prompt, IntPrompt, Confirm
4+
from rich.prompt import Prompt, IntPrompt
55
from rich.table import Table
66
from pathlib import Path
77
from contextlib import contextmanager
@@ -14,11 +14,13 @@
1414
FILE_DIR = Path("./files/")
1515
SUMMARY_DIR = FILE_DIR / "summaries"
1616

17+
1718
def dir_check():
    """Create the files directory and its summaries subdirectory if they are missing."""
    for directory in (FILE_DIR, SUMMARY_DIR):
        # exist_ok makes this safe to run on every start-up.
        directory.mkdir(parents=True, exist_ok=True)
2122

23+
2224
dir_check()
2325

2426
app = typer.Typer()
@@ -33,7 +35,8 @@ def get_default_books():
3335
for book in books:
3436
with books_db() as db:
3537
db.add_book(Book.from_dict(books[book]))
36-
38+
39+
3740
@app.command()
3841
def default():
3942
"""
@@ -47,28 +50,30 @@ def default():
4750
get_default_books()
4851

4952
table = Table(box=box.SQUARE_DOUBLE_HEAD, border_style="magenta")
50-
table.add_column('No.')
51-
table.add_column('[bold cyan]Title', max_width=75, no_wrap=False)
52-
table.add_column('[bold magenta]Author')
53-
table.add_column('[bold yellow]Fulltext URL')
53+
table.add_column("No.")
54+
table.add_column("[bold cyan]Title", max_width=75, no_wrap=False)
55+
table.add_column("[bold magenta]Author")
56+
table.add_column("[bold yellow]Fulltext URL")
5457

5558
with books_db() as db:
5659
books = db.list_books()
5760
for order_num, book in enumerate(books, start=1):
58-
table.add_row(f'{str(order_num)}.', book.title, book.author, f"[yellow]{book.url}")
61+
table.add_row(f"{str(order_num)}.", book.title, book.author, f"[yellow]{book.url}")
5962
order_num += 1
60-
print('\n')
63+
print("\n")
6164
print(table)
62-
print('\n')
65+
print("\n")
6366

6467
max_choice = len(books)
6568
choice = Prompt.ask("Select a book by number")
6669
while not choice.isdigit() or int(choice) < 1 or int(choice) > max_choice:
6770
choice = Prompt.ask("[red]Please choose a number between 1 and 32")
6871

6972
selected_book = books[int(choice) - 1]
70-
71-
print(f"\nYou have chosen [bold cyan]{selected_book.title}[/bold cyan] by [bold magenta]{selected_book.author}[/bold magenta].")
73+
74+
print(
75+
f"\nYou have chosen [bold cyan]{selected_book.title}[/bold cyan] by [bold magenta]{selected_book.author}[/bold magenta]."
76+
)
7277
filepath = FILE_DIR / Path(selected_book.filename)
7378

7479
if filepath.exists():
@@ -78,23 +83,24 @@ def default():
7883
write_text_to_file(selected_book.url, filepath)
7984
print(f"\nText of {selected_book.title} saved to {filepath}.")
8085

81-
choice = Prompt.ask("\nDo you want to [P]rint or [S]ave your summary?", choices=['p', 's'])
86+
choice = Prompt.ask("\nDo you want to [P]rint or [S]ave your summary?", choices=["p", "s"])
8287
chunks = IntPrompt.ask("How many lines per chunk?", default=400)
8388

8489
# if chunks < 50:
8590
# print("[red bold]Warning[/red bold]: choosing a low value could take a lot of time and resources.")
8691
# confirmation = Confirm.ask("Are you sure?")
87-
88-
if choice == 'p':
92+
93+
if choice == "p":
8994
print_summary(filepath, chunks)
9095
else:
9196
target_filepath = SUMMARY_DIR / Path(selected_book.filename)
9297
save_summary(filepath, target_filepath, chunks)
93-
print(f'\nSummary saved to {target_filepath}.')
98+
print(f"\nSummary saved to {target_filepath}.")
9499

95100
with books_db() as db:
96101
db.delete_all()
97102

103+
98104
def get_path():
99105
db_path_env = os.getenv("BOOKS_DB_DIR", "")
100106
if db_path_env:
@@ -103,6 +109,7 @@ def get_path():
103109
db_path = Path(__file__).parent / "books_db"
104110
return db_path
105111

112+
106113
@contextmanager
107114
def books_db():
108115
db_path = get_path()
@@ -111,5 +118,3 @@ def books_db():
111118
yield db
112119
finally:
113120
db.close()
114-
115-

src/summarize_gutenberg/db.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33

44
class DB:
55
def __init__(self, db_path, db_file_prefix):
6-
self._db = tinydb.TinyDB(
7-
db_path / f"{db_file_prefix}.json", create_dirs=True
8-
)
6+
self._db = tinydb.TinyDB(db_path / f"{db_file_prefix}.json", create_dirs=True)
97

108
def create(self, item: dict) -> int:
119
id = self._db.insert(item)

src/summarize_gutenberg/get_books.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,14 @@ def url_check(formats):
7575

7676
return url
7777

78+
7879
def create_filename(title):
7980
"""
8081
Create a filename for the book from a shortened version of its title.
8182
"""
8283
res_list = []
83-
colons = {':', ';'}
84-
puncts = {',', ' ', '.', '—', '-', "'", '"'}
84+
colons = {":", ";"}
85+
puncts = {",", " ", ".", "—", "-", "'", '"'}
8586

8687
for char in title:
8788
if char in colons:
@@ -90,10 +91,11 @@ def create_filename(title):
9091
continue
9192
else:
9293
res_list.append(char.lower())
93-
94-
res_list.append('.txt')
9594

96-
return ''.join(res_list)
95+
res_list.append(".txt")
96+
97+
return "".join(res_list)
98+
9799

98100
def fetch_default_books():
99101
"""
@@ -123,7 +125,7 @@ def fetch_default_books():
123125

124126

125127
def process_books(books):
126-
"""
128+
"""
127129
Create a dictionary of fetched books where the key is a sequential number and the value is a dictionary of book info.
128130
"""
129131
book_data = {}
@@ -148,9 +150,10 @@ def process_books(books):
148150

149151
return book_data
150152

153+
151154
if __name__ == "__main__":
    # Manual check: fetch the default books and print each generated filename.
    books = process_books(fetch_default_books())

    for book_info in books.values():
        print(book_info["filename"])

src/summarize_gutenberg/get_text.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def is_valid_utf8(byte_sequence):
112112
except UnicodeDecodeError:
113113
return False
114114

115+
115116
def strip_headers(text):
116117
lines = text.splitlines()
117118
sep = os.linesep
@@ -159,6 +160,7 @@ def strip_headers(text):
159160

160161
return str(sep.join(out), encoding="utf-8")
161162

163+
162164
def write_text_to_file(url, file_path):
163165
text_request = requests.get(url, stream=True)
164166

@@ -168,7 +170,7 @@ def write_text_to_file(url, file_path):
168170
text_content = text_request.content
169171
cleaned_text = strip_headers(text_content)
170172

171-
with open(file_path, "w", encoding='utf-8') as file:
173+
with open(file_path, "w", encoding="utf-8") as file:
172174
file.write(cleaned_text)
173175

174-
return file_path
176+
return file_path

src/summarize_gutenberg/make_summary.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,20 @@
33
tokenizer = AutoTokenizer.from_pretrained("pszemraj/pegasus-x-large-book-summary")
44
model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/pegasus-x-large-book-summary")
55

6+
67
def read_in_chunks(filepath, chunk_size=800):
    """
    Lazily yield the lines of a UTF-8 text file in groups of `chunk_size`.

    Each line has its surrounding whitespace stripped. Every yielded list
    holds `chunk_size` lines, except possibly the last one, which holds
    whatever lines remain. An empty file yields nothing.
    """
    with open(filepath, "r", encoding="utf-8") as text_file:
        batch = []
        for raw_line in text_file:
            batch.append(raw_line.strip())
            if len(batch) != chunk_size:
                continue
            # The batch is full: hand it out and start a fresh list.
            yield batch
            batch = []

        # Emit the trailing partial batch, if there is one.
        if batch:
            yield batch
19+
1820

1921
def make_summary(chunk):
2022
inputs = tokenizer.encode(chunk, return_tensors="pt", truncation=True)
@@ -25,13 +27,12 @@ def make_summary(chunk):
2527
return summary
2628

2729

28-
2930
def save_summary(source, target, chunk_size):
    """
    Summarize the text file at `source` chunk by chunk and write the result to `target`.

    Args:
        source: Path of the text file to summarize.
        target: Path of the file the summaries are written to; it is opened
            in "w" mode, so existing content is replaced.
        chunk_size: Number of lines passed to the summarizer per chunk.
    """
    # Write explicitly as UTF-8: the source text is read as UTF-8, and the
    # platform default encoding may not be able to represent every character.
    with open(target, "w", encoding="utf-8") as summary_file:
        for chunk in read_in_chunks(source, chunk_size=chunk_size):
            # NOTE(review): chunk summaries are written back to back with no
            # separator between them; confirm that this is intended.
            summary_file.write(make_summary(" ".join(chunk)))
34+
3335

3436
def print_summary(source, chunk_size):
    """
    Print a summary for each chunk of the text file at `source`.

    Args:
        source: Path of the text file to summarize.
        chunk_size: Number of lines passed to the summarizer per chunk.
    """
    for lines in read_in_chunks(source, chunk_size=chunk_size):
        chunk_text = " ".join(lines)
        print(make_summary(chunk_text))

tests/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from summarize_gutenberg.api import Book
88

9+
910
@pytest.fixture()
1011
def book_fixture():
1112
"""
@@ -19,4 +20,4 @@ def book_fixture():
1920
filename="yesterdaystomorrows.txt",
2021
)
2122

22-
return book
23+
return book

tests/test_author_parse.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,24 @@
22
from summarize_gutenberg.get_books import author_parse
33

44
authors = [
5-
('Aristotle', 'Aristotle'), # single name
6-
('Austen, Jane', 'Jane Austen'), # first & last
7-
('Stevenson, Robert Louis', 'Robert Louis Stevenson'), # first last middle
8-
('Chesterton, G. K. (Gilbert Keith)', 'G. K. Chesterton'), # parenthetical
9-
('H. D. (Hilda Doolittle)', 'H. D.'), # irregular parenthetical
10-
('Tolkien, J. R. R. (John Ronald Reuel)', 'J. R. R. Tolkien'), # parenthetical with three initials
11-
('Von Arnim, Elizabeth', 'Elizabeth Von Arnim'), # von
12-
('Sanchez, Nellie Van de Grift', 'Nellie Van de Grift Sanchez'), # van
13-
('Martinez de la Torre, Rafael', 'Rafael Martinez de la Torre'), # de la
14-
('Cervantes Saavedra, Miguel de', 'Miguel de Cervantes Saavedra'), # de
15-
('Alger, Horatio, Jr.', 'Horatio Alger Jr.'), # jr
16-
(None, '') # none
5+
("Aristotle", "Aristotle"), # single name
6+
("Austen, Jane", "Jane Austen"), # first & last
7+
("Stevenson, Robert Louis", "Robert Louis Stevenson"), # first last middle
8+
("Chesterton, G. K. (Gilbert Keith)", "G. K. Chesterton"), # parenthetical
9+
("H. D. (Hilda Doolittle)", "H. D."), # irregular parenthetical
10+
(
11+
"Tolkien, J. R. R. (John Ronald Reuel)",
12+
"J. R. R. Tolkien",
13+
), # parenthetical with three initials
14+
("Von Arnim, Elizabeth", "Elizabeth Von Arnim"), # von
15+
("Sanchez, Nellie Van de Grift", "Nellie Van de Grift Sanchez"), # van
16+
("Martinez de la Torre, Rafael", "Rafael Martinez de la Torre"), # de la
17+
("Cervantes Saavedra, Miguel de", "Miguel de Cervantes Saavedra"), # de
18+
("Alger, Horatio, Jr.", "Horatio Alger Jr."), # jr
19+
(None, ""), # none
1720
]
1821

19-
@pytest.mark.parametrize("raw_author, expected", authors)
def test_author_parse(raw_author, expected):
    """Check that `author_parse` maps each raw author entry in `authors` to its expected form."""
    # Parse once so the assertion and its failure message report the same value;
    # the parameter is named `raw_author` to avoid shadowing the builtin `input`.
    parsed = author_parse(raw_author)
    assert parsed == expected, f"Expected {expected}, but got {parsed}"

0 commit comments

Comments
 (0)