jwjacobson · pre-commit-ci · Sep 1, 2025 · Sep 1, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
   # Ruff version.
-  rev: v0.4.1
+  rev: v0.12.11
   hooks:
     # Run the linter.
     - id: ruff

diff --git a/src/summarize_gutenberg/__main__.py b/src/summarize_gutenberg/__main__.py
@@ -1,3 +1,3 @@
 from summarize_gutenberg.cli import app
 
-app()
+app()
diff --git a/src/summarize_gutenberg/api.py b/src/summarize_gutenberg/api.py
@@ -4,6 +4,7 @@
 
 from summarize_gutenberg.db import DB
 
+
 @dataclass
 class Book:
     id: int = field(default=None)
@@ -15,11 +16,11 @@ class Book:
     @classmethod
     def from_dict(cls, d):
         return Book(**d)
+
     def to_dict(self):
         return asdict(self)
 
 
-
 class BooksDB:
     def __init__(self, db_path):
         self._db_path = db_path
@@ -37,7 +38,7 @@ def get_book(self, book_id: int) -> Book:
         if db_item is not None:
             return Book.from_dict(db_item)
         # else:
-            # raise InvalidBookId(book_id)
+        # raise InvalidBookId(book_id)
 
     def list_books(self):
         """Return a list of books."""
@@ -59,7 +60,7 @@ def delete_book(self, book_id: int) -> None:
         """Remove a book from db with given book_id."""
         self._db.delete(book_id)
         # except KeyError as exc:
-            # raise InvalidBookId(book_id) from exc
+        # raise InvalidBookId(book_id) from exc
 
     def delete_all(self) -> None:
         """Remove all books from db."""
@@ -69,4 +70,4 @@ def close(self):
         self._db.close()
 
     def path(self):
-        return self._db_path
+        return self._db_path
diff --git a/src/summarize_gutenberg/cli.py b/src/summarize_gutenberg/cli.py
@@ -1,7 +1,7 @@
 import os
 import typer
 from rich import print, box
-from rich.prompt import Prompt, IntPrompt, Confirm
+from rich.prompt import Prompt, IntPrompt
 from rich.table import Table
 from pathlib import Path
 from contextlib import contextmanager
@@ -14,11 +14,13 @@
 FILE_DIR = Path("./files/")
 SUMMARY_DIR = FILE_DIR / "summaries"
 
+
 def dir_check():
     """Make sure the directories for saving files exist"""
     FILE_DIR.mkdir(parents=True, exist_ok=True)
     SUMMARY_DIR.mkdir(parents=True, exist_ok=True)
 
+
 dir_check()
 
 app = typer.Typer()
@@ -33,7 +35,8 @@ def get_default_books():
     for book in books:
         with books_db() as db:
             db.add_book(Book.from_dict(books[book]))
-
+
+
 @app.command()
 def default():
     """
@@ -47,28 +50,30 @@ def default():
     get_default_books()
 
     table = Table(box=box.SQUARE_DOUBLE_HEAD, border_style="magenta")
-    table.add_column('No.')
-    table.add_column('[bold cyan]Title', max_width=75, no_wrap=False)
-    table.add_column('[bold magenta]Author')
-    table.add_column('[bold yellow]Fulltext URL')
+    table.add_column("No.")
+    table.add_column("[bold cyan]Title", max_width=75, no_wrap=False)
+    table.add_column("[bold magenta]Author")
+    table.add_column("[bold yellow]Fulltext URL")
 
     with books_db() as db:
         books = db.list_books()
         for order_num, book in enumerate(books, start=1):
-            table.add_row(f'{str(order_num)}.', book.title, book.author, f"[yellow]{book.url}")
+            table.add_row(f"{str(order_num)}.", book.title, book.author, f"[yellow]{book.url}")
             order_num += 1
-    print('\n')
+    print("\n")
     print(table)
-    print('\n')
+    print("\n")
 
     max_choice = len(books)
     choice = Prompt.ask("Select a book by number")
     while not choice.isdigit() or int(choice) < 1 or int(choice) > max_choice:
         choice = Prompt.ask("[red]Please choose a number between 1 and 32")
 
     selected_book = books[int(choice) - 1]
-
-    print(f"\nYou have chosen [bold cyan]{selected_book.title}[/bold cyan] by [bold magenta]{selected_book.author}[/bold magenta].")
+
+    print(
+        f"\nYou have chosen [bold cyan]{selected_book.title}[/bold cyan] by [bold magenta]{selected_book.author}[/bold magenta]."
+    )
     filepath = FILE_DIR / Path(selected_book.filename)
 
     if filepath.exists():
@@ -78,23 +83,24 @@ def default():
         write_text_to_file(selected_book.url, filepath)
         print(f"\nText of {selected_book.title} saved to {filepath}.")
 
-    choice = Prompt.ask("\nDo you want to [P]rint or [S]ave your summary?", choices=['p', 's'])
+    choice = Prompt.ask("\nDo you want to [P]rint or [S]ave your summary?", choices=["p", "s"])
     chunks = IntPrompt.ask("How many lines per chunk?", default=400)
 
     # if chunks < 50:
     #     print("[red bold]Warning[/red bold]: choosing a low value could take a lot of time and resources.")
     #     confirmation = Confirm.ask("Are you sure?")
-        
-    if choice == 'p':
+
+    if choice == "p":
         print_summary(filepath, chunks)
     else:
         target_filepath = SUMMARY_DIR / Path(selected_book.filename)
         save_summary(filepath, target_filepath, chunks)
-        print(f'\nSummary saved to {target_filepath}.')
+        print(f"\nSummary saved to {target_filepath}.")
 
     with books_db() as db:
         db.delete_all()
 
+
 def get_path():
     db_path_env = os.getenv("BOOKS_DB_DIR", "")
     if db_path_env:
@@ -103,6 +109,7 @@ def get_path():
         db_path = Path(__file__).parent / "books_db"
     return db_path
 
+
 @contextmanager
 def books_db():
     db_path = get_path()
@@ -111,5 +118,3 @@ def books_db():
         yield db
     finally:
         db.close()
-
-
diff --git a/src/summarize_gutenberg/db.py b/src/summarize_gutenberg/db.py
@@ -3,9 +3,7 @@
 
 class DB:
     def __init__(self, db_path, db_file_prefix):
-        self._db = tinydb.TinyDB(
-            db_path / f"{db_file_prefix}.json", create_dirs=True
-        )
+        self._db = tinydb.TinyDB(db_path / f"{db_file_prefix}.json", create_dirs=True)
 
     def create(self, item: dict) -> int:
         id = self._db.insert(item)

diff --git a/src/summarize_gutenberg/get_books.py b/src/summarize_gutenberg/get_books.py
@@ -75,13 +75,14 @@ def url_check(formats):
 
     return url
 
+
 def create_filename(title):
     """
     Create a filename for the book from a shortened version of its title.
     """
     res_list = []
-    colons = {':', ';'}
-    puncts = {',', ' ', '.', '—', '-', "'", '"'}
+    colons = {":", ";"}
+    puncts = {",", " ", ".", "—", "-", "'", '"'}
 
     for char in title:
         if char in colons:
@@ -90,10 +91,11 @@ def create_filename(title):
             continue
         else:
             res_list.append(char.lower())
-
-    res_list.append('.txt')
 
-    return ''.join(res_list)    
+    res_list.append(".txt")
+
+    return "".join(res_list)
+
 
 def fetch_default_books():
     """
@@ -123,7 +125,7 @@ def fetch_default_books():
 
 
 def process_books(books):
-    """ 
+    """
     Create a dictionary of fetched books where the key is a sequential number and the value is a dictionary of book info.
     """
     book_data = {}
@@ -148,9 +150,10 @@ def process_books(books):
 
     return book_data
 
+
 if __name__ == "__main__":
-    books = process_books(fetch_default_books()) 
+    books = process_books(fetch_default_books())
 
     book_list = [book for book in books]
     for book in book_list:
-        print(books[book]['filename'])
+        print(books[book]["filename"])
diff --git a/src/summarize_gutenberg/get_text.py b/src/summarize_gutenberg/get_text.py
@@ -112,6 +112,7 @@ def is_valid_utf8(byte_sequence):
     except UnicodeDecodeError:
         return False
 
+
 def strip_headers(text):
     lines = text.splitlines()
     sep = os.linesep
@@ -159,6 +160,7 @@ def strip_headers(text):
 
     return str(sep.join(out), encoding="utf-8")
 
+
 def write_text_to_file(url, file_path):
     text_request = requests.get(url, stream=True)
 
@@ -168,7 +170,7 @@ def write_text_to_file(url, file_path):
     text_content = text_request.content
     cleaned_text = strip_headers(text_content)
 
-    with open(file_path, "w", encoding='utf-8') as file:
+    with open(file_path, "w", encoding="utf-8") as file:
         file.write(cleaned_text)
 
-    return file_path
+    return file_path
diff --git a/src/summarize_gutenberg/make_summary.py b/src/summarize_gutenberg/make_summary.py
@@ -3,18 +3,20 @@
 tokenizer = AutoTokenizer.from_pretrained("pszemraj/pegasus-x-large-book-summary")
 model = AutoModelForSeq2SeqLM.from_pretrained("pszemraj/pegasus-x-large-book-summary")
 
+
 def read_in_chunks(filepath, chunk_size=800):
-    with open(filepath, 'r', encoding='utf-8') as file:
-            current_chunk = []
-            for line in file:
-                cleaned_line = line.strip()
-                current_chunk.append(cleaned_line)
-                if len(current_chunk) == chunk_size:
-                    yield current_chunk
-                    current_chunk = []  
-
-            if current_chunk:
+    with open(filepath, "r", encoding="utf-8") as file:
+        current_chunk = []
+        for line in file:
+            cleaned_line = line.strip()
+            current_chunk.append(cleaned_line)
+            if len(current_chunk) == chunk_size:
                 yield current_chunk
+                current_chunk = []
+
+        if current_chunk:
+            yield current_chunk
+
 
 def make_summary(chunk):
     inputs = tokenizer.encode(chunk, return_tensors="pt", truncation=True)
@@ -25,13 +27,12 @@ def make_summary(chunk):
     return summary
 
 
-
 def save_summary(source, target, chunk_size):
-    with open(target,'w') as target:  
+    with open(target, "w") as target:
         for chunk in read_in_chunks(source, chunk_size=chunk_size):
-            target.write(make_summary(' '.join(chunk)))
+            target.write(make_summary(" ".join(chunk)))
+
 
 def print_summary(source, chunk_size):
     for chunk in read_in_chunks(source, chunk_size=chunk_size):
-        print(make_summary(' '.join(chunk)))
-
+        print(make_summary(" ".join(chunk)))
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -6,6 +6,7 @@
 
 from summarize_gutenberg.api import Book
 
+
 @pytest.fixture()
 def book_fixture():
     """
@@ -19,4 +20,4 @@ def book_fixture():
         filename="yesterdaystomorrows.txt",
     )
 
-    return book
+    return book
diff --git a/tests/test_author_parse.py b/tests/test_author_parse.py
@@ -2,20 +2,24 @@
 from summarize_gutenberg.get_books import author_parse
 
 authors = [
-('Aristotle', 'Aristotle'), # single name
-('Austen, Jane', 'Jane Austen'), # first & last
-('Stevenson, Robert Louis', 'Robert Louis Stevenson'), # first last middle
-('Chesterton, G. K. (Gilbert Keith)', 'G. K. Chesterton'), # parenthetical
-('H. D. (Hilda Doolittle)', 'H. D.'), # irregular parenthetical
-('Tolkien, J. R. R. (John Ronald Reuel)', 'J. R. R. Tolkien'), # parenthetical with three initials
-('Von Arnim, Elizabeth', 'Elizabeth Von Arnim'), # von
-('Sanchez, Nellie Van de Grift', 'Nellie Van de Grift Sanchez'), # van
-('Martinez de la Torre, Rafael', 'Rafael Martinez de la Torre'), # de la
-('Cervantes Saavedra, Miguel de', 'Miguel de Cervantes Saavedra'), # de
-('Alger, Horatio, Jr.', 'Horatio Alger Jr.'), # jr
-(None, '') # none
+    ("Aristotle", "Aristotle"),  # single name
+    ("Austen, Jane", "Jane Austen"),  # first & last
+    ("Stevenson, Robert Louis", "Robert Louis Stevenson"),  # first last middle
+    ("Chesterton, G. K. (Gilbert Keith)", "G. K. Chesterton"),  # parenthetical
+    ("H. D. (Hilda Doolittle)", "H. D."),  # irregular parenthetical
+    (
+        "Tolkien, J. R. R. (John Ronald Reuel)",
+        "J. R. R. Tolkien",
+    ),  # parenthetical with three initials
+    ("Von Arnim, Elizabeth", "Elizabeth Von Arnim"),  # von
+    ("Sanchez, Nellie Van de Grift", "Nellie Van de Grift Sanchez"),  # van
+    ("Martinez de la Torre, Rafael", "Rafael Martinez de la Torre"),  # de la
+    ("Cervantes Saavedra, Miguel de", "Miguel de Cervantes Saavedra"),  # de
+    ("Alger, Horatio, Jr.", "Horatio Alger Jr."),  # jr
+    (None, ""),  # none
 ]
 
-@pytest.mark.parametrize('input, expected', authors)
+
+@pytest.mark.parametrize("input, expected", authors)
 def test_author_parse(input, expected):
-    assert author_parse(input) == expected, f'Expected {expected}, but got {author_parse(input)}'
+    assert author_parse(input) == expected, f"Expected {expected}, but got {author_parse(input)}"