Skip to content

Commit 41a10fc

Browse files
authored
Merge pull request #12 from THEGOLDENPRO/v1.4
V1.4
2 parents 6965f76 + a11f219 commit 41a10fc

File tree

15 files changed

+394
-150
lines changed

15 files changed

+394
-150
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,6 @@ build
55
aghpb_api.egg-info
66
.vscode
77
git_repo
8-
.ruff_cache
8+
.ruff_cache
9+
results.prof
10+
books_cache.json

Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ USER root
55
WORKDIR /app
66

77
COPY /api ./api
8-
COPY requirements.txt .
8+
COPY pyproject.toml .
99
COPY Makefile .
1010

1111
RUN apt-get update && apt-get install -y git make
@@ -14,7 +14,7 @@ RUN mkdir assets
1414
RUN make pull-repo
1515
RUN cd ./assets/git_repo && git config features.manyFiles 1
1616

17-
RUN pip install -r requirements.txt
17+
RUN pip install .
1818

1919
EXPOSE 8000
2020
ENV LISTEN_PORT = 8000

Makefile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
build:
2-
pip install -r requirements.txt
2+
pip install . -U
33

44
run:
55
uvicorn api.main:app --reload
@@ -10,6 +10,13 @@ test:
1010
pull-repo:
1111
git clone https://github.com/cat-milk/Anime-Girls-Holding-Programming-Books ./assets/git_repo
1212

13+
update-repo:
14+
cd ./assets/git_repo && git pull
15+
16+
bench-test:
17+
python scripts/bench_book_load.py
18+
snakeviz results.prof
19+
1320
docker-build:
1421
python scripts/docker_build.py
1522

api/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.3.6"
1+
__version__ = "1.4"

api/anime_girls.py

Lines changed: 117 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -1,129 +1,34 @@
11
from __future__ import annotations
2-
from typing_extensions import List, TypedDict, final
2+
from typing import TYPE_CHECKING
3+
from typing_extensions import List, Tuple
4+
5+
if TYPE_CHECKING:
6+
from typing import Dict
7+
from .book import BookData
38

4-
import os
59
import sys
10+
import json
611
import random
712
import subprocess
813
from pathlib import Path
914
from datetime import datetime
10-
from devgoldyutils import Colours
11-
from dataclasses import dataclass, field
12-
from fastapi.responses import FileResponse
15+
from devgoldyutils import Colours, shorter_path
1316

17+
from .book import Book
1418
from .errors import APIException
19+
from .constants import GIT_REPO_PATH, EXCLUDED_FILES, ALLOWED_FILE_EXTENSIONS
1520

16-
EXCLUDED_DIRS = [".git"]
17-
EXCLUDED_FILES = [".DS_Store"]
18-
GIT_REPO_PATH = "./assets/git_repo"
19-
GIT_REPO_URL = "https://github.com/cat-milk/Anime-Girls-Holding-Programming-Books"
20-
21-
@final
22-
class BookDict(TypedDict):
23-
search_id: str
24-
name: str
25-
category: str
26-
date_added: str
27-
commit_url: str
28-
commit_author: str
29-
30-
@dataclass
31-
class Book:
32-
path: str = field(repr=False)
33-
search_id: str
34-
35-
name: str = field(init=False)
36-
category: str = field(init=False)
37-
location: str = field(init=False, repr=False)
38-
date_added: datetime = field(init=False)
39-
commit_url: str = field(init=False)
40-
commit_author: str = field(init=False)
41-
42-
def __post_init__(self):
43-
file_name = os.path.split(self.path)[1]
44-
45-
self.name = file_name.split(".")[0].replace("_", " ").capitalize()
46-
self.category = Path(self.path).parent.name
47-
48-
git_path = f"/{self.category}/{file_name}"
49-
50-
# I use git here to scrape the date the image was added to the repo.
51-
args = [f'cd {GIT_REPO_PATH} && git log --diff-filter=A -- "{f"./{git_path}"}"']
52-
53-
if sys.platform == "win32":
54-
args = ["cd", GIT_REPO_PATH, "&&", "git", "log", "--diff-filter=A", "--", f"./{git_path}"]
55-
56-
p = subprocess.Popen(
57-
args,
58-
stdout = subprocess.PIPE,
59-
shell = True
60-
)
61-
output, _ = p.communicate()
62-
git_log = output.decode()
63-
64-
self.commit_author = git_log.splitlines()[1].split('Author: ')[1].split("<")[0][:-1]
65-
self.commit_url = GIT_REPO_URL + f"/commit/{git_log.splitlines()[0].split('commit ')[1]}"
66-
self.date_added = datetime.strptime((git_log.splitlines()[2]), "Date: %a %b %d %H:%M:%S %Y %z")
67-
68-
self.location = "/git_repo" + git_path
69-
70-
def to_dict(self) -> BookDict:
71-
return {
72-
"search_id": self.search_id,
73-
"name": self.name,
74-
"category": self.category,
75-
"date_added": str(self.date_added),
76-
"commit_url": self.commit_url,
77-
"commit_author": self.commit_author
78-
}
79-
80-
def to_file_response(self) -> FileResponse:
81-
"""Returns file response object."""
82-
try: # Testing to see if the author name can encode. If not just set it as null.
83-
self.commit_author.encode("latin-1")
84-
except UnicodeEncodeError as e:
85-
self.commit_author = "null"
86-
print(e)
87-
88-
return FileResponse(
89-
self.path,
90-
headers = {
91-
"Book-Name": self.name,
92-
"Book-Category": self.category,
93-
"Book-Search-ID": self.search_id,
94-
"Book-Date-Added": str(self.date_added),
95-
"Book-Commit-URL": self.commit_url,
96-
"Book-Commit-Author": self.commit_author,
97-
"Last-Modified": str(self.date_added),
98-
99-
"Pragma": "no-cache",
100-
"Expires": "0",
101-
"Cache-Control": "no-cache, no-store, must-revalidate, public, max-age=0"
102-
}
103-
)
21+
__all__ = (
22+
"ProgrammingBooks",
23+
)
10424

105-
class AGHPB():
106-
"""Interface to the anime girls holding programming books directory."""
25+
class ProgrammingBooks():
26+
"""A class for interfacing with the anime girls holding programming books repo."""
10727
def __init__(self) -> None:
108-
self.books: List[Book] = []
109-
self.categories = [x for x in os.listdir(GIT_REPO_PATH) if os.path.isdir(f"{GIT_REPO_PATH}/{x}") and not x in EXCLUDED_DIRS]
110-
111-
print(Colours.ORANGE.apply("Loading books..."))
112-
113-
_id = 0
114-
for category in self.categories:
28+
self._repo_path = Path(GIT_REPO_PATH)
11529

116-
for book in os.listdir(f"{GIT_REPO_PATH}/{category}"):
117-
if book in EXCLUDED_FILES:
118-
continue
119-
120-
book = Book(f"{GIT_REPO_PATH}/{category}/{book}", str(_id))
121-
self.books.append(book)
122-
123-
sys.stdout.write(f"Book '{Colours.PINK_GREY.apply(book.category)} - {Colours.BLUE.apply(book.name)}' added!\n")
124-
_id += 1
125-
126-
print(Colours.GREEN.apply("[Done!]"))
30+
self.__update_repo()
31+
self.books, self.categories = self.__phrase_books()
12732

12833
def random_category(self) -> str:
12934
return random.choice(self.categories)
@@ -141,6 +46,105 @@ def random_book(self, category: str) -> Book:
14146

14247
return random.choice([book for book in self.books if book.category == actual_category])
14348

49+
def __update_repo(self):
50+
print(
51+
Colours.CLAY.apply(f"Attempting to update git repo at '{self._repo_path}'...")
52+
)
53+
54+
process = subprocess.Popen(
55+
["git", "pull"],
56+
text = True,
57+
stdout = subprocess.PIPE,
58+
cwd = self._repo_path
59+
)
60+
61+
process.wait()
62+
output, _ = process.communicate()
63+
64+
if not process.returncode == 0:
65+
print(Colours.RED.apply("Git errored!!!"))
66+
67+
print("Git Output: " + output)
68+
69+
def __phrase_books(self) -> Tuple[List[Book], List[str]]:
70+
books = []
71+
categories = []
72+
73+
file_count = "???"
74+
75+
print(Colours.ORANGE.apply("Loading books..."))
76+
77+
if sys.platform == "linux": # NOTE: Only works on Linux.
78+
file_count = subprocess.check_output(f'find "{self._repo_path.absolute()}" | wc -l', shell = True, text = True)[:-1]
79+
80+
cached_books = self.__get_cache()
81+
82+
search_id = 0
83+
84+
for index, file in enumerate(self._repo_path.rglob("*")):
85+
86+
if file.suffix not in ALLOWED_FILE_EXTENSIONS: # also excludes folders.
87+
continue
88+
89+
if file.name in EXCLUDED_FILES:
90+
sys.stdout.write(f"Ignoring the file '{Colours.GREY.apply(file.name)}'...\n")
91+
continue
92+
93+
cached_book = cached_books.get(str(file))
94+
95+
add_msg = f"{Colours.GREY.apply(f'({index}/{file_count})')} Adding book from '{Colours.PINK_GREY.apply(shorter_path(file))}'...\n"
96+
sys.stdout.write(Colours.BLUE.apply("[CACHED] ") + add_msg if cached_book is not None else add_msg)
97+
98+
if cached_book is not None:
99+
book = Book(
100+
file,
101+
str(search_id),
102+
name = cached_book["name"],
103+
category = cached_book["category"],
104+
date_added = datetime.fromisoformat(cached_book["date_added"]),
105+
commit_url = cached_book["commit_url"],
106+
commit_author = cached_book["commit_author"],
107+
commit_hash = cached_book["commit_hash"]
108+
)
109+
110+
else:
111+
book = Book(file, str(search_id))
112+
cached_books[str(file)] = book.to_dict()
113+
114+
if file.parent.name not in categories:
115+
categories.append(file.parent.name)
116+
117+
books.append(book)
118+
search_id += 1
119+
120+
self.__set_cache(cached_books)
121+
122+
print(Colours.GREEN.apply("[Done!]"))
123+
return books, categories
124+
125+
def __get_cache(self) -> Dict[str, BookData]:
126+
cached_books = {}
127+
128+
books_cache_file = Path("./books_cache.json")
129+
130+
if books_cache_file.exists():
131+
132+
with books_cache_file.open() as file:
133+
cached_books = json.load(file)
134+
135+
else:
136+
137+
with books_cache_file.open("w") as file:
138+
print("Creating books cache file...")
139+
file.write("{}")
140+
141+
return cached_books
142+
143+
def __set_cache(self, data: Dict[str, BookData]) -> None:
144+
145+
with open("./books_cache.json", "w") as file:
146+
json.dump(data, file)
147+
144148

145149
class CategoryNotFound(APIException):
146150
def __init__(self, category: str) -> None:

0 commit comments

Comments
 (0)