Skip to content

Commit 5ae06bf

Browse files
authored
Merge pull request #8 from ashiq-km/yml_workflow
Yml workflow
2 parents 43b48b9 + edd2fe6 commit 5ae06bf

File tree

4 files changed, with 31 additions and 13 deletions.

app/fastapi_app.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
1-
# We will create an API that has one job: take a word. look up the math, and return the similar words.
1+
# We will create an API that has one job:
2+
# take a word. look up the math, and return the similar words.
23

34

45
from contextlib import asynccontextmanager
@@ -22,7 +23,8 @@ async def lifespan(app: FastAPI):
2223
print("Loading model...")
2324
if not config.MODEL_FILE.exists():
2425

25-
# In Production, you might want to download the model from S3 / DVC here
26+
# In Production, you might want to
27+
# download the model from S3 / DVC here
2628
raise FileNotFoundError("Model file not found. Run training first.")
2729

2830
# Load the full model
@@ -51,7 +53,10 @@ async def lifespan(app: FastAPI):
5153

5254
@app.get("/")
5355
def home():
54-
return {"message": "Welcome to the Godfather API. Go to the /docs for testing."}
56+
return {
57+
"message": "Welcome to the Godfather API. \
58+
Go to the /docs for testing."
59+
}
5560

5661

5762
@app.get("/similar/{word}")
@@ -94,7 +99,9 @@ def get_similarity(w1: str, w2: str):
9499

95100
if w1 not in model_wv or w2 not in model_wv:
96101
raise HTTPException(
97-
status_code=404, detail="One of the words is missing from the vocabulary."
102+
status_code=404,
103+
detail="One of the words is \
104+
missing from the vocabulary.",
98105
)
99106

100107
score = model_wv.similarity(w1, w2)

app/streamlit_app.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
st.title("🌹 The Godfather: Word Embeddings")
1616
st.markdown(
1717
"""
18-
Explore semantic relationships in the Godfather novel using AI.
18+
Explore semantic relationships in the Godfather novel using AI.
1919
🔍 Find similar words, perform analogies, and visualize relationships.
2020
"""
2121
)
@@ -88,7 +88,10 @@ def download_and_load_model():
8888
for w, score in similar:
8989
st.progress(score, text=f"{w} ({score:.2f})")
9090
else:
91-
st.warning(f"⚠️ The word '{word_input}' is not in the vocabulary.")
91+
st.warning(
92+
f"⚠️ The word '{word_input}' \
93+
is not in the vocabulary."
94+
)
9295

9396
# --- TAB 2: ANALOGIES ---
9497
with tab2:
@@ -129,9 +132,9 @@ def download_and_load_model():
129132
st.sidebar.header("Godfather AI Controls")
130133
st.sidebar.markdown(
131134
"""
132-
- Use tabs to explore embeddings
133-
- Input words for similarity or analogies
134-
- Model automatically downloads if missing
135+
- Use tabs to explore embeddings
136+
- Input words for similarity or analogies
137+
- Model automatically downloads if missing
135138
- Works offline after first run
136139
"""
137140
)

src/preprocess.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,12 @@ def get_text_from_pdf(pdf_path):
2424

2525
reader = pypdf.PdfReader(str(pdf_path))
2626

27-
for page in tqdm(reader.pages, desc=f"Reading {pdf_path.name}", leave=False):
27+
for page in tqdm(
28+
reader.pages,
29+
desc=f"Reading \
30+
{pdf_path.name}",
31+
leave=False,
32+
):
2833
page_text = page.extract_text()
2934

3035
if page_text:
@@ -64,7 +69,8 @@ def main():
6469

6570
all_sentences = []
6671

67-
for pdf_file in tqdm(config.RAW_DATA_FILES, desc="Processing PDFs", leave=False):
72+
for pdf_file in tqdm(config.RAW_DATA_FILES,
73+
desc="Processing PDFs", leave=False):
6874
raw_text = get_text_from_pdf(pdf_file)
6975
sentences = clean_tokenize(raw_text)
7076
all_sentences.extend(sentences)
@@ -76,7 +82,8 @@ def main():
7682
print(f"Saving to {config.PROCESSED_DATA_FILE}...")
7783

7884
with open(config.PROCESSED_DATA_FILE, "w", encoding="utf-8") as f:
79-
for sentence in tqdm(all_sentences, desc="Writing sentences", leave=False):
85+
for sentence in tqdm(all_sentences,
86+
desc="Writing sentences", leave=False):
8087
f.write(" ".join(sentence) + "\n")
8188

8289
print("Preprocessing complete!")

src/train.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,8 @@ def train_model():
2525
# Check if data exists
2626
if not config.PROCESSED_DATA_FILE.exists():
2727
raise FileNotFoundError(
28-
f"Processed data not found at {config.PROCESSED_DATA_FILE}. Run preprocess.py first."
28+
f"Processed data not found at {config.PROCESSED_DATA_FILE}. \
29+
Run preprocess.py first."
2930
)
3031

3132
# Load sentences using LineSentence (memory efficient)

0 commit comments

Comments
 (0)