Skip to content

Commit bfd38ad

Browse files
committed
skills + technologies => only skills (no redundancy)
1 parent 63e61bd commit bfd38ad

File tree

16 files changed

+82
-108
lines changed

16 files changed

+82
-108
lines changed

backend/.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ REDIS_JOB_PREFIX=cv:job:
3939
REDIS_JOB_QUEUE=cv_processing_queue
4040
REDIS_CLEANUP_QUEUE=cv_cleanup_queue
4141
REDIS_JOB_TIMEOUT=1800
42-
REDIS_WORKER_TIMEOUT=30
42+
REDIS_WORKER_TIMEOUT=3
4343
REDIS_MAX_RETRIES=3
4444

4545
# -----------------------------------------------------------------------------

backend/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ No need to reinvent auth, migrations, or admin panels when Django gives you all
2121

2222
**Processor app** - This is where the magic starts. Upload a resume, it queues a job in Redis, extracts text with pdfplumber or OCR, sends it to an LLM (OpenAI's GPT-4o-mini) to structure it properly, then generates a review of what could be improved. The structured data is what makes everything else possible.
2323

24-
**Storage app** - Once we have structured data, it goes two places. Neo4j stores the actual resume data as a graph (people connected to companies, skills, technologies), while Qdrant stores embedding vectors for semantic search. The graph gives us relationship queries ("who worked at Google?"), vectors give us semantic similarity ("find someone like this person").
24+
**Storage app** - Once we have structured data, it goes to two places. Neo4j stores the actual resume data as a graph (people connected to companies and skills), while Qdrant stores embedding vectors for semantic search. The graph gives us relationship queries ("who worked at Google?"), vectors give us semantic similarity ("find someone like this person").
2525

2626
**Search app** - This is where it all comes together. You can do semantic search (vector similarity), structured search (graph queries), or hybrid search (both combined). The results show not just who matched, but exactly which parts of their resume matched and why.
2727

backend/core/domain/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
Resume,
3030
ScientificContribution,
3131
Skill,
32-
Technology,
3332
WorkAuthorization,
3433
WorkMode,
3534
)
@@ -60,7 +59,6 @@
6059
"EmploymentDuration",
6160
"CompanyInfo",
6261
"KeyPoint",
63-
"Technology",
6462
"Skill",
6563
"Project",
6664
"InstitutionInfo",

backend/core/domain/resume.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ class EmbeddingVector(BaseModel):
1212
email: str | None = None
1313
# Searchable metadata fields
1414
skills: list[str] = Field(default_factory=list)
15-
technologies: list[str] = Field(default_factory=list)
1615
companies: list[str] = Field(default_factory=list)
1716
role: str | None = None
1817
location: str | None = None
@@ -129,10 +128,6 @@ class KeyPoint(BaseModel):
129128
text: str
130129

131130

132-
class Technology(BaseModel):
133-
name: str
134-
135-
136131
class Skill(BaseModel):
137132
name: str
138133

@@ -145,15 +140,17 @@ class EmploymentHistoryItem(BaseModel):
145140
duration: EmploymentDuration
146141
location: Location | None = None
147142
key_points: list[KeyPoint] = Field(default_factory=list)
148-
technologies: list[Technology] = Field(default_factory=list)
143+
skills: list[Skill] = Field(default_factory=list)
149144

150145
@model_validator(mode="before")
151146
@classmethod
152147
def accept_legacy_employment(cls, v: dict):
153148
if "company" in v and isinstance(v["company"], str):
154149
v["company"] = {"name": v["company"]}
155-
if "tech_stack" in v and "technologies" not in v:
156-
v["technologies"] = v.pop("tech_stack")
150+
if "tech_stack" in v and "skills" not in v:
151+
v["skills"] = v.pop("tech_stack")
152+
if "technologies" in v and "skills" not in v:
153+
v["skills"] = v.pop("technologies")
157154
if "start_date" in v or "end_date" in v or "date_format" in v or "duration_months" in v:
158155
v["duration"] = {
159156
"date_format": v.pop("date_format", "MM.YYYY"),
@@ -163,24 +160,26 @@ def accept_legacy_employment(cls, v: dict):
163160
}
164161
if "key_points" in v:
165162
v["key_points"] = [kp if isinstance(kp, dict) else {"text": kp} for kp in v["key_points"]]
166-
if "technologies" in v:
167-
v["technologies"] = [t if isinstance(t, dict) else {"name": t} for t in v["technologies"]]
163+
if "skills" in v:
164+
v["skills"] = [s if isinstance(s, dict) else {"name": s} for s in v["skills"]]
168165
return v
169166

170167

171168
class Project(BaseModel):
172169
title: str
173170
url: str | None = None
174-
technologies: list[Technology] = Field(default_factory=list)
171+
skills: list[Skill] = Field(default_factory=list)
175172
key_points: list[KeyPoint] = Field(default_factory=list)
176173

177174
@model_validator(mode="before")
178175
@classmethod
179176
def accept_legacy_project(cls, v: dict):
180-
if "tech_stack" in v and "technologies" not in v:
181-
v["technologies"] = v.pop("tech_stack")
182-
if "technologies" in v:
183-
v["technologies"] = [t if isinstance(t, dict) else {"name": t} for t in v["technologies"]]
177+
if "tech_stack" in v and "skills" not in v:
178+
v["skills"] = v.pop("tech_stack")
179+
if "technologies" in v and "skills" not in v:
180+
v["skills"] = v.pop("technologies")
181+
if "skills" in v:
182+
v["skills"] = [s if isinstance(s, dict) else {"name": s} for s in v["skills"]]
184183
if "key_points" in v:
185184
v["key_points"] = [kp if isinstance(kp, dict) else {"text": kp} for kp in v["key_points"]]
186185
return v
@@ -322,10 +321,14 @@ def years_of_experience(self) -> float:
322321
def has_skill(self, skill: str) -> bool:
323322
return any(skill.lower() == s.name.lower() for s in self.skills)
324323

325-
def get_technologies(self) -> set[str]:
326-
techs: set[str] = set()
324+
def get_all_skills(self) -> set[str]:
325+
all_skills: set[str] = set()
326+
# Skills from main skills list
327+
all_skills.update(s.name for s in self.skills)
328+
# Skills from employment history
327329
for emp in self.employment_history:
328-
techs.update(t.name for t in emp.technologies)
330+
all_skills.update(s.name for s in emp.skills)
331+
# Skills from projects
329332
for proj in self.projects:
330-
techs.update(t.name for t in proj.technologies)
331-
return techs
333+
all_skills.update(s.name for s in proj.skills)
334+
return all_skills

backend/core/domain/search.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ class SearchType(StrEnum):
1111
@dataclass
1212
class SearchFilters:
1313
skills: list[str] | None = None
14-
technologies: list[str] | None = None
1514
role: str | None = None
1615
company: str | None = None
1716
location: str | None = None
@@ -27,7 +26,6 @@ class FilterOption:
2726
@dataclass
2827
class FilterOptionsResult:
2928
skills: list[FilterOption] = field(default_factory=list)
30-
technologies: list[FilterOption] = field(default_factory=list)
3129
roles: list[FilterOption] = field(default_factory=list)
3230
companies: list[FilterOption] = field(default_factory=list)
3331
locations: list[FilterOption] = field(default_factory=list)

backend/core/model_registry.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
Resume,
2828
ScientificContribution,
2929
Skill,
30-
Technology,
3130
WorkAuthorization,
3231
)
3332
from storage.neo4j_models import (
@@ -55,7 +54,6 @@
5554
ResumeNode,
5655
ScientificContributionNode,
5756
SkillNode,
58-
TechnologyNode,
5957
WorkAuthorizationNode,
6058
)
6159

@@ -84,7 +82,6 @@ def initialize(cls) -> None:
8482
(EmploymentDuration, EmploymentDurationNode),
8583
(CompanyInfo, CompanyInfoNode),
8684
(KeyPoint, KeyPointInfoNode),
87-
(Technology, TechnologyNode),
8885
(Skill, SkillNode),
8986
(Project, ProjectNode),
9087
(InstitutionInfo, InstitutionInfoNode),

backend/core/tests.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
SearchRequest,
2424
SearchType,
2525
Skill,
26-
Technology,
2726
VectorHit,
2827
WorkMode,
2928
)
@@ -64,9 +63,9 @@ def setUp(self) -> None:
6463
KeyPoint(text="Led development of resume matching pipeline"),
6564
KeyPoint(text="Shipped embeddings search service at scale"),
6665
]
67-
technologies = [
68-
Technology(name="Django"),
69-
Technology(name="PostgreSQL"),
66+
skills_used = [
67+
Skill(name="Django"),
68+
Skill(name="PostgreSQL"),
7069
]
7170
history_payload = {
7271
"position": "Senior Backend Engineer",
@@ -76,7 +75,7 @@ def setUp(self) -> None:
7675
"duration": duration.model_dump(mode="json"),
7776
"location": location.model_dump(mode="json"),
7877
"key_points": [kp.model_dump(mode="json") for kp in key_points],
79-
"technologies": [tech.model_dump(mode="json") for tech in technologies],
78+
"skills": [skill.model_dump(mode="json") for skill in skills_used],
8079
}
8180
history_item = EmploymentHistoryItem.model_validate(history_payload)
8281

@@ -99,7 +98,7 @@ def test_resume_serialization_includes_nested_fields(self) -> None:
9998
history_entry = payload["employment_history"][0]
10099
self.assertEqual(history_entry["company"]["name"], "AI Labs")
101100
self.assertEqual(history_entry["duration"]["duration_months"], 42)
102-
self.assertEqual(history_entry["technologies"][0]["name"], "Django")
101+
self.assertEqual(history_entry["skills"][0]["name"], "Django")
103102

104103
profile = payload["professional_profile"]
105104
self.assertEqual(profile["preferences"]["role"], "Backend Engineer")
@@ -109,7 +108,7 @@ def test_resume_helper_methods(self) -> None:
109108
self.assertAlmostEqual(self.resume.years_of_experience(), 3.5)
110109
self.assertTrue(self.resume.has_skill("python"))
111110
self.assertFalse(self.resume.has_skill("Go"))
112-
self.assertEqual(self.resume.get_technologies(), {"Django", "PostgreSQL"})
111+
self.assertEqual(self.resume.get_all_skills(), {"Python", "Django", "PostgreSQL"})
113112

114113

115114
class SearchModelBehaviourTests(TestCase):

backend/processor/services/content_structure_service.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ def _prepare_prompt(self, text: str, links: list[dict]) -> str:
5454
5555
Validation Guardrails:
5656
- Company names must match exactly (case-sensitive)
57-
- Skills only from explicit skills sections
57+
- Skills include ALL technical and soft capabilities (Python, React, Docker, Leadership, etc.)
5858
- Links must exist in original document
59-
- Tech stack only from explicit "Stack:" or equivalent section
59+
- Extract skills from ANY mention: skills sections, tech stacks, tool lists, technologies used
6060
6161
SECTION-SPECIFIC RULES:
6262
@@ -71,7 +71,7 @@ def _prepare_prompt(self, text: str, links: list[dict]) -> str:
7171
- DO NOT include education (degrees, university attendance) as employment history.
7272
- Education (Bachelor's, Master's, PhD, etc.) must ONLY go in the education section, never in employment_history.
7373
- Responsibilities: Use exact bullet points verbatim.
74-
- Tech stack: Extract only from explicit "Stack:" or equivalent section.
74+
- Skills: Extract ALL technologies, tools, frameworks, languages mentioned (Stack:, Technologies:, Tools:, etc.)
7575
- If a starting month is not explicitly mentioned (e.g., "2022 - Present"), ASSUME "01.2022 - Present".
7676
7777
EDUCATION:
@@ -85,6 +85,7 @@ def _prepare_prompt(self, text: str, links: list[dict]) -> str:
8585
PROJECTS:
8686
- Only include personal projects that are explicitly stated as pet projects or were completed outside of employment.
8787
- DO NOT duplicate any project details already present in the employment_history section.
88+
- Skills: Extract ALL technologies, tools, frameworks mentioned in each project
8889
- If no qualifying projects are mentioned, set "projects" to null.
8990
9091
LANGUAGE PROFICIENCY:

backend/processor/services/processing_service.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -203,15 +203,15 @@ def _generate_embeddings_from_resume(self, resume: Resume) -> list[EmbeddingVect
203203
embeddings = self.embedding_service.encode_batch(texts)
204204

205205
# Extract metadata for search filtering (all lists are guaranteed to exist)
206-
all_techs = [tech.name for emp in resume.employment_history for tech in emp.technologies]
207-
all_techs.extend(tech.name for proj in resume.projects for tech in proj.technologies)
206+
all_skills = [s.name for s in resume.skills]
207+
all_skills.extend(s.name for emp in resume.employment_history for s in emp.skills)
208+
all_skills.extend(s.name for proj in resume.projects for s in proj.skills)
208209

209210
# Only include fields that EmbeddingVector expects
210211
vector_metadata = {
211212
"name": resume.personal_info.name,
212213
"email": resume.personal_info.contact.email,
213-
"skills": [s.name for s in resume.skills],
214-
"technologies": list(set(all_techs)), # dedupe
214+
"skills": list(set(all_skills)), # dedupe all skills from everywhere
215215
"companies": list({emp.company.name for emp in resume.employment_history if emp.company}),
216216
"role": resume.professional_profile.preferences.role
217217
if resume.professional_profile and resume.professional_profile.preferences

backend/search/serializers.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ class SearchFiltersSerializer(serializers.Serializer):
6262
"""Serializer for search filter parameters."""
6363

6464
skills = serializers.ListField(
65-
child=serializers.CharField(), required=False, allow_null=True, help_text="Skills to filter by"
66-
)
67-
technologies = serializers.ListField(
68-
child=serializers.CharField(), required=False, allow_null=True, help_text="Technologies to filter by"
65+
child=serializers.CharField(),
66+
required=False,
67+
allow_null=True,
68+
help_text="Skills to filter by (includes technologies)",
6969
)
7070
role = serializers.CharField(required=False, allow_null=True, help_text="Desired role to filter by")
7171
company = serializers.CharField(required=False, allow_null=True, help_text="Company to filter by")
@@ -173,8 +173,7 @@ class FilterOptionSerializer(serializers.Serializer):
173173

174174

175175
class FilterOptionsSerializer(serializers.Serializer):
176-
skills = FilterOptionSerializer(many=True, default=list, help_text="Available skills")
177-
technologies = FilterOptionSerializer(many=True, default=list, help_text="Available technologies")
176+
skills = FilterOptionSerializer(many=True, default=list, help_text="Available skills (includes technologies)")
178177
roles = FilterOptionSerializer(many=True, default=list, help_text="Available roles")
179178
companies = FilterOptionSerializer(many=True, default=list, help_text="Available companies")
180179
locations = FilterOptionSerializer(many=True, default=list, help_text="Available locations")

0 commit comments

Comments
 (0)