-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathai_helper.py
More file actions
1201 lines (984 loc) · 46 KB
/
ai_helper.py
File metadata and controls
1201 lines (984 loc) · 46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import os
import google.generativeai as genai
from typing import Dict, List, Any, Optional
import json
import re
import tempfile
import fitz # PyMuPDF
from PIL import Image
import pytesseract
from pdf2image import convert_from_path, convert_from_bytes
from datetime import datetime
import streamlit as st
# Configure the Gemini API with your key - prioritize session state API key if available
# GOOGLE_API_KEY = st.session_state.get('api_key') or os.environ.get("GOOGLE_API_KEY")
# global model
# if GOOGLE_API_KEY:
# # print("Warning: GOOGLE_API_KEY not set. AI features will not work.")
def configure_gemini_api(GOOGLE_API_KEY: Optional[str] = None):
    """
    Configure the Gemini API with the provided API key and set up the model for use.

    The preferred model is tried first, then every fallback model in order.
    Each candidate is verified with a tiny test prompt; the first one that
    answers is stored in ``st.session_state.model``.

    Args:
        GOOGLE_API_KEY: API key handed to ``genai.configure``.

    Returns:
        True when a working model was stored in the session state. False when
        no candidate model answered; in that case a placeholder whose
        ``generate_content`` always raises is stored instead, so callers that
        wrap the call in ``try/except`` take their fallback path.

    Raises:
        Exception: When the first connection error looks like an API key /
            authentication problem, or when configuring the client fails.
    """
    preferred_model = 'gemini-2.5-pro-exp-03-25'
    # The preferred model is not repeated here: it has just failed when the
    # fallbacks are consulted.
    fallback_models = [
        "gemini-2.5-pro-preview-03-25",
        "gemini-2.0-flash-exp",
        "gemini-2.0-flash",
        "gemini-2.0-flash-001",
        "gemini-2.0-flash-exp-image-generation",
        "gemini-2.0-flash-lite-001",
        "gemini-2.0-flash-lite",
        "gemini-2.0-flash-lite-preview-02-05",
        "gemini-2.0-flash-lite-preview",
        "gemini-2.0-pro-exp",
        "gemini-2.0-pro-exp-02-05",
        "gemini-2.0-flash-thinking-exp-01-21",
        "gemini-2.0-flash-thinking-exp",
        "gemini-2.0-flash-thinking-exp-1219",
        "gemini-1.5-pro-latest",
        "gemini-1.5-pro-001",
        "gemini-1.5-pro-002",
        "gemini-1.5-pro",
        "gemini-1.5-flash-latest",
        "gemini-1.5-flash-001",
        "gemini-1.5-flash-001-tuning",
        "gemini-1.5-flash",
        "gemini-1.5-flash-002",
        "gemini-1.5-flash-8b",
        "gemini-1.5-flash-8b-001",
        "gemini-1.5-flash-8b-latest",
        "gemini-1.5-flash-8b-exp-0827",
        "gemini-1.5-flash-8b-exp-0924",
    ]

    try:
        # Configure the Gemini API
        genai.configure(api_key=GOOGLE_API_KEY)

        try:
            model = genai.GenerativeModel(preferred_model)
            # Test with a simple prompt to verify it works
            test_response = model.generate_content("Hello")
            print(f"Successfully connected to Gemini AI using model: {preferred_model} \n {test_response.text}")
            st.session_state.model = model
            return True
        except Exception as e:
            print(f"Error connecting to model {preferred_model}: {e}")
            original_error = str(e)
            lowered_error = original_error.lower()
            if "API key" in original_error or "authentication" in lowered_error or "invalid" in lowered_error:
                # This appears to be an API key issue - no fallback model can help
                raise Exception(f"Invalid API key: {original_error}") from e

        # Try fallback models if not an API key issue
        for fallback_model in fallback_models:
            try:
                candidate = genai.GenerativeModel(fallback_model)
                # Constructing the model object does not prove it is usable;
                # only a successful test call does.
                candidate.generate_content("Hello")
            except Exception as e2:
                print(f"Error connecting to fallback model {fallback_model}: {e2}")
                continue
            print(f"Successfully connected to Gemini AI using fallback model: {fallback_model}")
            st.session_state.model = candidate
            return True

        print("Error: Could not connect to any Gemini model. AI features will not work.")

        # Provide a dummy model that will be properly handled by the exception blocks
        class DummyModel:
            def generate_content(self, prompt):
                raise Exception("No Gemini model available")

        st.session_state.model = DummyModel()
        return False
    except Exception as e:
        print(f"Error configuring Gemini AI: {e}")
        # Re-raise the exception to be handled by the caller
        raise
def extract_text_from_pdf(pdf_path_or_data, use_ocr=True):
    """
    Extract text from PDF using both native text extraction and OCR when necessary

    Args:
        pdf_path_or_data: Path to PDF file or PDF data as bytes
        use_ocr: Whether to use OCR for text extraction (default: True)

    Returns:
        Extracted text from the PDF. When OCR is enabled, the OCR text is
        returned if native extraction found nothing or if the OCR text is more
        than 20% longer than the native text; otherwise the native text is
        returned. An empty string is returned when the PDF cannot be read.
    """
    try:
        # Check if input is a file path or bytes
        is_bytes = isinstance(pdf_path_or_data, (bytes, bytearray))

        # Open the PDF using PyMuPDF (fitz)
        if is_bytes:
            pdf_bytes = bytes(pdf_path_or_data)
            pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
        else:
            pdf_document = fitz.open(pdf_path_or_data)

        # First attempt: Extract text directly from PDF. The document is only
        # needed for this step, so it is closed even if a page fails to parse.
        try:
            page_texts = [page.get_text() for page in pdf_document]
        finally:
            pdf_document.close()

        text = "".join(page_text + "\n\n" for page_text in page_texts if page_text.strip())
        text_extraction_success = bool(text)

        if not use_ocr:
            return text.strip()

        # Second attempt: Apply OCR
        print("Using OCR to extract text from PDF...")
        try:
            from PIL import ImageEnhance

            # Convert PDF to images
            if is_bytes:
                images = convert_from_bytes(pdf_bytes)
            else:
                images = convert_from_path(pdf_path_or_data)

            # Apply OCR to each page with enhanced image quality
            ocr_pages = []
            for img in images:
                gray_img = img.convert('L')  # Convert to grayscale
                enhanced_img = ImageEnhance.Contrast(gray_img).enhance(1.5)  # Increase contrast by 50%
                ocr_pages.append(
                    pytesseract.image_to_string(
                        enhanced_img,
                        lang='eng',  # English language
                        config='--psm 6'  # Assume a single uniform block of text
                    )
                )
            ocr_text = "".join(page_text + "\n\n" for page_text in ocr_pages)
        except Exception as ocr_error:
            # A broken OCR toolchain must not throw away text that native
            # extraction already produced.
            print(f"OCR failed, falling back to native text extraction: {ocr_error}")
            return text.strip()

        # Prefer the native text unless OCR recovered noticeably more content
        if text_extraction_success and len(ocr_text) <= len(text) * 1.2:
            return text.strip()
        return ocr_text.strip()
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return ""
def extract_resume_info_fallback(text: str) -> Dict[str, Any]:
    """
    Fallback function to extract resume information using rule-based parsing
    when AI-based extraction fails

    Only the email, the phone number and a fixed list of well-known skills are
    detected; name, location, education, experience and certifications are
    left empty.

    Args:
        text: The full text of the resume

    Returns:
        Dictionary with basic extracted information
    """
    print("Using fallback function for resume information extraction")
    text = text or ""

    # Initialize the structure for extracted information
    resume_info = {
        "contact_info": {
            "name": None,
            "email": None,
            "phone": None,
            "location": None
        },
        "education": [],
        "experience": [],
        "skills": {"technical": [], "soft": []},
        "certifications": []
    }

    # Extract email using regex (first occurrence wins)
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    email_match = re.search(email_pattern, text)
    if email_match:
        resume_info["contact_info"]["email"] = email_match.group(0)

    # Extract phone using regex (first occurrence wins)
    phone_pattern = r'(?:\+\d{1,3}[-\.\s]?)?\(?\d{3}\)?[-\.\s]?\d{3}[-\.\s]?\d{4}'
    phone_match = re.search(phone_pattern, text)
    if phone_match:
        resume_info["contact_info"]["phone"] = phone_match.group(0)

    # Extract skills (simplified approach)
    common_tech_skills = ["python", "java", "javascript", "html", "css", "sql",
                          "aws", "docker", "kubernetes", "react", "angular", "node.js"]
    common_soft_skills = ["leadership", "communication", "teamwork", "problem solving",
                          "time management", "critical thinking"]
    lowered_text = text.lower()

    def _mentioned(skills):
        # Skill names are escaped so that e.g. the dot in "node.js" is literal.
        return [skill for skill in skills
                if re.search(r'\b' + re.escape(skill) + r'\b', lowered_text)]

    resume_info["skills"]["technical"] = _mentioned(common_tech_skills)
    resume_info["skills"]["soft"] = _mentioned(common_soft_skills)

    # Return the extracted information
    return resume_info
def extract_resume_info(text_or_pdf_path: str, is_pdf=False) -> Dict[str, Any]:
    """
    Extract structured information from resume text or PDF using Gemini AI

    The model stored in ``st.session_state['model']`` is asked to return JSON.
    If no model is configured, the call fails, or the answer is not a JSON
    object, the rule-based ``extract_resume_info_fallback`` is used instead.

    Args:
        text_or_pdf_path: Either the full text of the resume or path to PDF file
        is_pdf: Whether the input is a PDF file path (default: False)

    Returns:
        Dictionary with extracted information, or an empty dictionary when
        ``is_pdf`` is set and no text could be extracted from the PDF.
    """
    # If input is a PDF file, extract text with enhanced PDF extraction
    if is_pdf:
        text = extract_text_from_pdf(text_or_pdf_path, use_ocr=True)
        if not text:
            print("Failed to extract text from PDF")
            return {}
    else:
        text = text_or_pdf_path

    prompt = f"""
You are an expert AI assistant specialized in parsing and extracting information from resumes.
Based on the resume text provided, extract the following information in JSON format:
1. Contact information (name, email, phone, location)
2. Education history (degree, institution, graduation date, GPA if available)
3. Work experience (company, job title, dates, descriptions, achievements)
4. Skills (both technical and soft skills)
5. Certifications and licenses
Return the output as a valid JSON object with the following structure:
{{
"contact_info": {{
"name": "Full Name",
"email": "email@example.com",
"phone": "123-456-7890",
"location": "City, State"
}},
"education": [
{{
"degree": "Degree Name",
"institution": "Institution Name",
"graduation_date": "YYYY-MM",
"gpa": "GPA if available"
}}
],
"experience": [
{{
"company": "Company Name",
"title": "Job Title",
"start_date": "YYYY-MM",
"end_date": "YYYY-MM or 'Present'",
"description": "Job description",
"achievements": ["Achievement 1", "Achievement 2"]
}}
],
"skills": {{"technical": ["skill1", "skill2"], "soft": ["skill1", "skill2"]}},
"certifications": ["Certification 1", "Certification 2"]
}}
Only respond with the JSON, nothing else. If you cannot find certain information, use null or empty arrays/objects as appropriate.
Here is the resume text:
{text}
"""
    try:
        model = st.session_state.get('model', None)
        if model is None:
            raise RuntimeError("Gemini model is not configured")
        response_text = model.generate_content(prompt).text

        # The answer may be wrapped in a markdown code block (with or without
        # a "json" tag, possibly unterminated); take the fenced part if present.
        fenced = re.search(r"```(?:json)?\s*(.*?)(?:```|\Z)", response_text,
                           re.DOTALL | re.IGNORECASE)
        json_str = (fenced.group(1) if fenced else response_text).strip()

        # Parse the JSON string into a dictionary
        parsed_info = json.loads(json_str)
        if not isinstance(parsed_info, dict):
            raise ValueError(f"expected a JSON object, got {type(parsed_info).__name__}")
        return parsed_info
    except Exception as e:
        print(f"Error extracting resume information with Gemini: {e}")
        # Use a rules-based approach as fallback
        return extract_resume_info_fallback(text)
def analyze_job_description_fallback(text: str) -> Dict[str, Any]:
    """
    Fallback function to analyze job description using rule-based parsing
    when AI-based analysis fails

    Detects a fixed list of well-known skills, degree phrases and the first
    "N years of experience" statement. Responsibilities and preferred
    qualifications are left empty.

    Args:
        text: The full text of the job description

    Returns:
        Dictionary with basic extracted job requirements
    """
    print("Using fallback function for job description analysis")
    text = text or ""

    # Initialize the structure for extracted information
    job_info = {
        "skills": {
            "technical": [],
            "soft": []
        },
        "experience": [],
        "education": [],
        "responsibilities": [],
        "preferred_qualifications": []
    }

    # Extract skills (simplified approach)
    common_tech_skills = ["python", "java", "javascript", "html", "css", "sql",
                          "aws", "docker", "kubernetes", "react", "angular", "node.js"]
    common_soft_skills = ["leadership", "communication", "teamwork", "problem solving",
                          "time management", "critical thinking"]
    lowered_text = text.lower()

    def _mentioned(skills):
        # Skill names are escaped so that e.g. the dot in "node.js" is literal.
        return [skill for skill in skills
                if re.search(r'\b' + re.escape(skill) + r'\b', lowered_text)]

    job_info["skills"]["technical"] = _mentioned(common_tech_skills)
    job_info["skills"]["soft"] = _mentioned(common_soft_skills)

    # Look for common education requirements
    education_patterns = [
        r'\b(?:bachelor|master|phd|doctorate|bs|ms|ba|ma|mba)\b.*?\b(?:degree|education)\b',
        r'\bdegree\s+in\s+[^.]*'
    ]
    for pattern in education_patterns:
        for match in re.findall(pattern, text, re.IGNORECASE):
            job_info["education"].append({"degree": match.strip(), "field": None})

    # Look for experience requirements; only the first statement is used
    experience_pattern = r'\b(\d+)[+]?\s+years?\s+(?:of\s+)?(?:experience|exp)\b'
    experience_match = re.search(experience_pattern, text, re.IGNORECASE)
    if experience_match:
        job_info["experience"].append({
            "years": experience_match.group(1),
            "domain": "General"
        })

    return job_info
def analyze_job_description(text: str) -> Dict[str, Any]:
    """
    Analyze job description using Gemini AI

    The model stored in ``st.session_state['model']`` is asked to return JSON.
    If no model is configured, the call fails, or the answer is not a JSON
    object, the rule-based ``analyze_job_description_fallback`` is used instead.

    Args:
        text: The full text of the job description

    Returns:
        Dictionary with extracted job requirements
    """
    prompt = f"""
You are an expert AI assistant specialized in analyzing job descriptions.
Based on the job description provided, extract the following information in JSON format:
1. Required skills (both technical and soft skills)
2. Required experience (years, specific domain experience)
3. Required education (degrees, certifications)
4. Job responsibilities
5. Preferred qualifications (nice-to-have but not required)
Return the output as a valid JSON object with the following structure:
{{
"skills": {{
"technical": ["skill1", "skill2"],
"soft": ["skill1", "skill2"]
}},
"experience": [
{{
"years": "X",
"domain": "Domain Name"
}}
],
"education": [
{{
"degree": "Degree Name",
"field": "Field of Study"
}}
],
"responsibilities": ["Responsibility 1", "Responsibility 2"],
"preferred_qualifications": ["Qualification 1", "Qualification 2"]
}}
Only respond with the JSON, nothing else.
Here is the job description:
{text}
"""
    try:
        model = st.session_state.get('model', None)
        if model is None:
            raise RuntimeError("Gemini model is not configured")
        response_text = model.generate_content(prompt).text

        # The answer may be wrapped in a markdown code block (with or without
        # a "json" tag, possibly unterminated); take the fenced part if present.
        fenced = re.search(r"```(?:json)?\s*(.*?)(?:```|\Z)", response_text,
                           re.DOTALL | re.IGNORECASE)
        json_str = (fenced.group(1) if fenced else response_text).strip()

        # Parse the JSON string into a dictionary
        job_info = json.loads(json_str)
        if not isinstance(job_info, dict):
            raise ValueError(f"expected a JSON object, got {type(job_info).__name__}")
        return job_info
    except Exception as e:
        print(f"Error analyzing job description with Gemini: {e}")
        # Use a rules-based approach as fallback
        return analyze_job_description_fallback(text)
def calculate_experience_match(resume_experience, job_experience):
    """
    Helper function to calculate experience match score

    Args:
        resume_experience: List of experience dicts; ``start_date`` and
            ``end_date`` are read as "YYYY" or "YYYY-MM" (``end_date`` may be
            missing or "Present", meaning the current year).
        job_experience: List of requirement dicts; the first entry whose
            ``years`` value contains a number ("3", "3+", "2-4") sets the
            required years.

    Returns:
        A score between 0 and 1: 0.8 when the job has no experience
        requirements, 0.2 when the candidate lists no experience, 0.7 when no
        required number of years could be determined, otherwise the ratio of
        candidate years to required years capped at 1.0.
    """
    if not job_experience:
        return 0.8  # Default score if no experience requirements

    # Check if candidate has any experience
    if not resume_experience:
        return 0.2  # Low score if no experience

    # Basic matching - calculate based on years of experience
    job_years_required = 0
    for exp in job_experience:
        if isinstance(exp, dict) and 'years' in exp:
            # Take the first number so "3+" and "2-4" parse as 3 and 2.
            years_found = re.search(r'\d+', str(exp['years']))
            if years_found:
                job_years_required = int(years_found.group())
                break
            job_years_required = 1  # Default if parsing fails

    # Estimate candidate's total years of experience
    current_year = datetime.now().year
    candidate_years = 0
    for exp in resume_experience:
        if not (isinstance(exp, dict) and 'start_date' in exp):
            continue
        try:
            # str() lets None or numeric dates fail (or parse) inside the
            # handled ValueError path instead of raising AttributeError.
            start_year = int(str(exp['start_date']).split('-')[0])
            end_date = exp.get('end_date')
            if end_date and str(end_date).lower() != 'present':
                end_year = int(str(end_date).split('-')[0])
            else:
                end_year = current_year
            # Reversed dates must not subtract from the total.
            candidate_years += max(0, end_year - start_year)
        except (ValueError, IndexError):
            candidate_years += 1  # Default if parsing fails

    # Calculate match ratio
    if job_years_required > 0:
        return min(1.0, candidate_years / job_years_required)
    return 0.7  # Default
def calculate_education_match(resume_education, job_education):
    """
    Helper function to calculate education match score

    Returns 0.9 when the job states no education requirements, 0.3 when it
    does but the resume lists no education, and a flat 0.7 otherwise (any
    listed education is assumed to be a match).
    """
    if job_education and resume_education:
        # Very basic matching: any degree counts as a default match
        return 0.7
    # Either side is empty: missing requirements score high, missing
    # candidate education scores low.
    return 0.3 if job_education else 0.9
def generate_recommendations(skills_match, experience_match, education_match,
                             matching_skills, missing_skills, resume_info, job_info):
    """
    Generate recommendations based on match analysis

    Args:
        skills_match: Skills score in the range 0-1; below 0.7 triggers a
            skills recommendation.
        experience_match: Experience score in the range 0-1; below 0.6
            triggers an experience recommendation.
        education_match: Education score in the range 0-1; below 0.5 triggers
            an education recommendation.
        matching_skills: Unused; accepted for interface compatibility.
        missing_skills: Skills required by the job but absent from the
            resume; at most the first three are named.
        resume_info: Unused; accepted for interface compatibility.
        job_info: Unused; accepted for interface compatibility.

    Returns:
        Non-empty list of recommendation strings.
    """
    recommendations = []

    # Skills recommendations
    if skills_match < 0.7:
        top_missing = [str(skill) for skill in list(missing_skills or [])[:3]]
        if top_missing:
            recommendations.append(f"Consider adding skills in: {', '.join(top_missing)}")
        else:
            # Nothing specific to name - avoid a dangling "skills in: " message
            recommendations.append("Consider strengthening the skills listed in the job description")

    # Experience recommendations
    if experience_match < 0.6:
        recommendations.append("Highlight more relevant work experience for this role")

    # Education recommendations
    if education_match < 0.5:
        recommendations.append("Consider additional education or certifications relevant to this role")

    # Default recommendation if none generated
    if not recommendations:
        recommendations.append("Your profile appears to be a good match for this role")

    return recommendations
def generate_feedback_summary(skills_match, experience_match, education_match, overall_match):
    """
    Generate overall feedback summary

    Only ``overall_match`` (a 0-100 percentage) decides the wording; the
    component scores are accepted but not used.
    """
    # (minimum overall score, message) pairs, checked from best to worst
    tiers = (
        (80, "Strong match! Your profile aligns well with this job's requirements."),
        (60, "Good match. You meet many of the job requirements but could improve in some areas."),
        (40, "Moderate match. You have some relevant qualifications but may need additional skills or experience."),
    )
    for minimum_score, message in tiers:
        if overall_match >= minimum_score:
            return message
    return "Limited match. Consider developing more skills or experience for this type of role."
def calculate_match_score_fallback(resume_info: Dict[str, Any], job_info: Dict[str, Any]) -> Dict[str, Any]:
    """
    Calculate the match score between a resume and job description using rule-based approach

    Skills are compared with ``relevance_scorer.extract_matching_items``;
    experience and education use the enhanced matchers defined in this
    module. The overall score weights skills 50%, experience 30% and
    education 20%.

    Args:
        resume_info: Structured resume information
        job_info: Structured job requirements

    Returns:
        Dictionary with match scores (0-100), matching/missing items for
        skills, experience and education, recommendations and a feedback
        summary
    """
    print("Using fallback function for match scoring")
    resume_info = resume_info or {}
    job_info = job_info or {}

    def _skill_set(info, category):
        # Structured input may carry null / non-dict "skills" or non-string
        # entries; treat anything unusable as "no skills".
        skills = info.get("skills")
        if not isinstance(skills, dict):
            return set()
        return {skill.lower() for skill in (skills.get(category) or []) if isinstance(skill, str)}

    # Skills matching
    resume_tech_skills = _skill_set(resume_info, "technical")
    resume_soft_skills = _skill_set(resume_info, "soft")
    job_tech_skills = _skill_set(job_info, "technical")
    job_soft_skills = _skill_set(job_info, "soft")

    # Use the enhanced matching function with synonym support.
    # Sorted so the result order does not depend on set iteration order.
    from relevance_scorer import extract_matching_items
    skill_match_result = extract_matching_items(
        sorted(resume_tech_skills) + sorted(resume_soft_skills),
        sorted(job_tech_skills) + sorted(job_soft_skills),
        matcher_type="skills"
    )
    matching_skills = skill_match_result["matching_items"]
    missing_skills = skill_match_result["missing_items"]

    # Calculate skill match percentage
    total_job_skills = len(job_tech_skills) + len(job_soft_skills)
    if total_job_skills > 0:
        skills_match = min(1.0, len(matching_skills) / total_job_skills)
    else:
        skills_match = 0.7  # Default if no skills specified

    # Experience match (improved)
    experience_match, matching_experience, missing_experience = calculate_enhanced_experience_match(
        resume_info.get("experience") or [],
        job_info.get("experience") or []
    )

    # Education match (improved)
    education_match, matching_education, missing_education = calculate_enhanced_education_match(
        resume_info.get("education") or [],
        job_info.get("education") or []
    )

    # Calculate overall match with weighted components
    overall_match = (skills_match * 0.5) + (experience_match * 0.3) + (education_match * 0.2)
    overall_match = round(overall_match * 100)  # Convert to percentage

    # Generate recommendations
    recommendations = generate_recommendations(
        skills_match, experience_match, education_match,
        matching_skills, missing_skills,
        resume_info, job_info
    )

    # Generate feedback summary
    feedback_summary = generate_feedback_summary(
        skills_match, experience_match, education_match, overall_match
    )

    # Return structured match analysis
    return {
        "scores": {
            "skills_match": round(skills_match * 100),
            "experience_match": round(experience_match * 100),
            "education_match": round(education_match * 100),
            "overall_match": overall_match
        },
        "matching_skills": matching_skills,
        "missing_skills": missing_skills,
        "matching_experience": matching_experience,
        "missing_experience": missing_experience,
        "matching_education": matching_education,
        "missing_education": missing_education,
        "recommendations": recommendations,
        "feedback_summary": feedback_summary
    }
def calculate_match_score(resume_info: Dict[str, Any], job_info: Dict[str, Any]) -> Dict[str, Any]:
    """
    Calculate the match score between a resume and job description using Gemini AI

    The model stored in ``st.session_state['model']`` is asked to return a
    JSON object with a ``scores`` section. If the inputs cannot be
    serialised, no model is configured, the call fails, or the answer does
    not have that shape, the rule-based ``calculate_match_score_fallback`` is
    used instead.

    Args:
        resume_info: Structured resume information
        job_info: Structured job requirements

    Returns:
        Dictionary with match scores and feedback
    """
    try:
        # Convert input data to JSON strings for the prompt. Done inside the
        # try block so unserialisable input falls back instead of raising.
        resume_json = json.dumps(resume_info, indent=2)
        job_json = json.dumps(job_info, indent=2)
        prompt = f"""
You are an expert AI assistant specialized in evaluating how well a candidate's resume matches a job description.
Analyze the provided resume information and job description information (both in JSON format) and calculate:
1. Skills match score (0-100): How well the candidate's technical and soft skills match the job requirements
2. Experience match score (0-100): How well the candidate's experience matches the job requirements
3. Education match score (0-100): How well the candidate's education matches the job requirements
4. Overall match score (0-100): A weighted calculation with skills (50%), experience (30%), and education (20%)
Also provide:
- A list of matching skills found in both the resume and job description
- A list of missing skills that are in the job description but not in the resume
- 2-3 specific recommendations for the candidate to improve their fit for this role
- A brief summary of the candidate's fit for this role (2-3 sentences)
Return the output as a valid JSON object with the following structure:
{{
"scores": {{
"skills_match": 85,
"experience_match": 70,
"education_match": 90,
"overall_match": 82
}},
"matching_skills": ["skill1", "skill2"],
"missing_skills": ["skill3", "skill4"],
"recommendations": ["Recommendation 1", "Recommendation 2"],
"feedback_summary": "A brief summary of the candidate's fit for this role"
}}
Only respond with the JSON, nothing else.
Resume Information:
{resume_json}
Job Description Information:
{job_json}
"""
        model = st.session_state.get('model', None)
        if model is None:
            raise RuntimeError("Gemini model is not configured")
        response_text = model.generate_content(prompt).text

        # The answer may be wrapped in a markdown code block (with or without
        # a "json" tag, possibly unterminated); take the fenced part if present.
        fenced = re.search(r"```(?:json)?\s*(.*?)(?:```|\Z)", response_text,
                           re.DOTALL | re.IGNORECASE)
        json_str = (fenced.group(1) if fenced else response_text).strip()

        # Parse the JSON string into a dictionary
        match_analysis = json.loads(json_str)
        if not isinstance(match_analysis, dict) or not isinstance(match_analysis.get("scores"), dict):
            raise ValueError("response is not a JSON object with a 'scores' section")
        return match_analysis
    except Exception as e:
        print(f"Error calculating match score with Gemini: {e}")
        # Use a rules-based approach as fallback
        return calculate_match_score_fallback(resume_info, job_info)
def calculate_enhanced_experience_match(resume_experience, job_experience):
    """
    Calculate how well the candidate's experience matches the job requirements and
    returns both the score and the matching/missing items

    The score combines a years ratio (70%) with the share of required domains
    the candidate covers (30%). Domains are matched through
    ``relevance_scorer.extract_matching_items``; per-position durations come
    from ``extract_years_from_dates``.

    Args:
        resume_experience: List of candidate's experience items
        job_experience: List of job experience requirements

    Returns:
        Tuple of (match_score, matching_experience, missing_experience)
    """
    if not job_experience:
        return 0.7, [], []  # Default score if no experience requirements

    def _describe_requirement(requirement):
        # Avoid rendering "None (...)" or "( years)" for incomplete entries
        domain = requirement.get('domain') or 'experience'
        years = requirement.get('years')
        return f"{domain} ({years} years)" if years else f"{domain}"

    # Check if candidate has any experience
    if not resume_experience:
        return 0.2, [], [_describe_requirement(exp)
                         for exp in job_experience if isinstance(exp, dict)]

    # Extract required years and domains from job requirements
    required_years = 0
    required_domains = []
    for exp in job_experience:
        if not isinstance(exp, dict):
            continue
        # Extract numeric values from strings like "3+ years" or "2-4 years"
        years_found = re.search(r'(\d+)', str(exp.get("years") or ""))
        if years_found:
            required_years = max(required_years, int(years_found.group(1)))
        domain = exp.get("domain")
        if domain:
            required_domains.append(str(domain).lower())

    # Calculate total years of experience and collect domains from the resume
    senior_terms = ("senior", "lead", "manager", "director", "head", "principal")
    generic_domains = ("software", "data", "marketing", "sales", "finance", "design",
                       "research", "management", "engineering", "development")
    total_years = 0
    candidate_domains = []
    for exp in resume_experience:
        if not isinstance(exp, dict):
            continue

        # Duration of this position
        total_years += extract_years_from_dates(exp.get("start_date", ""), exp.get("end_date", ""))

        title = str(exp.get("title") or "").lower()
        company = str(exp.get("company") or "").lower()
        description = str(exp.get("description") or "").lower()
        combined_text = f"{title} {company} {description}"

        # Use title as domain if it's a senior position
        if title and any(term in title for term in senior_terms):
            # Drop the first word (e.g., "senior software engineer" -> "software engineer")
            title_words = title.split()
            if len(title_words) > 1:
                candidate_domains.append(" ".join(title_words[1:]))
            else:
                candidate_domains.append(title)

        # Add other relevant domains from the experience
        for domain in generic_domains:
            if domain in combined_text and domain not in candidate_domains:
                candidate_domains.append(domain)

    # Match domains using synonym matching
    from relevance_scorer import extract_matching_items
    domain_match_result = extract_matching_items(
        candidate_domains,
        required_domains,
        matcher_type="domain"
    )
    matching_domains = domain_match_result["matching_items"]
    missing_domains = domain_match_result["missing_items"]

    # Format matching and missing experience items
    matching_experience = []
    missing_experience = []
    if required_years > 0:
        if total_years >= required_years:
            matching_experience.append(f"{total_years} years of experience (meets {required_years}+ requirement)")
        else:
            # Also reported when no years could be credited to the candidate
            missing_experience.append(f"{required_years - total_years} more years of experience")
    matching_experience.extend(f"Experience in {domain}" for domain in matching_domains)
    missing_experience.extend(f"Experience in {domain}" for domain in missing_domains)

    # Calculate experience match score
    years_match = min(1.0, total_years / required_years) if required_years > 0 else 0.7
    domains_match = len(matching_domains) / len(required_domains) if required_domains else 0.7

    # Combine years and domain matches (years is more important)
    experience_match = (0.7 * years_match) + (0.3 * domains_match)
    return experience_match, matching_experience, missing_experience
def calculate_enhanced_education_match(resume_education, job_education):
    """
    Calculate how well the candidate's education matches the job requirements and
    returns both the score and the matching/missing items

    The score combines a degree-level ratio (60%) with the share of required
    fields the candidate covers (40%). Fields are matched through
    ``relevance_scorer.extract_matching_items``.

    Args:
        resume_education: List of candidate's education items
        job_education: List of job education requirements (dicts with
            ``degree``/``field``, or plain strings)

    Returns:
        Tuple of (match_score, matching_education, missing_education)
    """
    if not job_education:
        return 0.7, [], []  # Default score if no education requirements

    def _describe_requirement(requirement):
        # Avoid rendering "... in None" for requirements without a field
        degree = requirement.get('degree') or ''
        field = requirement.get('field') or ''
        if degree and field:
            return f"{degree} in {field}"
        return f"{degree or field}"

    if not resume_education:
        descriptions = [_describe_requirement(edu) for edu in job_education if isinstance(edu, dict)]
        return 0.3, [], [description for description in descriptions if description]

    # Education levels for comparison, lowest first. Whole-word patterns keep
    # "mba" from matching "ba" and "mathematics" from matching "ma".
    level_patterns = (
        (1, r"\bhigh school\b"),
        (2, r"\bassociate\w*\b"),
        (3, r"\b(?:bachelor\w*|undergraduate|bs|bsc|ba)\b"),
        (4, r"\b(?:master\w*|ms|msc|ma|mba)\b"),
        (5, r"\b(?:phd|doctorate)\b"),
    )
    level_names = {1: "high school", 2: "associate", 3: "bachelor", 4: "master", 5: "doctorate"}

    def _education_level(text):
        # Lowest level mentioned in the text, 0 when none is recognised.
        # Periods are dropped so "b.s." and "ph.d." are recognised.
        normalized = text.replace(".", "")
        for level, pattern in level_patterns:
            if re.search(pattern, normalized):
                return level
        return 0

    def _split_field(text):
        # Split "<degree> in <field>" on the word "in" only, never inside
        # words such as "engineering" or "business".
        return re.split(r"\bin\b", text, maxsplit=1)

    # Extract required degree level and fields from job requirements
    required_level = 0
    required_fields = []
    for edu in job_education:
        if isinstance(edu, dict):
            degree = str(edu.get("degree") or "").lower()
            field = str(edu.get("field") or "").lower()
            required_level = max(required_level, _education_level(degree))
            if field:
                required_fields.append(field)
            elif degree:
                # If no specific field but degree is specified
                required_fields.append(degree)
        elif isinstance(edu, str):
            # Handle case where education is a string
            edu_lower = edu.lower()
            level = _education_level(edu_lower)
            if level:
                required_level = max(required_level, level)
                parts = _split_field(edu_lower)
                if len(parts) > 1:
                    field_part = parts[1].strip()
                    if field_part:
                        required_fields.append(field_part)
                else:
                    required_fields.append(level_names[level])

    # Extract candidate's education information
    highest_level = 0
    candidate_fields = []
    for edu in resume_education:
        if not isinstance(edu, dict):
            continue
        degree = str(edu.get("degree") or "").lower()
        institution = str(edu.get("institution") or "").lower()
        highest_level = max(highest_level, _education_level(degree))

        # Extract field from degree
        parts = _split_field(degree)
        if len(parts) > 1:
            field = parts[1].strip()
            if field:
                candidate_fields.append(field)
        elif degree:
            # Use whole degree as field
            candidate_fields.append(degree)

        # Add institution as potential field match
        if institution:
            candidate_fields.append(institution)

    # Match fields using synonym matching
    from relevance_scorer import extract_matching_items
    field_match_result = extract_matching_items(
        candidate_fields,
        required_fields,
        matcher_type="education"
    )
    matching_fields = field_match_result["matching_items"]
    missing_fields = field_match_result["missing_items"]

    # Format matching and missing education items
    matching_education = []
    missing_education = []
    if required_level > 0:
        if highest_level >= required_level:
            matching_education.append(
                f"{level_names.get(highest_level, 'Education')} level "
                f"(meets {level_names.get(required_level, '')} requirement)"
            )
        else:
            missing_education.append(f"{level_names.get(required_level, 'Higher education')} level required")
    matching_education.extend(f"Education in {field}" for field in matching_fields)
    missing_education.extend(f"Education in {field}" for field in missing_fields)

    # Calculate match scores
    level_match = min(1.0, highest_level / required_level) if required_level > 0 else 0.7
    field_match = len(matching_fields) / len(required_fields) if required_fields else 0.7

    # Combine level and field matches
    education_match = (0.6 * level_match) + (0.4 * field_match)
    return education_match, matching_education, missing_education