diff --git a/backend/app/job_scraper.py b/backend/app/job_scraper.py
index df136ac..e94ba65 100644
--- a/backend/app/job_scraper.py
+++ b/backend/app/job_scraper.py
@@ -21,53 +21,135 @@ def get_headers():
     }


-def scrape_job_description(job_url: str) -> dict:
+def extract_job_details(soup, url):
+    """
+    Extract job title, company name, and platform from job posting page.
     """
-    Scrape job description from various job posting websites.
-    Includes fallback for blocked requests.
+    platform = "Other"
+    if "indeed.com" in url:
+        platform = "Indeed"
+    elif "linkedin.com" in url:
+        platform = "LinkedIn"
+
+    try:
+        # Job Title Extraction
+        title_selectors = {
+            "indeed.com": ["h1.jobsearch-JobInfoHeader-title", "div.jobsearch-JobInfoHeader-title-container h1", "h1.icl-u-xs-mb--xs"],
+            "linkedin.com": ["h1.top-card-layout__title", "h1.job-details-jobs-unified-top-card__job-title", "h1.topcard__title"],
+            "generic": ["h1", "h1.job-title", "div.job-title", "title"],
+        }

-    Args:
-        job_url: URL of the job posting
+        job_title = None
+        # Try platform-specific selectors first
+        if "indeed.com" in url:
+            for selector in title_selectors["indeed.com"]:
+                element = soup.select_one(selector)
+                if element and element.text.strip():
+                    job_title = element.text.strip()
+                    break
+        elif "linkedin.com" in url:
+            for selector in title_selectors["linkedin.com"]:
+                element = soup.select_one(selector)
+                if element and element.text.strip():
+                    job_title = element.text.strip()
+                    break
+
+        # If no title found, try generic selectors
+        if not job_title:
+            for selector in title_selectors["generic"]:
+                element = soup.select_one(selector)
+                if element and element.text.strip():
+                    job_title = element.text.strip()
+                    break
+
+        # Company Name Extraction
+        company_selectors = {
+            "indeed.com": [
+                'div[data-company-name="true"]',
+                "div.jobsearch-CompanyInfoContainer span.jobsearch-CompanyInfoWithoutHeaderImage",
+                "div.jobsearch-InlineCompanyRating > div:first-child",
+            ],
+            "linkedin.com": [
+                "a.company-name-link",
+                'a[data-tracking-control-name="public_jobs_topcard-org-name"]',
+                "span.topcard__flavor",
+                "a.sub-nav-cta__optional-url",
+            ],
+            "generic": ['div[class*="company"]', 'span[class*="company"]', 'div[class*="employer"]', 'span[class*="employer"]'],
+        }
+
+        company_name = None
+        # Try platform-specific selectors
+        if "indeed.com" in url:
+            for selector in company_selectors["indeed.com"]:
+                element = soup.select_one(selector)
+                if element and element.text.strip():
+                    company_name = element.text.strip()
+                    break
+        elif "linkedin.com" in url:
+            for selector in company_selectors["linkedin.com"]:
+                element = soup.select_one(selector)
+                if element and element.text.strip():
+                    company_name = element.text.strip()
+                    break
+
+        # If no company found, try generic selectors
+        if not company_name:
+            for selector in company_selectors["generic"]:
+                element = soup.select_one(selector)
+                if element and element.text.strip():
+                    company_name = element.text.strip()
+                    break
+
+        return {"job_title": job_title or "Position Not Found", "company_name": company_name or "Company Not Found", "platform": platform}
+
+    except Exception as e:
+        logger.error(f"Error extracting job details: {str(e)}")
+        return {"job_title": "Position Not Found", "company_name": "Company Not Found", "platform": platform}

-    Returns:
-        dict: Contains success status and either job description or error message
+
+def scrape_job_description(job_url: str) -> dict:
+    """
+    Scrape job description and metadata from various job posting websites.
""" try: - # Validate URL if not job_url or not urlparse(job_url).scheme: return {"success": False, "error": "Invalid URL provided"} - # Make request with enhanced headers response = requests.get(job_url, headers=get_headers(), timeout=10) - # Handle various response scenarios if response.status_code == 403: return { "success": True, - "description": "Unable to automatically fetch job description. Please enter the job description " - "manually or try a different job posting link.", + "description": "Unable to automatically fetch job description. Please enter the job description manually.", + "job_title": "Position Not Found", + "company_name": "Company Not Found", + "platform": "Unknown", "requires_manual_entry": True, } response.raise_for_status() soup = BeautifulSoup(response.text, "html.parser") + # Extract job details + job_details = extract_job_details(soup, job_url) + # Try different common job description selectors - selectors = [ + description_selectors = [ "div.job-description", "div[data-automation='jobDescription']", "#job-description", ".description__text", "div.description", - "div[class*='jobsearch-jobDescriptionText']", # Indeed - "div[class*='show-more-less-html']", # LinkedIn - "div[class*='job-description']", # Generic + "div[class*='jobsearch-jobDescriptionText']", + "div[class*='show-more-less-html']", + "div[class*='job-description']", ] - for selector in selectors: + for selector in description_selectors: job_description = soup.select_one(selector) if job_description: - return {"success": True, "description": job_description.get_text(strip=True)} + return {"success": True, "description": job_description.get_text(strip=True), **job_details} # If no description found with selectors, try finding by content description = soup.find( @@ -76,21 +158,32 @@ def scrape_job_description(job_url: str) -> dict: ) if description: - return {"success": True, "description": description.get_text(strip=True)} + return {"success": True, "description": description.get_text(strip=True), **job_details} return { "success": True, "description": "Unable to automatically extract job description. Please enter the job description manually.", + **job_details, "requires_manual_entry": True, } except requests.RequestException as e: logger.error(f"Request error for {job_url}: {str(e)}") - return {"success": True, "description": "Unable to fetch job description. Please enter it manually.", "requires_manual_entry": True} + return { + "success": True, + "description": "Unable to fetch job description. Please enter it manually.", + "job_title": "Position Not Found", + "company_name": "Company Not Found", + "platform": "Unknown", + "requires_manual_entry": True, + } except Exception as e: logger.error(f"Unexpected error scraping {job_url}: {str(e)}") return { "success": True, "description": "Error accessing job posting. Please enter the job description manually.", + "job_title": "Position Not Found", + "company_name": "Company Not Found", + "platform": "Unknown", "requires_manual_entry": True, } diff --git a/backend/app/resume_analyzer.py b/backend/app/resume_analyzer.py index 418ede0..a94cbfe 100644 --- a/backend/app/resume_analyzer.py +++ b/backend/app/resume_analyzer.py @@ -108,11 +108,14 @@ def generate_analysis(resume_content: str, job_links: list) -> Dict[str, Union[b 2. For matches above 75%, provide recommendations to excel in the role 3. Recommendations should be specific and actionable 4. Match percentage should be based on both technical skills and overall fit + 5. 

     Return ONLY a JSON object with this exact structure:
     {{
         "jobs": [
             {{
+                "job_title": "",
+                "company_name": "",
                 "job_link": "",
                 "match_percentage": ,
                 "matching_skills": [],
@@ -151,7 +154,7 @@ def generate_analysis(resume_content: str, job_links: list) -> Dict[str, Union[b
         if not isinstance(analysis, dict) or "jobs" not in analysis:
             return {"success": False, "error": "Invalid response structure"}

-        # Ensure recommendations
+        # Ensure recommendations and required fields
         for job in analysis["jobs"]:
             if not job.get("recommendations"):
                 job["recommendations"] = [
@@ -159,6 +162,11 @@ def generate_analysis(resume_content: str, job_links: list) -> Dict[str, Union[b
                     "Quantify your impact with metrics",
                     "Add specific examples of team leadership",
                 ]
+            # Ensure job title and company name are present
+            if not job.get("job_title"):
+                job["job_title"] = "Position"
+            if not job.get("company_name"):
+                job["company_name"] = "Company"

         return {"success": True, "jobs": analysis["jobs"]}
diff --git a/frontend/src/components/JobResults.jsx b/frontend/src/components/JobResults.jsx
index ef4b4a4..c2a524e 100644
--- a/frontend/src/components/JobResults.jsx
+++ b/frontend/src/components/JobResults.jsx
@@ -1,4 +1,4 @@
-import { Text, Button, Stack, Badge, Modal, List, Group, Paper } from '@mantine/core';
+import { Text, Button, Stack, Badge, Modal, List, Group, Paper, Flex } from '@mantine/core';
 import { useDisclosure } from '@mantine/hooks';
 import { useState } from 'react';
 import axios from 'axios';
@@ -7,12 +7,11 @@ import ResumeReview from './ResumeReview';
 const JobResults = ({ results, resumeFile }) => {
   const [coverLetter, setCoverLetter] = useState('');
   const [opened, { open, close }] = useDisclosure(false);
-  const [loadingJobs, setLoadingJobs] = useState({}); // Track loading state per job
+  const [loadingJobs, setLoadingJobs] = useState({});

   if (!results || results.length === 0) return null;

   const handleGenerateCoverLetter = async (jobLink) => {
-    // Set loading state for specific job
     setLoadingJobs((prev) => ({ ...prev, [jobLink]: true }));

     try {
@@ -30,11 +29,20 @@ const JobResults = ({ results, resumeFile }) => {
       console.error('Error generating cover letter:', error);
       alert(error.response?.data?.error || 'Error generating cover letter. Please try again.');
     } finally {
-      // Clear loading state for specific job
       setLoadingJobs((prev) => ({ ...prev, [jobLink]: false }));
     }
   };

+  const truncateUrl = (url) => {
+    try {
+      const maxLength = 60;
+      if (url.length <= maxLength) return url;
+      return url.substring(0, 30) + '...' + url.substring(url.length - 27);
+    } catch (error) {
+      return url;
+    }
+  };
+
   return (
@@ -44,24 +52,40 @@ const JobResults = ({ results, resumeFile }) => {
         {results.map((job, index) => (
-          {/* Job Link and Match Score */}
-          <Group justify="space-between">
-            <Text component="a" href={job.job_link} target="_blank">
-              {job.job_link}
+          {/* Job Header Section */}
+          <Flex justify="space-between" align="center">
+            <Text fw={600}>
+              {job.job_title}
+            </Text>
+            <Badge
+              color={
+                job.match_percentage >= 80
+                  ? 'green'
+                  : job.match_percentage >= 60
+                    ? 'yellow'
+                    : 'red'
+              }
+              size="lg"
+            >
+              {job.match_percentage}% MATCH
+            </Badge>
+          </Flex>
+
+          <Group gap="xs">
+            <Text>
+              {job.company_name}
+            </Text>
+            <Text>
+              •
+            </Text>
+            <Text>
+              {job.platform}
+            </Text>
+          </Group>
+
+          <Text component="a" href={job.job_link} target="_blank">
+            {truncateUrl(job.job_link)}
+          </Text>
-            </Text>
-            <Badge
-              color={
-                job.match_percentage >= 80
-                  ? 'green'
-                  : job.match_percentage >= 60
-                    ? 'yellow'
-                    : 'red'
-              }
-              size="lg"
-            >
-              {job.match_percentage}% MATCH
-            </Badge>
-          </Group>

+          {/* Matching Skills */}
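
A quick way to exercise the new extract_job_details helper without hitting Indeed or LinkedIn (which commonly answer scripted requests with 403, hence the manual-entry fallbacks in scrape_job_description) is to feed it static HTML fixtures. The sketch below is illustrative and not part of this patch: the app.job_scraper import path, the fixture markup, the job titles, and the company names are assumptions; only the selectors and fallback strings visible in the diff are relied on.

# Illustrative only -- not part of this patch. Assumes the backend package is
# importable as `app` (adjust the import to the project's real layout) and that
# beautifulsoup4 and pytest are installed.
from bs4 import BeautifulSoup

from app.job_scraper import extract_job_details

# Hypothetical LinkedIn-style markup using the selectors the patch targets.
LINKEDIN_FIXTURE = """
<html><body>
  <h1 class="top-card-layout__title">Backend Engineer</h1>
  <span class="topcard__flavor">Acme Corp</span>
</body></html>
"""


def test_linkedin_title_company_and_platform():
    soup = BeautifulSoup(LINKEDIN_FIXTURE, "html.parser")
    details = extract_job_details(soup, "https://www.linkedin.com/jobs/view/12345")
    # Platform is inferred from the URL; title and company come from the selectors.
    assert details["platform"] == "LinkedIn"
    assert details["job_title"] == "Backend Engineer"
    assert details["company_name"] == "Acme Corp"


def test_unknown_site_falls_back_to_generic_selectors():
    soup = BeautifulSoup("<html><body><h1>Some Role</h1></body></html>", "html.parser")
    details = extract_job_details(soup, "https://careers.example.com/posting/1")
    # Unknown hosts skip the platform-specific branches and use the generic selectors,
    # falling back to the "Not Found" placeholders when nothing matches.
    assert details["platform"] == "Other"
    assert details["job_title"] == "Some Role"
    assert details["company_name"] == "Company Not Found"

Keeping the fixtures offline also documents which selectors each platform branch depends on, so a selector change in the scraper surfaces as a failing test rather than a silent "Position Not Found" in the frontend header.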