133 changes: 113 additions & 20 deletions backend/app/job_scraper.py
@@ -21,53 +21,135 @@ def get_headers():
}


def extract_job_details(soup, url):
"""
Extract job title, company name, and platform from job posting page.
"""
platform = "Other"
if "indeed.com" in url:
platform = "Indeed"
elif "linkedin.com" in url:
platform = "LinkedIn"

try:
# Job Title Extraction
title_selectors = {
"indeed.com": ["h1.jobsearch-JobInfoHeader-title", "div.jobsearch-JobInfoHeader-title-container h1", "h1.icl-u-xs-mb--xs"],
"linkedin.com": ["h1.top-card-layout__title", "h1.job-details-jobs-unified-top-card__job-title", "h1.topcard__title"],
"generic": ["h1", "h1.job-title", "div.job-title", "title"],
}

job_title = None
# Try platform-specific selectors first
if "indeed.com" in url:
for selector in title_selectors["indeed.com"]:
element = soup.select_one(selector)
if element and element.text.strip():
job_title = element.text.strip()
break
elif "linkedin.com" in url:
for selector in title_selectors["linkedin.com"]:
element = soup.select_one(selector)
if element and element.text.strip():
job_title = element.text.strip()
break

# If no title found, try generic selectors
if not job_title:
for selector in title_selectors["generic"]:
element = soup.select_one(selector)
if element and element.text.strip():
job_title = element.text.strip()
break

# Company Name Extraction
company_selectors = {
"indeed.com": [
'div[data-company-name="true"]',
"div.jobsearch-CompanyInfoContainer span.jobsearch-CompanyInfoWithoutHeaderImage",
"div.jobsearch-InlineCompanyRating > div:first-child",
],
"linkedin.com": [
"a.company-name-link",
'a[data-tracking-control-name="public_jobs_topcard-org-name"]',
"span.topcard__flavor",
"a.sub-nav-cta__optional-url",
],
"generic": ['div[class*="company"]', 'span[class*="company"]', 'div[class*="employer"]', 'span[class*="employer"]'],
}

company_name = None
# Try platform-specific selectors
if "indeed.com" in url:
for selector in company_selectors["indeed.com"]:
element = soup.select_one(selector)
if element and element.text.strip():
company_name = element.text.strip()
break
elif "linkedin.com" in url:
for selector in company_selectors["linkedin.com"]:
element = soup.select_one(selector)
if element and element.text.strip():
company_name = element.text.strip()
break

# If no company found, try generic selectors
if not company_name:
for selector in company_selectors["generic"]:
element = soup.select_one(selector)
if element and element.text.strip():
company_name = element.text.strip()
break

return {"job_title": job_title or "Position Not Found", "company_name": company_name or "Company Not Found", "platform": platform}

except Exception as e:
logger.error(f"Error extracting job details: {str(e)}")
return {"job_title": "Position Not Found", "company_name": "Company Not Found", "platform": platform}

def scrape_job_description(job_url: str) -> dict:
"""
Scrape job description and metadata from various job posting websites.
"""
try:
# Validate URL
if not job_url or not urlparse(job_url).scheme:
return {"success": False, "error": "Invalid URL provided"}

# Make request with enhanced headers
response = requests.get(job_url, headers=get_headers(), timeout=10)

# Handle various response scenarios
if response.status_code == 403:
return {
"success": True,
"description": "Unable to automatically fetch job description. Please enter the job description "
"manually or try a different job posting link.",
"description": "Unable to automatically fetch job description. Please enter the job description manually.",
"job_title": "Position Not Found",
"company_name": "Company Not Found",
"platform": "Unknown",
"requires_manual_entry": True,
}

response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Extract job details
job_details = extract_job_details(soup, job_url)

# Try different common job description selectors
description_selectors = [
"div.job-description",
"div[data-automation='jobDescription']",
"#job-description",
".description__text",
"div.description",
"div[class*='jobsearch-jobDescriptionText']", # Indeed
"div[class*='show-more-less-html']", # LinkedIn
"div[class*='job-description']", # Generic
"div[class*='jobsearch-jobDescriptionText']",
"div[class*='show-more-less-html']",
"div[class*='job-description']",
]

for selector in description_selectors:
job_description = soup.select_one(selector)
if job_description:
return {"success": True, "description": job_description.get_text(strip=True)}
return {"success": True, "description": job_description.get_text(strip=True), **job_details}

# If no description found with selectors, try finding by content
description = soup.find(
@@ -76,21 +158,32 @@ def scrape_job_description(job_url: str) -> dict:
)

if description:
return {"success": True, "description": description.get_text(strip=True)}
return {"success": True, "description": description.get_text(strip=True), **job_details}

return {
"success": True,
"description": "Unable to automatically extract job description. Please enter the job description manually.",
**job_details,
"requires_manual_entry": True,
}

except requests.RequestException as e:
logger.error(f"Request error for {job_url}: {str(e)}")
return {"success": True, "description": "Unable to fetch job description. Please enter it manually.", "requires_manual_entry": True}
return {
"success": True,
"description": "Unable to fetch job description. Please enter it manually.",
"job_title": "Position Not Found",
"company_name": "Company Not Found",
"platform": "Unknown",
"requires_manual_entry": True,
}
except Exception as e:
logger.error(f"Unexpected error scraping {job_url}: {str(e)}")
return {
"success": True,
"description": "Error accessing job posting. Please enter the job description manually.",
"job_title": "Position Not Found",
"company_name": "Company Not Found",
"platform": "Unknown",
"requires_manual_entry": True,
}
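
For orientation, a rough usage sketch of the updated scraper as a route or test might call it; the URL is a placeholder, and the keys checked are the ones the function above now always returns:

result = scrape_job_description("https://www.linkedin.com/jobs/view/1234567890")
if not result["success"]:
    print(result["error"])  # only the invalid-URL branch sets success to False
elif result.get("requires_manual_entry"):
    # Scraping was blocked or no description selector matched;
    # the UI should ask the user to paste the description by hand.
    print(result["job_title"], result["company_name"], result["platform"])
else:
    print(result["description"][:200])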
10 changes: 9 additions & 1 deletion backend/app/resume_analyzer.py
@@ -108,11 +108,14 @@ def generate_analysis(resume_content: str, job_links: list) -> Dict[str, Union[b
2. For matches above 75%, provide recommendations to excel in the role
3. Recommendations should be specific and actionable
4. Match percentage should be based on both technical skills and overall fit
5. For each job, provide a clear job title and company name

Return ONLY a JSON object with this exact structure:
{{
"jobs": [
{{
"job_title": "<job title>",
"company_name": "<company name>",
"job_link": "<job url>",
"match_percentage": <number 0-100>,
"matching_skills": [<list of matching skills>],
@@ -151,14 +154,19 @@ def generate_analysis(resume_content: str, job_links: list) -> Dict[str, Union[b
if not isinstance(analysis, dict) or "jobs" not in analysis:
return {"success": False, "error": "Invalid response structure"}

# Ensure recommendations and required fields
for job in analysis["jobs"]:
if not job.get("recommendations"):
job["recommendations"] = [
"Highlight relevant project achievements",
"Quantify your impact with metrics",
"Add specific examples of team leadership",
]
# Ensure job title and company name are present
if not job.get("job_title"):
job["job_title"] = "Position"
if not job.get("company_name"):
job["company_name"] = "Company"

return {"success": True, "jobs": analysis["jobs"]}

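As an illustration only (not code from this PR), the defaulting added above could also be factored into a small helper, which keeps the fallback values in one place and easy to unit test:

DEFAULT_RECOMMENDATIONS = [
    "Highlight relevant project achievements",
    "Quantify your impact with metrics",
    "Add specific examples of team leadership",
]

def normalize_job(job: dict) -> dict:
    # Mirrors the loop above: fill recommendations, job_title and company_name
    # with safe defaults when the model response omits them.
    if not job.get("recommendations"):
        job["recommendations"] = list(DEFAULT_RECOMMENDATIONS)
    job["job_title"] = job.get("job_title") or "Position"
    job["company_name"] = job.get("company_name") or "Company"
    return job

# Example: normalize_job({"job_link": "https://example.com/job"})["job_title"] == "Position"
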
66 changes: 45 additions & 21 deletions frontend/src/components/JobResults.jsx
@@ -1,4 +1,4 @@
import { Text, Button, Stack, Badge, Modal, List, Group, Paper, Flex } from '@mantine/core';
import { useDisclosure } from '@mantine/hooks';
import { useState } from 'react';
import axios from 'axios';
@@ -7,12 +7,11 @@ import ResumeReview from './ResumeReview';
const JobResults = ({ results, resumeFile }) => {
const [coverLetter, setCoverLetter] = useState('');
const [opened, { open, close }] = useDisclosure(false);
const [loadingJobs, setLoadingJobs] = useState({});

if (!results || results.length === 0) return null;

const handleGenerateCoverLetter = async (jobLink) => {
setLoadingJobs((prev) => ({ ...prev, [jobLink]: true }));

try {
@@ -30,11 +29,20 @@ const JobResults = ({ results, resumeFile }) => {
console.error('Error generating cover letter:', error);
alert(error.response?.data?.error || 'Error generating cover letter. Please try again.');
} finally {
setLoadingJobs((prev) => ({ ...prev, [jobLink]: false }));
}
};

const truncateUrl = (url) => {
try {
const maxLength = 60;
if (url.length <= maxLength) return url;
return url.substring(0, 30) + '...' + url.substring(url.length - 27);
} catch (error) {
return url;
}
};

return (
<Stack spacing="md">
<Text size="xl" weight={700}>
@@ -44,24 +52,40 @@
{results.map((job, index) => (
<Paper key={index} shadow="xs" p="md" withBorder>
<Stack spacing="sm">
{/* Job Header Section */}
<Stack spacing={4}>
<Group position="apart" align="center">
<Text size="lg" weight={600} color="blue">
{job.job_title}
</Text>
<Badge
color={
job.match_percentage >= 80
? 'green'
: job.match_percentage >= 60
? 'yellow'
: 'red'
}
size="lg"
>
{job.match_percentage}% MATCH
</Badge>
</Group>
<Flex gap="xs" align="center">
<Text size="md" weight={500}>
{job.company_name}
</Text>
<Text size="sm" color="dimmed">
</Text>
<Text size="sm" color="dimmed">
{job.platform}
</Text>
</Flex>
<Text size="sm" color="dimmed" style={{ wordBreak: 'break-all' }}>
{truncateUrl(job.job_link)}
</Text>
</Stack>

{/* Matching Skills */}
<div>