33This module handles PDF parsing, text extraction, and AI-based analysis.
44"""
55
6+ import gc
67import io
78import json
89import logging
# Set up INFO-level logging at import time and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Character caps applied to text before it is placed in an AI prompt,
# so that long inputs do not run into token limits.
MAX_JOB_DESCRIPTION_LENGTH = 1500
MAX_RESUME_CONTENT_LENGTH = 5000
26+
2227
def extract_text_from_pdf(file_bytes: BinaryIO) -> str:
    """
    Extract text content from a PDF file with memory optimization.

    Args:
        file_bytes: File object containing the PDF data. It is read in full
            and then rewound to position 0 so the caller can reuse it.

    Returns:
        str: Text of all pages that yielded text, each followed by a newline.
            If the result is longer than MAX_RESUME_CONTENT_LENGTH it is cut
            to that many characters and "..." is appended.

    Raises:
        ValueError: If there's an error reading the PDF
    """
    try:
        # The context manager closes the in-memory copy on success AND on
        # failure; previously the buffer was only closed on the success path.
        with io.BytesIO(file_bytes.read()) as pdf_buffer:
            file_bytes.seek(0)

            pdf = PdfReader(pdf_buffer)

            # Collect per-page text and join once instead of growing a string
            # with repeated "+=". Pages with no extractable text are skipped.
            page_texts = []
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    page_texts.append(page_text + "\n")

        text = "".join(page_texts)

        # Truncate very long resume content to prevent token limits
        if len(text) > MAX_RESUME_CONTENT_LENGTH:
            logger.info(f"Truncating resume content from {len(text)} to {MAX_RESUME_CONTENT_LENGTH} chars")
            text = text[:MAX_RESUME_CONTENT_LENGTH] + "..."

        return text

    except Exception as e:
        logger.error(f"Error reading PDF: {str(e)}", exc_info=True)
        raise ValueError(f"Error reading PDF: {str(e)}") from e

    finally:
        # Single cleanup point: runs after both the success and error paths,
        # replacing the duplicated gc.collect() calls.
        gc.collect()
4777
4878
@@ -66,6 +96,10 @@ def analyze_resume(resume: BinaryIO, job_details: List[Dict], custom_instruction
6696 resume_content = extract_text_from_pdf (resume )
6797 elif filename .endswith (".txt" ):
6898 resume_content = resume .read ().decode ("utf-8" )
99+ # Truncate very long resume content
100+ if len (resume_content ) > MAX_RESUME_CONTENT_LENGTH :
101+ logger .info (f"Truncating resume content from { len (resume_content )} to { MAX_RESUME_CONTENT_LENGTH } chars" )
102+ resume_content = resume_content [:MAX_RESUME_CONTENT_LENGTH ] + "..."
69103 else :
70104 return {
71105 "success" : False ,
@@ -93,13 +127,19 @@ def analyze_resume(resume: BinaryIO, job_details: List[Dict], custom_instruction
93127 if job_details and "job_description" in job_details [0 ] and job_details [0 ]["job_description" ]:
94128 ats_result = analyze_ats_compatibility (resume_content )
95129
130+ # Clean up memory
131+ del resume_content
132+ gc .collect ()
133+
96134 if ats_result and ats_result ["success" ]:
97135 return {"success" : True , "results" : analysis_result ["jobs" ], "ats_analysis" : ats_result ["analysis" ]}
98136 else :
99137 return {"success" : True , "results" : analysis_result ["jobs" ]}
100138
101139 except Exception as e :
102140 logger .error (f"Error in analyze_resume: { str (e )} " , exc_info = True )
141+ # Clean up memory on error
142+ gc .collect ()
103143 return {"success" : False , "error" : f"Error analyzing resume: { str (e )} " }
104144
105145
@@ -118,7 +158,7 @@ def generate_analysis(resume_content: str, job_details: List[Dict], custom_instr
118158 # Log for debugging
119159 logger .info (f"Analyzing resume against { len (job_details )} job entries" )
120160
121- # Format job details for the AI - with truncated job links
161+ # Format job details for the AI - with truncated job links and descriptions
122162 jobs_text = []
123163 for i , job in enumerate (job_details ):
124164 # Create a copy of the job dictionary to avoid modifying the original
@@ -154,9 +194,9 @@ def generate_analysis(resume_content: str, job_details: List[Dict], custom_instr
154194 if job_copy .get ("job_description" ):
155195 # Truncate job description if it's very long
156196 job_desc = job_copy .get ("job_description" )
157- if len (job_desc ) > 2000 : # Set a reasonable limit
158- logger .info (f"Truncating job description for job #{ i + 1 } from { len (job_desc )} to 2000 chars" )
159- job_text += f"Description: { job_desc [:2000 ]} ...\n "
197+ if len (job_desc ) > MAX_JOB_DESCRIPTION_LENGTH :
198+ logger .info (f"Truncating job description for job #{ i + 1 } from { len (job_desc )} to { MAX_JOB_DESCRIPTION_LENGTH } chars" )
199+ job_text += f"Description: { job_desc [:MAX_JOB_DESCRIPTION_LENGTH ]} ...\n "
160200 else :
161201 job_text += f"Description: { job_desc } \n "
162202
@@ -169,11 +209,17 @@ def generate_analysis(resume_content: str, job_details: List[Dict], custom_instr
169209 # Join all job details
170210 all_jobs_text = "\n \n " .join (jobs_text )
171211
212+ # Truncate resume content for prompt if needed again
213+ if len (resume_content ) > MAX_RESUME_CONTENT_LENGTH :
214+ prompt_resume = resume_content [:MAX_RESUME_CONTENT_LENGTH ] + "..."
215+ else :
216+ prompt_resume = resume_content
217+
172218 base_prompt = f"""
173219 You are a professional resume analyzer. Analyze this resume content against the job details provided.
174220
175221 Resume content to analyze:
176- { resume_content }
222+ { prompt_resume }
177223
178224 Job details to analyze against:
179225 { all_jobs_text }
@@ -284,10 +330,17 @@ def generate_analysis(resume_content: str, job_details: List[Dict], custom_instr
284330 if not job .get ("match_percentage" ):
285331 job ["match_percentage" ] = 50
286332
333+ # Clean up memory before returning
334+ del prompt
335+ del response
336+ gc .collect ()
337+
287338 return {"success" : True , "jobs" : analysis ["jobs" ]}
288339
289340 except Exception as e :
290341 logger .error (f"Error in generate_analysis: { str (e )} " , exc_info = True )
342+ # Clean up memory on error
343+ gc .collect ()
291344 return {"success" : False , "error" : f"Error generating analysis: { str (e )} " }
292345
293346
@@ -304,15 +357,29 @@ def generate_resume_review(resume_content: str, job_description: str, custom_ins
304357 dict: Review results including strengths, weaknesses, and improvement suggestions
305358 """
306359 try :
360+ # Truncate resume content for the prompt if it's too long
361+ if len (resume_content ) > MAX_RESUME_CONTENT_LENGTH :
362+ logger .info (f"Truncating resume content for review from { len (resume_content )} to { MAX_RESUME_CONTENT_LENGTH } chars" )
363+ prompt_resume = resume_content [:MAX_RESUME_CONTENT_LENGTH ] + "..."
364+ else :
365+ prompt_resume = resume_content
366+
367+ # Truncate job description if it's very long
368+ if len (job_description ) > MAX_JOB_DESCRIPTION_LENGTH :
369+ logger .info (f"Truncating job description for review from { len (job_description )} to { MAX_JOB_DESCRIPTION_LENGTH } chars" )
370+ prompt_job = job_description [:MAX_JOB_DESCRIPTION_LENGTH ] + "..."
371+ else :
372+ prompt_job = job_description
373+
307374 base_prompt = f"""
308375 You are a professional resume reviewer and career coach. Review this resume against the job description
309376 and provide detailed, actionable feedback to help improve the resume.
310377
311378 Resume content:
312- { resume_content }
379+ { prompt_resume }
313380
314381 Job description:
315- { job_description }
382+ { prompt_job }
316383
317384 IMPORTANT: Your response must be a valid JSON object with the exact structure shown below.
318385 Do not include any explanations, markdown, or text outside of the JSON object.
@@ -406,9 +473,17 @@ def generate_resume_review(resume_content: str, job_description: str, custom_ins
406473 if section not in existing_sections :
407474 review_data .setdefault ("improvement_suggestions" , []).append ({"section" : section , "suggestions" : ["Consider reviewing this section" ]})
408475
476+ # Clean up memory
477+ del prompt
478+ del response
479+ gc .collect ()
480+
409481 return {"success" : True , "review" : review_data }
410482
411483 except json .JSONDecodeError as e :
484+ # Clean up memory on error
485+ gc .collect ()
486+
412487 return {
413488 "success" : False ,
414489 "error" : f"Invalid response format from AI model: { str (e )} " ,
@@ -418,4 +493,6 @@ def generate_resume_review(resume_content: str, job_description: str, custom_ins
418493 return {"success" : False , "error" : "Failed to generate resume review" }
419494
420495 except Exception as e :
421- return {"success" : False , "error" : f"Error generating resume review: { str (e )} " }
496+ # Clean up memory on error
497+ gc .collect ()
498+ return {"success" : False , "error" : f"Error generating resume review: { str (e )} " }
0 commit comments