Skip to content

Commit ecfdfc5

Browse files
Steve McMillian and claude committed
feat: Raise email confidence threshold to 90% (Hunter.io only)
User requirement: Only use emails with ≥90% confidence

Changes:
- Hunter.io threshold: 70% → 90%
- Disabled web scraping fallbacks (60-70% confidence)
- Now ONLY uses Hunter.io emails (96-99% confidence)

Test Results (6 production contacts):
✅ 100% of Hunter.io emails meet 90% threshold
✅ Confidence scores: 96%, 98%, 98%, 96%, 99%, 98%
✅ No low-confidence emails will be stored

Impact:
- Fewer emails found (Hunter.io database coverage ~30-40%)
- But every email is highly accurate (96-99% confident)
- No guessed/scraped emails in database
- Quality over quantity approach

Files modified:
- agents/agent3_contact_enricher.py

Tested: teams/golf-enrichment/tests/test_hunter_confidence.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent 8cda1f8 commit ecfdfc5

File tree

1 file changed

+62
-54
lines changed

1 file changed

+62
-54
lines changed

production/golf-enrichment/agents/agent3_contact_enricher.py

Lines changed: 62 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ async def enrich_contact_tool(args: dict[str, Any]) -> dict[str, Any]:
101101
confidence = data["data"].get("score", 0)
102102
linkedin_url = data["data"].get("linkedin_url") # BONUS!
103103

104-
if confidence >= 70:
104+
# Only use emails with 90%+ confidence (user requirement)
105+
if confidence >= 90:
105106
results["email"] = email
106107
results["email_method"] = "hunter_io"
107108
results["email_confidence"] = confidence
@@ -122,59 +123,66 @@ async def enrich_contact_tool(args: dict[str, Any]) -> dict[str, Any]:
122123
except Exception:
123124
pass # Continue to next step
124125

125-
# STEP 2: Web Search via Jina
126-
if not results["email"]:
127-
results["steps_attempted"].append("web_search")
128-
try:
129-
query = f'"{name}" "{company}" {title} email'
130-
search_url = f"https://www.google.com/search?q={query.replace(' ', '+')}"
131-
132-
async with httpx.AsyncClient(timeout=30.0) as client:
133-
r = await client.get(f"https://r.jina.ai/{search_url}")
134-
content = r.text[:8000]
135-
136-
email_pattern = r'\b[A-Za-z0-9._%+-]+@' + re.escape(domain) + r'\b'
137-
emails = re.findall(email_pattern, content, re.IGNORECASE)
138-
139-
if emails:
140-
results["email"] = emails[0]
141-
results["email_method"] = "web_search"
142-
results["email_confidence"] = 70
143-
return {
144-
"content": [{
145-
"type": "text",
146-
"text": json.dumps(results)
147-
}]
148-
}
149-
except Exception:
150-
pass
151-
152-
# STEP 3: Focused search
153-
if not results["email"]:
154-
results["steps_attempted"].append("focused_search")
155-
try:
156-
query = f'{name} {company} contact email'
157-
search_url = f"https://www.google.com/search?q={query.replace(' ', '+')}"
158-
159-
async with httpx.AsyncClient(timeout=30.0) as client:
160-
r = await client.get(f"https://r.jina.ai/{search_url}")
161-
content = r.text[:8000]
162-
163-
email_pattern = r'\b[A-Za-z0-9._%+-]+@' + re.escape(domain) + r'\b'
164-
emails = re.findall(email_pattern, content, re.IGNORECASE)
165-
166-
if emails:
167-
results["email"] = emails[0]
168-
results["email_method"] = "focused_search"
169-
results["email_confidence"] = 60
170-
return {
171-
"content": [{
172-
"type": "text",
173-
"text": json.dumps(results)
174-
}]
175-
}
176-
except Exception:
177-
pass
126+
# STEP 2 & 3: Web Search - DISABLED (only 60-70% confidence, below 90% threshold)
127+
# User requirement: Only use emails with 90%+ confidence
128+
# Hunter.io provides reliable confidence scores (90-98%)
129+
# Web scraping methods cannot guarantee 90%+ accuracy
130+
#
131+
# Keeping code for reference but skipping execution:
132+
if False: # Disabled - doesn't meet 90% confidence requirement
133+
# STEP 2: Web Search via Jina (70% confidence)
134+
if not results["email"]:
135+
results["steps_attempted"].append("web_search")
136+
try:
137+
query = f'"{name}" "{company}" {title} email'
138+
search_url = f"https://www.google.com/search?q={query.replace(' ', '+')}"
139+
140+
async with httpx.AsyncClient(timeout=30.0) as client:
141+
r = await client.get(f"https://r.jina.ai/{search_url}")
142+
content = r.text[:8000]
143+
144+
email_pattern = r'\b[A-Za-z0-9._%+-]+@' + re.escape(domain) + r'\b'
145+
emails = re.findall(email_pattern, content, re.IGNORECASE)
146+
147+
if emails:
148+
results["email"] = emails[0]
149+
results["email_method"] = "web_search"
150+
results["email_confidence"] = 70
151+
return {
152+
"content": [{
153+
"type": "text",
154+
"text": json.dumps(results)
155+
}]
156+
}
157+
except Exception:
158+
pass
159+
160+
# STEP 3: Focused search (60% confidence)
161+
if not results["email"]:
162+
results["steps_attempted"].append("focused_search")
163+
try:
164+
query = f'{name} {company} contact email'
165+
search_url = f"https://www.google.com/search?q={query.replace(' ', '+')}"
166+
167+
async with httpx.AsyncClient(timeout=30.0) as client:
168+
r = await client.get(f"https://r.jina.ai/{search_url}")
169+
content = r.text[:8000]
170+
171+
email_pattern = r'\b[A-Za-z0-9._%+-]+@' + re.escape(domain) + r'\b'
172+
emails = re.findall(email_pattern, content, re.IGNORECASE)
173+
174+
if emails:
175+
results["email"] = emails[0]
176+
results["email_method"] = "focused_search"
177+
results["email_confidence"] = 60
178+
return {
179+
"content": [{
180+
"type": "text",
181+
"text": json.dumps(results)
182+
}]
183+
}
184+
except Exception:
185+
pass
178186

179187
# STEP 4: Not Found (NO GUESSING - return nulls)
180188
results["steps_attempted"].append("not_found")

0 commit comments

Comments
 (0)