# email-sleuth.toml
# Configuration file for Email Sleuth
# Settings here can be overridden by command-line arguments.
# Network settings: how outbound HTTP requests are made.
[network]
# Timeout, in seconds, for each individual HTTP request (e.g., fetching a website page).
# Default: 10
request_timeout = 10

# Minimum pause, in seconds, between consecutive HTTP requests to the same domain.
# Helps avoid rate limiting. Fractional seconds may be used for finer control.
# Default: 0.1
min_sleep = 0.1

# Maximum pause, in seconds, between consecutive HTTP requests to the same domain.
# Introduces randomness so request timing does not follow a predictable pattern.
# NOTE(review): presumably this must be >= min_sleep; confirm against the config loader.
# Default: 0.5
max_sleep = 0.5

# User-Agent string sent when making HTTP requests.
# Default: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36"
# DNS settings: used for lookups such as finding MX records.
[dns]
# Timeout, in seconds, for each DNS resolution query.
# Default: 5
dns_timeout = 5

# DNS servers to use for lookups. If this list is empty or omitted, system
# defaults may be used, but explicitly listing reliable public DNS servers
# is recommended.
# Default: ["8.8.8.8", "8.8.4.4", "1.1.1.1", "1.0.0.1"]
dns_servers = [
    "8.8.8.8",  # Google Public DNS 1
    "8.8.4.4",  # Google Public DNS 2
    "1.1.1.1",  # Cloudflare DNS 1
    "1.0.0.1",  # Cloudflare DNS 2
]
# SMTP settings: used for email verification over SMTP.
[smtp]
# Timeout, in seconds, applied both to establishing an SMTP connection and to
# each individual SMTP command (such as HELO, MAIL FROM, RCPT TO).
# Default: 5
smtp_timeout = 5

# Sender address used in the 'MAIL FROM:' SMTP command during verification.
# Some servers may reject connections or treat probes differently based on this
# value. A plausible (but often non-existent) address from a common domain is typical.
# Default: "verify-probe@example.com"
smtp_sender_email = "verify-probe@example.com"

# Maximum number of times to retry SMTP verification when the result is
# inconclusive (e.g., due to temporary errors or greylisting).
# NOTE(review): the key says "attempts" while the description says "retry";
# confirm whether 2 means two attempts in total or two retries after the first.
# Default: 2
max_verification_attempts = 2
# Web scraping settings (if scraping is implemented and enabled).
[scraping]
# Page paths, relative to the domain root, that are scraped for email addresses.
# Default: Includes /contact, /about, /team, etc.
# NOTE(review): the default described above mentions /about, while the list
# below contains /about-us; confirm which path is intended.
common_pages = [
    "/contact",
    "/about-us",
    "/team",
    "/people",
    # Add more specific paths if known for certain industries/sites
]

# Email prefixes treated as "generic" (e.g., info@, support@). Addresses that
# start with one of these prefixes are typically given lower confidence unless
# specifically verified.
# Default: Includes info, contact, support, sales, admin, etc.
generic_email_prefixes = [
    "info",
    "contact",
    "support",
    "sales",
    "admin",
    "hello",
    "help",
    "office",
    "press",
    "media",
    "marketing",
    "jobs",
    "careers",
    "privacy",
    "legal",
    "webmaster",
    # Add or remove based on your needs
]
# Verification settings: scoring thresholds and limits that control the verification logic.
[verification]
# Minimum confidence score (0-10) an email needs in order to be selected as the
# primary result (the "email" field). Scores are influenced by pattern
# likelihood, name matching, and SMTP verification results.
# Default: 4
confidence_threshold = 4

# Minimum confidence score (0-10) a *generic* email (e.g., info@) needs in order
# to be selected as the primary result. Usually set higher than the normal
# threshold so that specific user emails are preferred over generic ones.
# Default: 7 (and must be >= confidence_threshold)
generic_confidence_threshold = 7

# Maximum number of alternative email candidates (the found_emails array) to
# include in the output JSON, sorted by confidence.
# Default: 5
max_alternatives = 5

# Confidence threshold (0-10) for early termination. When a candidate email is
# verified with a confidence score >= this value, processing stops early and no
# further candidates are checked. Set to 10 to always check all candidates, or
# lower to optimize performance.
# NOTE(review): with a ">=" comparison, a candidate scoring exactly 10 would
# still trigger early termination at a setting of 10; confirm whether scores
# can actually reach 10.
# Default: 9
early_termination_threshold = 9

# Default maximum number of concurrent tasks (processing contacts or performing
# network operations). Can be overridden by the --concurrency CLI argument.
# NOTE(review): this key lives in [verification] although it describes general
# concurrency; confirm this is the table the config loader reads it from.
# Default: 8
max_concurrency = 8
# Optional: settings for advanced/experimental verification methods.
# These require additional setup (like running a WebDriver) and may be less stable than SMTP.
# NOTE(review): both checks below are enabled in this file even though they
# need the extra setup described above; confirm that is intended for a shared
# or default configuration.
[advanced_verification]
# Presumably toggles API-based verification checks; TODO confirm what these checks call.
enable_api_checks = true
# Presumably toggles headless-browser checks that use the WebDriver at webdriver_url; TODO confirm.
enable_headless_checks = true
webdriver_url = "http://localhost:4444" # URL of the running WebDriver instance