Skip to content

Commit b7482f3

Browse files
committed
release 1.0
release 1.0
1 parent adafc11 commit b7482f3

File tree

3 files changed

+278
-12
lines changed

3 files changed

+278
-12
lines changed

EmailHarvester.py

Lines changed: 234 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,34 +28,264 @@
2828
__copyright__ = "Copyright (c) 2016 @maldevel"
2929
__credits__ = ["maldevel"]
3030
__license__ = "GPLv3"
31-
__version__ = "3.0"
31+
__version__ = "1.0"
3232
__maintainer__ = "maldevel"
3333

3434

3535
################################
3636
import argparse
3737
import sys
38+
import time
39+
import requests
40+
import re
3841

42+
from termcolor import colored
3943
from argparse import RawTextHelpFormatter
44+
from sys import platform as _platform
4045
################################
4146

4247

48+
# termcolor emits ANSI escape sequences; the classic Windows console does not
# interpret them natively, so colorama.init() wraps stdout/stderr and
# translates the escapes into Win32 console calls.  The import is kept inside
# the guard so non-Windows platforms never need colorama at runtime.
if _platform == 'win32':
    import colorama
    colorama.init()
51+
52+
53+
class myparser:
    """Extract e-mail addresses belonging to a domain from raw search-engine HTML.

    Parameters
    ----------
    results : str
        Concatenated raw HTML of the search-result pages.
    word : str
        Target domain, e.g. ``"example.com"``.
    """

    # (old, new) literal substitutions applied, in order, before extraction.
    # All patterns are plain literals, so str.replace is equivalent to the
    # original chain of re.sub calls and avoids regex overhead entirely.
    _CLEANUPS = (
        ('<KW>', ''), ('</KW>', ''), ('<title>', ''), ('</div>', ''),
        ('<p>', ''), ('</span>', ''), ('</a>', ''), ('<em>', ''),
        ('<b>', ''), ('</b>', ''), ('</em>', ''),
        ('%2f', ' '), ('%3a', ' '),          # URL-encoded '/' and ':' become separators
        ('<strong>', ''), ('</strong>', ''),
    )

    def __init__(self, results, word):
        self.results = results
        self.word = word
        self.temp = []

    def genericClean(self):
        """Strip markup fragments that may be glued onto addresses in the HTML."""
        for old, new in self._CLEANUPS:
            self.results = self.results.replace(old, new)

    def emails(self):
        """Return the unique e-mail addresses found for ``self.word``, in order."""
        self.genericClean()
        # Fix: re.escape the domain -- previously '.' in the domain acted as a
        # regex wildcard, so e.g. 'user@examplexcom' matched 'example.com'.
        reg_emails = re.compile(
            r'[a-zA-Z0-9.\-_]*'
            r'@'
            r'(?:[a-zA-Z0-9.\-]*\.)?' +
            re.escape(self.word))
        self.temp = reg_emails.findall(self.results)
        return self.unique()

    def unique(self):
        """De-duplicate ``self.temp`` preserving first-seen order (O(n))."""
        self.new = list(dict.fromkeys(self.temp))
        return self.new
94+
95+
96+
###################################################################
97+
98+
class SearchEngine:
    """Pages through one search engine's results, accumulating the raw HTML.

    Parameters
    ----------
    urlPattern : str
        URL template with ``{counter}`` and ``{word}`` placeholders.
    word : str
        Domain to search for.
    limit : int
        Stop paging once the counter reaches this many results.
    counterInit : int
        Starting offset for the engine's pagination parameter.
    counterStep : int
        Results-per-page increment applied after each request.
    """

    def __init__(self, urlPattern, word, limit, counterInit, counterStep):
        self.results = ""
        self.totalresults = ""
        self.userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1"
        self.limit = int(limit)
        self.counter = int(counterInit)
        self.urlPattern = urlPattern
        self.step = int(counterStep)
        self.word = word

    def do_search(self):
        """Fetch one result page and append its HTML to ``totalresults``."""
        try:
            urly = self.urlPattern.format(counter=str(self.counter), word=self.word)
            headers = {
                'User-Agent': self.userAgent,
            }
            r = requests.get(urly, headers=headers)
        except Exception as e:
            print(e)
            # Fix: the original fell through and crashed with NameError on the
            # undefined 'r' after a failed request; skip this page instead.
            return
        # Fix: r.encoding can be None when the server sends no charset header,
        # which made .decode() raise TypeError; fall back to UTF-8.
        self.results = r.content.decode(r.encoding or 'utf-8', errors='replace')
        self.totalresults += self.results

    def process(self):
        """Page through results until ``limit``, pausing between requests."""
        while self.counter < self.limit:
            self.do_search()
            time.sleep(1)  # be polite to the search engine
            print(green("\tSearching " + str(self.counter) + " results..."))
            self.counter += self.step

    def get_emails(self):
        """Parse everything fetched so far and return the unique addresses."""
        rawres = myparser(self.totalresults, self.word)
        return rawres.emails()
131+
132+
###################################################################
133+
134+
def _bold(text, color):
    """Wrap *text* in bold ANSI codes of the given termcolor color name."""
    return colored(text, color, attrs=['bold'])

def yellow(text):
    """Return *text* rendered bold yellow."""
    return _bold(text, 'yellow')

def green(text):
    """Return *text* rendered bold green."""
    return _bold(text, 'green')

def blue(text):
    """Return *text* rendered bold blue."""
    return _bold(text, 'blue')

def red(text):
    """Return *text* rendered bold red."""
    return _bold(text, 'red')
145+
146+
def unique(data):
    """Return *data* with duplicates removed, preserving first-seen order.

    Items must be hashable (here they are e-mail address strings).
    """
    # dict preserves insertion order (Python 3.7+), giving O(n) deduplication
    # instead of the original O(n^2) list scan -- which also shadowed the
    # function's own name with a local variable.
    return list(dict.fromkeys(data))
152+
153+
###################################################################
154+
155+
def limit_type(x):
    """argparse type-checker: coerce to int and require a positive value."""
    value = int(x)
    if value < 1:
        raise argparse.ArgumentTypeError("Minimum results limit is 1.")
    return value
160+
161+
def engine_type(x):
    """argparse type-checker: accept only a supported search-engine name."""
    supported = {"google", "bing", "yahoo", "ask", "all"}
    if x in supported:
        return x
    raise argparse.ArgumentTypeError("Invalid search engine, try with: google, bing, yahoo, ask, all.")
165+
166+
43167
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description="""

 _____                    _  _   _   _                                    _
|  ___|                  (_)| | | | | |                                  | |
| |__  _ __ ___    __ _  _ | | | |_| |  __ _  _ __ __   __ ___  ___ | |_  ___  _ __
|  __|| '_ ` _ \\  / _` || || | |  _  | / _` || '__|\\ \\ / // _ \\/ __|| __|/ _ \\| '__|
| |___| | | | | || (_| || || | | | | || (_| || |    \\ V /|  __/\\__ \\| |_|  __/| |
\\____/|_| |_| |_| \\__,_||_||_| \\_| |_/ \\__,_||_|     \\_/  \\___||___/ \\__|\\___||_|

A tool to retrieve Domain email addresses from Search Engines | @maldevel
{}: {}
""".format(red('Version'), yellow(__version__)),
                                     formatter_class=RawTextHelpFormatter)

    parser.add_argument("-d", '--domain', metavar='DOMAIN', dest='domain', type=str,
                        help="Domain to search.")
    parser.add_argument("-s", '--save', metavar='FILE', dest='filename', type=str,
                        help="Save the results into a TXT and XML file.")
    parser.add_argument("-e", '--engine', metavar='ENGINE', dest='engine', default="all", type=engine_type,
                        help="Select search engine(google, bing, yahoo, ask, all).")
    parser.add_argument("-l", '--limit', metavar='LIMIT', dest='limit', type=limit_type, default=100,
                        help="Limit the number of results.")

    # Fix: the original used 'len(sys.argv) is 1', an identity comparison that
    # only works because CPython caches small ints; '==' is the correct test.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit()

    args = parser.parse_args()

    if args.domain:
        domain = args.domain
    else:
        print('[{}] {}'.format(red('ERROR'), "Please specify a domain name to search."))
        sys.exit(2)

    filename = args.filename if args.filename else ""
    limit = args.limit
    engine = args.engine

    # (url pattern, pagination start, pagination step) per supported engine.
    ENGINES = {
        "google": ("http://www.google.com/search?num=100&start={counter}&hl=en&q=%40\"{word}\"", 0, 100),
        "bing": ("http://www.bing.com/search?q=%40{word}&count=50&first={counter}", 0, 50),
        "ask": ("http://www.ask.com/web?q=%40{word}", 0, 100),
        "yahoo": ("http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}", 1, 100),
    }

    def _search_one(name, banner):
        """Announce *banner*, run one engine's paged search, return its e-mails."""
        url_pattern, start, step = ENGINES[name]
        print(green(banner))
        search = SearchEngine(url_pattern, domain, limit, start, step)
        search.process()
        return search.get_emails()

    if engine == "all":
        print(green("[-] Searching everywhere..\n"))
        all_emails = []
        all_emails.extend(_search_one("google", "[-] Searching in Google..\n"))
        all_emails.extend(_search_one("bing", "\n[-] Searching in Bing..\n"))
        all_emails.extend(_search_one("ask", "\n[-] Searching in ASK..\n"))
        all_emails.extend(_search_one("yahoo", "\n[-] Searching in Yahoo..\n"))
        all_emails = unique(all_emails)
    elif engine == "google":
        all_emails = _search_one("google", "[-] Searching in Google..\n")
    elif engine == "bing":
        all_emails = _search_one("bing", "[-] Searching in Bing..\n")
    elif engine == "ask":
        all_emails = _search_one("ask", "[-] Searching in ASK..\n")
    else:  # "yahoo" -- engine_type() guarantees no other value reaches here
        all_emails = _search_one("yahoo", "[-] Searching in Yahoo..\n")

    print(green("\n\n[+] Emails found:"))
    print(green("------------------"))

    if not all_emails:
        print(red("No emails found"))
        sys.exit(3)
    for email in all_emails:
        print(email)

    if filename:
        try:
            print(green("[+] Saving files..."))
            # Fix: the original called 'file.close' without parentheses, so the
            # TXT file was never explicitly closed; 'with' guarantees closing.
            with open(filename, 'w') as txt_file:
                for email in all_emails:
                    try:
                        txt_file.write(email + "\n")
                    except Exception:
                        print(red("Exception " + email))
        except Exception as e:
            # Fix: concatenating the exception object to str raised TypeError,
            # masking the real error; also the file written is TXT, not CSV.
            print(red("Error saving TXT file: " + str(e)))

        try:
            xml_name = filename.split(".")[0] + ".xml"
            with open(xml_name, 'w') as xml_file:
                xml_file.write('<?xml version="1.0" encoding="UTF-8"?><EmailHarvester>')
                # Addresses come from an [a-zA-Z0-9.\-_@] character class, so
                # no XML escaping is required here.
                for email in all_emails:
                    xml_file.write('<email>' + email + '</email>')
                xml_file.write('</EmailHarvester>')
            print(green("Files saved!"))
        except Exception as er:
            print(red("Error saving XML file: " + str(er)))

    sys.exit()

README.md

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,35 +2,68 @@ EmailHarvester
22
====
33
* A tool to retrieve Domain email addresses from Search Engines
44

5+
This project was inspired by:
6+
* theHarvester(https://github.com/laramies/theHarvester) from laramies.
7+
* search_email_collector(https://github.com/rapid7/metasploit-framework/blob/master/modules/auxiliary/gather/search_email_collector.rb) from Carlos Perez.
8+
59

610
Requirements
711
=====
812
* Python 3.x
13+
* termcolor
14+
* colorama
15+
* requests
916

1017

1118
Features
1219
=====
13-
20+
* Retrieve Domain email addresses from Search Engines
21+
* Google
22+
* Bing
23+
* Yahoo
24+
* ASK
1425

1526
Download/Installation
1627
====
1728
* git clone https://github.com/maldevel/EmailHarvester
29+
* pip install -r requirements.txt --user
1830

1931

20-
Setup
32+
Usage
2133
=====
34+
```
35+
usage: EmailHarvester.py [-h] [-d DOMAIN] [-s FILE] [-e ENGINE] [-l LIMIT]
2236
37+
_____ _ _ _ _ _
38+
| ___| (_)| | | | | | | |
39+
| |__ _ __ ___ __ _ _ | | | |_| | __ _ _ __ __ __ ___ ___ | |_ ___ _ __
40+
| __|| '_ ` _ \ / _` || || | | _ | / _` || '__|\ \ / // _ \/ __|| __|/ _ \| '__|
41+
| |___| | | | | || (_| || || | | | | || (_| || | \ V /| __/\__ \| |_| __/| |
42+
\____/|_| |_| |_| \__,_||_||_| \_| |_/ \__,_||_| \_/ \___||___/ \__|\___||_|
2343
24-
Contents
25-
=====
26-
44+
A tool to retrieve Domain email addresses from Search Engines | @maldevel
45+
Version: 1.0
2746
28-
Usage
29-
=====
30-
```
47+
optional arguments:
48+
-h, --help show this help message and exit
49+
-d DOMAIN, --domain DOMAIN
50+
Domain to search.
51+
-s FILE, --save FILE Save the results into a TXT and XML file.
52+
-e ENGINE, --engine ENGINE
53+
Select search engine(google, bing, yahoo, ask, all).
54+
-l LIMIT, --limit LIMIT
55+
Limit the number of results.
3156
```
3257

3358

3459
Examples
3560
=====
61+
* Search in Google
62+
./EmailHarvester.py -d example.com -e google
63+
64+
* Search in all engines
65+
./EmailHarvester.py -d example.com -e all
66+
67+
* Limit results
68+
./EmailHarvester.py -d example.com -e all -l 200
3669

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
termcolor
2+
colorama
3+
requests

0 commit comments

Comments
 (0)