Skip to content

Commit 3c05a41

Browse files
committed
plugins
plugins system for search engines
1 parent 7a0c4f5 commit 3c05a41

File tree

6 files changed

+231
-63
lines changed

6 files changed

+231
-63
lines changed

EmailHarvester.py

Lines changed: 61 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626

2727
__author__ = "maldevel"
2828
__copyright__ = "Copyright (c) 2016 @maldevel"
29-
__credits__ = ["maldevel", "cclauss", "Christian Martorella"]
29+
__credits__ = ["maldevel", "PaulSec", "cclauss", "Christian Martorella"]
3030
__license__ = "GPLv3"
31-
__version__ = "1.1.6"
31+
__version__ = "1.2.6"
3232
__maintainer__ = "maldevel"
3333

3434

@@ -38,25 +38,24 @@
3838
import time
3939
import requests
4040
import re
41+
import os
4142

4243
from termcolor import colored
4344
from argparse import RawTextHelpFormatter
4445
from sys import platform as _platform
4546
from urllib.parse import urlparse
4647
################################
4748

48-
4949
if _platform == 'win32':
5050
import colorama
5151
colorama.init()
5252

53-
5453
class myparser:
5554
def __init__(self, results, word):
5655
self.results = results
5756
self.word = word
5857
self.temp = []
59-
58+
6059
def genericClean(self):
6160
for e in '''<KW> </KW> </a> <b> </b> </div> <em> </em> <p> </span>
6261
<strong> </strong> <title> <wbr> </wbr>'''.split():
@@ -82,8 +81,30 @@ def unique(self):
8281

8382
###################################################################
8483

85-
class SearchEngine:
86-
def __init__(self, urlPattern, word, limit, counterInit, counterStep, userAgent, proxy):
84+
class EmailHarvester(object):
85+
86+
def __init__(self):
    """Discover and load every search-engine plugin shipped in ``plugins/``.

    Each ``*.py`` module found in the folder is imported and its ``Plugin``
    class is instantiated with this application object. The plugin is
    expected to call ``register_plugin`` on it, which fills ``self.plugins``.

    Raises:
        OSError: If the plugins folder cannot be listed.
    """
    self.plugins = {}
    # Resolve the folder relative to this script, not the current working
    # directory, so the tool also works when launched from elsewhere.
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "plugins")

    sys.path.insert(0, path)
    # sorted() keeps the plugin load order stable across platforms.
    for f in sorted(os.listdir(path)):
        fname, ext = os.path.splitext(f)
        # Skip private/helper modules such as __init__.py: they define no Plugin.
        if ext == '.py' and not fname.startswith('_'):
            mod = __import__(fname)
            # Instantiating Plugin is what triggers the registration callback.
            mod.Plugin(self)
97+
98+
def register_plugin(self, search_method, functions):
    """Record the callables a plugin offers under its search-engine name.

    An entry already stored under ``search_method`` is replaced.
    """
    self.plugins.update({search_method: functions})
100+
101+
def get_plugins(self):
    """Return the registry of loaded plugins (the same dict object, not a copy)."""
    registry = self.plugins
    return registry
103+
104+
def show_message(self, msg):
    """Print ``msg`` to stdout after colouring it with ``green()``."""
    highlighted = green(msg)
    print(highlighted)
106+
107+
def init_search(self, urlPattern, word, limit, counterInit, counterStep, userAgent, proxy):
87108
self.results = ""
88109
self.totalresults = ""
89110
self.userAgent = userAgent
@@ -117,7 +138,7 @@ def process(self):
117138
self.do_search()
118139
time.sleep(1)
119140
self.counter += self.step
120-
print(green("\tSearching " + str(self.counter) + " results..."))
141+
print("\tSearching " + str(self.counter) + " results...")
121142

122143
def get_emails(self):
123144
rawres = myparser(self.totalresults, self.word)
@@ -131,22 +152,19 @@ def yellow(text):
131152
def green(text):
    """Return ``text`` styled bold green by termcolor's ``colored``."""
    bold = ['bold']
    return colored(text, 'green', attrs=bold)
133154

134-
def blue(text):
135-
return colored(text, 'blue', attrs=['bold'])
136-
137155
def red(text):
    """Return ``text`` styled bold red by termcolor's ``colored``."""
    bold = ['bold']
    return colored(text, 'red', attrs=bold)
139157

140158
def unique(data):
    """Return the distinct items of ``data`` as a list, in first-seen order.

    A dict is used instead of a set so the result order is deterministic;
    set ordering of strings changes from run to run, which made printed and
    saved results differ between runs. Items must be hashable.
    """
    return list(dict.fromkeys(data))
142160

161+
###################################################################
162+
143163
def checkProxyUrl(url):
    """Validate a proxy URL; used as the argparse ``type`` of ``--proxy``.

    Args:
        url: Proxy URL as typed on the command line.

    Returns:
        The result of ``urlparse(url)`` when the URL is acceptable.

    Raises:
        argparse.ArgumentTypeError: If the scheme is not http/https or the
            URL has no network location (host[:port]).
    """
    url_checked = urlparse(url)
    # Logical `or` rather than bitwise `|`: same outcome for these booleans,
    # but it short-circuits and does not depend on operator precedence.
    if url_checked.scheme not in ('http', 'https') or url_checked.netloc == '':
        raise argparse.ArgumentTypeError(
            'Invalid {} Proxy URL (example: http://127.0.0.1:8080).'.format(url))
    return url_checked
148-
149-
###################################################################
150168

151169
def limit_type(x):
152170
x = int(x)
@@ -160,35 +178,6 @@ def engine_type(engine):
160178
return engine
161179
raise argparse.ArgumentTypeError("Invalid search engine, try with: {}.".format(', '.join(engines)))
162180

163-
###################################################################
164-
165-
def ask(domain, limit, userAgent, proxy):
166-
print(green("\n[+] Searching in ASK..\n"))
167-
url = "http://www.ask.com/web?q=%40{word}"
168-
search = SearchEngine(url, domain, limit, 0, 100, userAgent, proxy)
169-
search.process()
170-
return search.get_emails()
171-
172-
def bing(domain, limit, userAgent, proxy):
173-
print(green("\n[+] Searching in Bing..\n"))
174-
url = "http://www.bing.com/search?q=%40{word}&count=50&first={counter}"
175-
search = SearchEngine(url, domain, limit, 0, 50, userAgent, proxy)
176-
search.process()
177-
return search.get_emails()
178-
179-
def google(domain, limit, userAgent, proxy):
180-
print(green("\n[+] Searching in Google..\n"))
181-
url = 'http://www.google.com/search?num=100&start={counter}&hl=en&q=%40"{word}"'
182-
search = SearchEngine(url, domain, limit, 0, 100, userAgent, proxy)
183-
search.process()
184-
return search.get_emails()
185-
186-
def yahoo(domain, limit, userAgent, proxy):
187-
print(green("\n[+] Searching in Yahoo..\n"))
188-
url = "http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}"
189-
search = SearchEngine(url, domain, limit, 1, 100, userAgent, proxy)
190-
search.process()
191-
return search.get_emails()
192181

193182
###################################################################
194183

@@ -208,13 +197,23 @@ def yahoo(domain, limit, userAgent, proxy):
208197
""".format(red('Version'), yellow(__version__)),
209198
formatter_class=RawTextHelpFormatter)
210199

211-
parser.add_argument("-d", '--domain', metavar='DOMAIN', dest='domain', type=str, help="Domain to search.")
212-
parser.add_argument("-s", '--save', metavar='FILE', dest='filename', type=str, help="Save the results into a TXT and XML file (both).")
213-
parser.add_argument("-e", '--engine', metavar='ENGINE', dest='engine', default="all", type=engine_type, help="Select search engine(google, bing, yahoo, ask, all).")
214-
parser.add_argument("-l", '--limit', metavar='LIMIT', dest='limit', type=limit_type, default=100, help="Limit the number of results.")
215-
parser.add_argument('-u', '--user-agent', metavar='USER-AGENT', dest='uagent', type=str, help="Set the User-Agent request header.")
216-
parser.add_argument('-x', '--proxy', metavar='PROXY', dest='proxy', type=checkProxyUrl, help='Setup proxy server (example: http://127.0.0.1:8080)')
217-
parser.add_argument('--noprint', action='store_true', help='EmailHarvester will print discovered emails to terminal. It is possible to tell EmailHarvester not to print results to terminal with this option.')
200+
# Command-line options. "store" is argparse's default action, so it is not
# spelled out; explicit defaults are kept as they were.
parser.add_argument(
    "-d", '--domain',
    dest='domain', metavar='DOMAIN', type=str, default=None,
    help="Domain to search.")
parser.add_argument(
    "-s", '--save',
    dest='filename', metavar='FILE', type=str, default=None,
    help="Save the results into a TXT and XML file (both).")

# TODO: build the engine list shown in the help text by scanning the plugins
# folder instead of hard-coding it.
parser.add_argument(
    "-e", '--engine',
    dest='engine', metavar='ENGINE', type=engine_type, default="all",
    help="Select search engine plugin(google, bing, yahoo, ask, all).")

parser.add_argument(
    "-l", '--limit',
    dest='limit', metavar='LIMIT', type=limit_type, default=100,
    help="Limit the number of results.")
parser.add_argument(
    '-u', '--user-agent',
    dest='uagent', metavar='USER-AGENT', type=str,
    help="Set the User-Agent request header.")
parser.add_argument(
    '-x', '--proxy',
    dest='proxy', metavar='PROXY', type=checkProxyUrl, default=None,
    help='Setup proxy server (example: http://127.0.0.1:8080)')
parser.add_argument(
    '--noprint',
    action='store_true', default=False,
    help='EmailHarvester will print discovered emails to terminal. It is possible to tell EmailHarvester not to print results to terminal with this option.')
218217

219218

220219
if len(sys.argv) is 1:
@@ -238,26 +237,26 @@ def yahoo(domain, limit, userAgent, proxy):
238237
filename = args.filename or ""
239238
limit = args.limit
240239
engine = args.engine
240+
# Load the search-engine plugins and run the requested one(s).
app = EmailHarvester()
plugins = app.get_plugins()

all_emails = []
if engine == "all":
    print(green("[+] Searching everywhere.."))
    for plugin in plugins.values():
        all_emails += plugin['search'](domain, limit, userAgent, args.proxy)
elif engine not in plugins:
    print(red("Search engine plugin not found"))
    sys.exit(3)
else:
    # A plugin's search() returns only the list of e-mails (it prints its own
    # banner through app.show_message), so there is no (msg, emails) pair to
    # unpack here; unpacking raised ValueError or mis-assigned the result.
    all_emails = plugins[engine]['search'](domain, limit, userAgent, args.proxy)
all_emails = unique(all_emails)
257256

258257
if not all_emails:
259258
print(red("No emails found"))
260-
sys.exit(3)
259+
sys.exit(4)
261260

262261
msg = "\n\n[+] {} emails found:".format(len(all_emails))
263262
print(green(msg))

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Features
2323
* Limit search results.
2424
* Define your own User-Agent string.
2525
* Use proxy server.
26+
* Plugin system.
2627

2728

2829
Download/Installation
@@ -45,7 +46,7 @@ usage: EmailHarvester.py [-h] [-d DOMAIN] [-s FILE] [-e ENGINE] [-l LIMIT]
4546
\____/|_| |_| |_| \__,_||_||_| \_| |_/ \__,_||_| \_/ \___||___/ \__|\___||_|
4647
4748
A tool to retrieve Domain email addresses from Search Engines | @maldevel
48-
Version: 1.1.6
49+
Version: 1.2.6
4950
5051
optional arguments:
5152
-h, --help show this help message and exit

plugins/ask.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
This file is part of EmailHarvester
3+
Copyright (C) 2016 @maldevel
4+
https://github.com/maldevel/EmailHarvester
5+
6+
EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
7+
8+
This program is free software: you can redistribute it and/or modify
9+
it under the terms of the GNU General Public License as published by
10+
the Free Software Foundation, either version 3 of the License, or
11+
(at your option) any later version.
12+
13+
This program is distributed in the hope that it will be useful,
14+
but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
GNU General Public License for more details.
17+
18+
You should have received a copy of the GNU General Public License
19+
along with this program. If not, see <http://www.gnu.org/licenses/>.
20+
21+
For more see the file 'LICENSE' for copying permission.
22+
"""
23+
24+
#config = None
25+
app_emailharvester = None
26+
27+
28+
def search(domain, limit, userAgent, proxy):
    """Harvest e-mail addresses for ``domain`` from ASK.

    Drives the EmailHarvester application stored in the module-level
    ``app_emailharvester``: announces the search, initialises it with the ASK
    URL pattern (counter starting at 0, step 100), runs it and returns the
    result of ``get_emails()``.
    """
    harvester = app_emailharvester
    harvester.show_message("\n[+] Searching in ASK..\n")
    url_pattern = "http://www.ask.com/web?q=%40{word}"
    harvester.init_search(url_pattern, domain, limit, 0, 100, userAgent, proxy)
    harvester.process()
    emails = harvester.get_emails()
    return emails
34+
35+
36+
class Plugin:
    """Entry point the application instantiates to load the ASK plugin."""

    def __init__(self, app):
        """Register this module's ``search`` under 'ask' and remember ``app``."""
        global app_emailharvester
        handlers = {'search': search}
        app.register_plugin('ask', handlers)
        # Kept at module level so search() can drive the application later.
        app_emailharvester = app
42+

plugins/bing.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
This file is part of EmailHarvester
3+
Copyright (C) 2016 @maldevel
4+
https://github.com/maldevel/EmailHarvester
5+
6+
EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
7+
8+
This program is free software: you can redistribute it and/or modify
9+
it under the terms of the GNU General Public License as published by
10+
the Free Software Foundation, either version 3 of the License, or
11+
(at your option) any later version.
12+
13+
This program is distributed in the hope that it will be useful,
14+
but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
GNU General Public License for more details.
17+
18+
You should have received a copy of the GNU General Public License
19+
along with this program. If not, see <http://www.gnu.org/licenses/>.
20+
21+
For more see the file 'LICENSE' for copying permission.
22+
"""
23+
24+
#config = None
25+
app_emailharvester = None
26+
27+
28+
def search(domain, limit, userAgent, proxy):
    """Harvest e-mail addresses for ``domain`` from Bing.

    Drives the EmailHarvester application stored in the module-level
    ``app_emailharvester``: announces the search, initialises it with the Bing
    URL pattern (counter starting at 0, step 50), runs it and returns the
    result of ``get_emails()``.
    """
    harvester = app_emailharvester
    harvester.show_message("\n[+] Searching in Bing..\n")
    url_pattern = "http://www.bing.com/search?q=%40{word}&count=50&first={counter}"
    harvester.init_search(url_pattern, domain, limit, 0, 50, userAgent, proxy)
    harvester.process()
    emails = harvester.get_emails()
    return emails
34+
35+
36+
class Plugin:
    """Entry point the application instantiates to load the Bing plugin."""

    def __init__(self, app):
        """Register this module's ``search`` under 'bing' and remember ``app``."""
        global app_emailharvester
        handlers = {'search': search}
        app.register_plugin('bing', handlers)
        # Kept at module level so search() can drive the application later.
        app_emailharvester = app
42+

plugins/google.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
This file is part of EmailHarvester
3+
Copyright (C) 2016 @maldevel
4+
https://github.com/maldevel/EmailHarvester
5+
6+
EmailHarvester - A tool to retrieve Domain email addresses from Search Engines.
7+
8+
This program is free software: you can redistribute it and/or modify
9+
it under the terms of the GNU General Public License as published by
10+
the Free Software Foundation, either version 3 of the License, or
11+
(at your option) any later version.
12+
13+
This program is distributed in the hope that it will be useful,
14+
but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
GNU General Public License for more details.
17+
18+
You should have received a copy of the GNU General Public License
19+
along with this program. If not, see <http://www.gnu.org/licenses/>.
20+
21+
For more see the file 'LICENSE' for copying permission.
22+
"""
23+
24+
#config = None
25+
app_emailharvester = None
26+
27+
28+
def search(domain, limit, userAgent, proxy):
    """Harvest e-mail addresses for ``domain`` from Google.

    Drives the EmailHarvester application stored in the module-level
    ``app_emailharvester``: announces the search, initialises it with the
    Google URL pattern (counter starting at 0, step 100), runs it and returns
    the result of ``get_emails()``.
    """
    harvester = app_emailharvester
    harvester.show_message("\n[+] Searching in Google..\n")
    url_pattern = 'http://www.google.com/search?num=100&start={counter}&hl=en&q=%40"{word}"'
    harvester.init_search(url_pattern, domain, limit, 0, 100, userAgent, proxy)
    harvester.process()
    emails = harvester.get_emails()
    return emails
34+
35+
36+
class Plugin:
    """Entry point the application instantiates to load the Google plugin."""

    def __init__(self, app):
        """Register this module's ``search`` under 'google' and remember ``app``."""
        global app_emailharvester
        handlers = {'search': search}
        app.register_plugin('google', handlers)
        # Kept at module level so search() can drive the application later.
        app_emailharvester = app
42+

0 commit comments

Comments
 (0)