-
Notifications
You must be signed in to change notification settings - Fork 40
Open
Description
Hi,
I had trouble using Parsero with HTTPS, so I made some small changes. It's the first time I've written Python, so I apologize if the code sucks...
Here is the diff:
diff --git a/parsero.py b/parsero.py
old mode 100644
new mode 100755
index 4ee24ef..a5dabea
--- a/parsero.py
+++ b/parsero.py
@@ -34,6 +34,7 @@ Author:
class bcolors:
OKGREEN = '\033[92m'
+ REDIR = '\033[37m'
FAIL = '\033[91m'
ENDC = '\033[0m'
YELLOW = '\033[33m'
@@ -54,6 +55,7 @@ if sys.version_info < (3, 0, 0):
import urllib.request
import argparse
import time
+import http.client
try:
import urllib3
@@ -76,15 +78,27 @@ def logo():
print(bcolors.YELLOW + hello + bcolors.ENDC)
now = time.strftime("%c")
-def conn_check(url, only200):
- global pathlist
+def conn_check(url, only200, https):
+ global pathlist, http
pathlist = []
salida = 1
+
+ if https == True:
+ protocol = "https"
+ conn = http.client.HTTPSConnection(url)
+ else:
+ protocol = "http"
+ conn = http.client.HTTPConnection(url)
+
try:
- for line in urllib.request.urlopen("http://" + url + "/robots.txt"):
- lineStr = str(line, encoding='utf8')
+ conn.request("GET", "/robots.txt")
+ res = conn.getresponse()
+ data = str(res.read(), encoding='utf8')
+ datas = data.split('\n')
+ for line in datas:
+ lineStr = line
path = lineStr.split(': /')
- if "Disallow" == path[0]:
+ if ("Disallow" == path[0]) or ("Noindex" == path[0]):
pathlist.append(path[1].replace("\n", "").replace("\r", ""))
pathlist = list(set(pathlist))
try:
@@ -99,21 +113,24 @@ def conn_check(url, only200):
print("\n" + bcolors.FAIL + "Please, type a valid URL. This URL can't be resolved." + bcolors.ENDC)
print("\n" + bcolors.FAIL + "e.g: python3 parsero.py -u www.behindthefirewalls.com -o -sb" + bcolors.ENDC + "\n")
salida = 0
-
+
http = urllib3.PoolManager()
count = 0
count_ok = 0
-
+
for p in pathlist:
- disurl = "http://" + url + '/' + p
- r1 = http.request('GET', disurl, redirect=False, retries=5)
+ disurl = protocol+"://"+url+'/'+p
+ r1 = http.request('GET', disurl, redirect = False, retries = 5)
+ count = count + 1
if r1.status == 200:
print(bcolors.OKGREEN + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC)
count_ok = count_ok + 1
elif only200 == False:
- print(bcolors.FAIL + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC)
- count = count + 1
-
+ if r1.status >= 300 and r1.status < 400:
+ print(bcolors.REDIR + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC)
+ else:
+ print(bcolors.FAIL + disurl + ' ' + str(r1.status) + ' ' + str(r1.reason) + bcolors.ENDC)
+
count_int = int(count)
count_ok_int = int(count_ok)
@@ -127,21 +144,25 @@ def conn_check(url, only200):
else:
print('\n' + bcolors.FAIL + '[+] %i links have been analyzed but any them are available...' % count_int + bcolors.ENDC)
-def search_bing(url, searchbing, only200):
+def search_bing(url, searchbing, only200, https):
+ if https == True:
+ protocol = "https"
+ else:
+ protocol = "http"
try:
print("\nSearching the Disallows entries in Bing...\n")
from bs4 import BeautifulSoup
count = 0
for p in pathlist:
- disurl = "http://" + url + '/' + p
+ disurl = protocol+"://" + url + '/' + p
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0')]
url2 = "http://www.bing.com/search?q=site:" + disurl
print(url2)
page = opener.open(url2)
- soup = BeautifulSoup(page)
+ soup = BeautifulSoup(page, 'lxml')
http = urllib3.PoolManager()
for cite in soup.findAll('cite'):
@@ -152,7 +173,10 @@ def search_bing(url, searchbing, only200):
if r2.status == 200:
print(bcolors.OKGREEN + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC)
elif only200 == False:
- print(bcolors.FAIL + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC)
+ if r2.status >= 300 and r2.status < 400:
+ print(bcolors.REDIR + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC)
+ else:
+ print(bcolors.FAIL + ' - ' + cite.text + ' ' + str(r2.status) + ' ' + str(r2.reason) + bcolors.ENDC)
except UnicodeEncodeError:
pass
@@ -170,6 +194,7 @@ def main():
parse = argparse.ArgumentParser()
parse.add_argument('-u', action='store', dest='url', help='Type the URL which will be analyzed')
parse.add_argument('-o', action='store_true', dest='only200', help='Show only the "HTTP 200" status code')
+ parse.add_argument('-s', action='store_true', dest='https', help='Enable https')
parse.add_argument('-sb', action='store_true', dest='searchbing', help='Search in Bing indexed Disallows')
parse.add_argument('-f', action='store', dest='file', help='Scan a list of domains from a list')
@@ -204,12 +229,13 @@ def main():
if url.find("http://") == 0:
url = url.replace("http://", "")
start_time = time.time()
+ https = args.https
only200 = args.only200
searchbing = args.searchbing
date(url)
- conn_check(url, only200)
+ conn_check(url, only200, https)
if searchbing == True:
- search_bing(url, searchbing, only200)
+ search_bing(url, searchbing, only200, https)
print("\nFinished in %0.2f seconds.\n" % (time.time() - start_time))
if __name__ == "__main__":
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels