Skip to content

Commit b759230

Browse files
Author: Alexandru Meterez (committed)
Update for moving to Puppeteer
1 parent 5f4d43b commit b759230

File tree

7 files changed

+7
-78
lines changed

7 files changed

+7
-78
lines changed

fred/.gitignore.swp

-12 KB
Binary file not shown.

fred/__init__.py

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,3 @@
11
from flask import Flask
22

3-
4-
class MyServer(Flask):
5-
6-
def __init__(self, *args, **kwargs):
7-
super(MyServer, self).__init__(*args, **kwargs)
8-
9-
# instanciate your variables here
10-
self.proxy = None
11-
self.server = None
12-
13-
14-
app = MyServer(__name__, static_url_path='')
15-
# app = Flask(__name__, static_url_path='')
3+
app = Flask(__name__, static_url_path='')

fred/config/proxy.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

fred/data/collect.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,11 @@
88

99

1010
async def intercept_network_response(response):
11-
print("URL:", response.url)
12-
print(response.status)
1311
network.append(str(response.status) + response.url)
14-
print("\n\n")
1512

1613

1714
async def intercept_console(response):
18-
print(response.text)
1915
javascript.append(response.text)
20-
print("\n\n")
2116

2217

2318
async def collect_msgs_and_screenshot(url, ss_path):
@@ -44,7 +39,7 @@ def collect_data(url, output_folder, output_filename):
4439
os.mkdir(output_folder)
4540

4641
asyncio.get_event_loop().run_until_complete(
47-
collect_msgs_and_screenshot(url, os.path.join(output_folder, output_filename) + '.png'))
42+
collect_msgs_and_screenshot(url, os.path.join(output_folder, output_filename)))
4843

4944
with open(os.path.join(output_folder, output_filename.split('.')[0] + '_js_log.json'), 'w') as f:
5045
json.dump(javascript, f, indent=2)

fred/data/log_preprocess.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import json
2-
from urllib.parse import urlparse
32
import re
43

54

@@ -11,23 +10,11 @@ def __init__(self, log_file):
1110
def _js_messages(self):
1211
messages = []
1312
for log in self.logs:
14-
processed_message = re.sub(r'http:\S+', '', log['message'])
13+
processed_message = re.sub(r'http:\S+', '', log)
1514
messages.append(processed_message)
1615
return messages
1716

1817

1918
class NetworkLogPreprocessor(object):
2019
def __init__(self, log_file):
21-
self.logs = json.load(open(log_file, 'r'))
22-
self.network_messages = self._network_messages()
23-
24-
def _network_messages(self):
25-
messages = []
26-
for entry in self.logs['log']['entries']:
27-
request_text = entry['request']['url']
28-
request_path = urlparse(request_text).path
29-
response = entry['response']['status']
30-
message = request_path + str(response)
31-
32-
messages.append(message)
33-
return messages
20+
self.network_messages = json.load(open(log_file, 'r'))

fred/endpoints/verify.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,15 @@
44
import uuid
55
from utils.utils import get_time, check_unique_prefix
66
from config.status_codes import STATUS_CODES
7-
from config.proxy import get_proxy
8-
import socket
9-
from urllib.parse import urlparse
107

118
states = {}
129
id_to_urls = {}
1310

1411

1512
class Verify(Resource):
16-
def __init__(self, app):
17-
self.app = app
18-
1913
def post(self):
2014
baseline_url = request.json['baseline_url']
2115
updated_url = request.json['updated_url']
22-
proxy = get_proxy(self.app)
23-
baseline_url_host = urlparse(baseline_url)
24-
updated_url_host = urlparse(updated_url)
25-
print(baseline_url_host.netloc)
26-
print(socket.gethostbyname(baseline_url))
27-
# proxy.remap_hosts(baseline_url_host.netloc, socket.gethostbyname(baseline_url))
28-
# proxy.remap_hosts(updated_url_host.netloc, socket.gethostbyname(updated_url))
29-
proxy.remap_hosts(baseline_url_host.netloc, 'www.google.com')
30-
proxy.remap_hosts(updated_url_host.netloc, 'www.google.com')
3116
max_depth = request.json['max_depth']
3217
max_urls = request.json['max_urls']
3318
prefix = request.json['prefix']
@@ -44,8 +29,7 @@ def post(self):
4429
'--max-depth',
4530
max_depth, '--max-urls', max_urls, '--prefix', prefix, '--auth-baseline-username', auth_baseline_username,
4631
'--auth-baseline-password', auth_baseline_password, '--auth-updated-username', auth_updated_username,
47-
'--auth-updated-password', auth_updated_password, '--proxy-host', proxy.host, '--proxy-port',
48-
str(proxy.port)])
32+
'--auth-updated-password', auth_updated_password])
4933
if p.poll() is not None and p.poll() > 0:
5034
return {'Error': 'Failed to launch crawler'}, 406
5135
id = str(uuid.uuid4().hex)

fred/run.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
1-
from config.proxy import get_server, init
2-
3-
from flask import Flask
41
from flask_restful import Api
52
from flask_cors import CORS
63
from endpoints.verify import Verify
74
from endpoints.get_ids import IDList
85
from endpoints.shutdown import Shutdown
96
from endpoints.verify import states, id_to_urls
107
from endpoints.get_result import Result
11-
from apscheduler.schedulers.background import BackgroundScheduler
12-
from flask import Flask, request, send_from_directory
8+
from flask import send_from_directory
139
import sys
1410

1511
sys.path.append('../')
@@ -39,12 +35,9 @@ def after_request(response):
3935
cors = CORS(app, resources={r"*": {"origins": "*"}})
4036
# print(hex(id(app)))
4137
api = Api(app)
42-
api.add_resource(Verify, "/api/verify", resource_class_kwargs={'app': app})
38+
api.add_resource(Verify, "/api/verify")
4339
api.add_resource(IDList, "/api/ids")
4440
api.add_resource(Shutdown, "/api/shutdown")
4541
api.add_resource(Result, "/api/result")
46-
init(app)
4742

4843
app.run(host='0.0.0.0', debug=True)
49-
server = get_server(app)
50-
server.stop()

0 commit comments

Comments (0)