Skip to content

Commit e958cc8

Browse files
committed
update https module
1 parent 3876607 commit e958cc8

File tree

5 files changed

+96
-17
lines changed

5 files changed

+96
-17
lines changed

database/get_leaf_cert.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
from cryptography.hazmat.backends import default_backend
1313
from cryptography.hazmat.primitives import hashes, serialization
1414

15-
def get_certificate(hostname, port=443):
15+
def get_certificate(hostname):
1616
"""OpenSSL with TCP get the certificate"""
1717
context = ssl.create_default_context()
1818
# Disable certificate verification for the first connection
1919
context.check_hostname = False
2020
context.verify_mode = ssl.CERT_NONE
2121

22-
with socket.create_connection((hostname, port)) as sock:
22+
with socket.create_connection((hostname, 443)) as sock:
2323
with context.wrap_socket(sock, server_hostname=hostname) as ssock:
2424
# Get certificate info
2525
cert_der = ssock.getpeercert(True)

database/https.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,34 @@ def validate_url(self, url):
8181
sys.exit(1)
8282

8383

84+
def head(self, curl):
    """Perform a HEAD request (redirects followed) with the given curl object.

    Args:
        curl: a configured pycurl.Curl instance (URL already set).

    Returns:
        str: the raw response headers decoded as UTF-8, or "" when *curl*
        is not a pycurl.Curl instance.
    """
    if not isinstance(curl, pycurl.Curl):
        return ""

    buffer = BytesIO()
    curl.setopt(pycurl.NOBODY, True)  # HEAD: request headers only, no body
    curl.setopt(pycurl.HEADERFUNCTION, buffer.write)
    curl.setopt(pycurl.FOLLOWLOCATION, True)  # follow redirects
    temp_cert_path = None

    if self.misconfigured_server:
        if not self.leaf_cert:
            sys.exit(1)
        # Pin the previously fetched leaf certificate for this request
        temp_cert_path = REQUEST_HANDLER.set_leaf(curl)

    try:
        curl.perform()
    except pycurl.error as error:
        # Bug fix: the original printed the exception *class*
        # (pycurl.error) instead of the caught exception instance,
        # so the actual curl error code/message was never shown.
        print("Error performing request:", error)
    finally:
        curl.close()

    # Remove the temporary certificate file, if one was created
    if temp_cert_path and os.path.exists(temp_cert_path):
        os.remove(temp_cert_path)

    return buffer.getvalue().decode('utf-8')
110+
111+
84112
def validate_data_type(self, content_type):
85113
"""Limit to used data types."""
86114
valid_content_types = {
@@ -89,7 +117,8 @@ def validate_data_type(self, content_type):
89117
'application/zip',
90118
'image/jpeg',
91119
'image/png',
92-
'text/html'
120+
'text/html',
121+
'head' # this is not MIME
93122
}
94123

95124
if content_type not in valid_content_types:
@@ -127,15 +156,19 @@ def get_leaf(self, url):
127156
logging.error("Failed to retrieve leaf certificate. Exiting.")
128157
sys.exit(1)
129158

130-
131-
def get_response(self, url, content_type):
132-
"""Handle all https requests"""
159+
def setup_before_get_response(self, url, content_type):
    """Validate known url and content type before issuing a request.

    Both validators terminate the program (sys.exit) on failure, so code
    after this call may assume url and content_type are acceptable.
    """
    self.validate_url(url)
    self.validate_data_type(content_type)

    # For trle.net, fetch the server's leaf certificate up front unless the
    # server is already known to be misconfigured — presumably so later
    # requests can pin it; confirm against get_leaf/set_leaf usage.
    if url.startswith("https://www.trle.net/") and not self.misconfigured_server:
        self.get_leaf(url)
138166

167+
168+
def get_response(self, url, content_type):
169+
"""Handle all https requests"""
170+
self.setup_before_get_response(url, content_type)
171+
139172
if content_type == 'application/zip':
140173
return DOWNLOADER.download_file(url)
141174

@@ -152,8 +185,12 @@ def get_response(self, url, content_type):
152185
headers_buffer = BytesIO()
153186
curl = pycurl.Curl() # pylint: disable=no-member
154187
curl.setopt(pycurl.URL, url)
155-
curl.setopt(pycurl.WRITEDATA, response_buffer)
188+
189+
if content_type == 'application/zip':
190+
return self.head(curl)
191+
156192
curl.setopt(pycurl.WRITEHEADER, headers_buffer)
193+
curl.setopt(pycurl.WRITEDATA, response_buffer)
157194

158195
if self.misconfigured_server:
159196
if not self.leaf_cert:
@@ -191,6 +228,11 @@ def get_response(self, url, content_type):
191228
if temp_cert_path and os.path.exists(temp_cert_path):
192229
os.remove(temp_cert_path)
193230

231+
return self.close_response(curl, headers, response_buffer, content_type)
232+
233+
234+
def close_response(self, curl, headers, response_buffer, content_type):
235+
"""Pack response and close curl"""
194236
if curl is None:
195237
logging.error("No curl instance")
196238
sys.exit(1)
@@ -244,6 +286,7 @@ def extract_content_type(self, headers):
244286
logging.error("Could not extract content type from header: %s", headers)
245287
return None
246288

289+
247290
class Downloader:
248291
"""Zip file downloader to be used in RequestHandler"""
249292
def __init__(self):
@@ -403,6 +446,7 @@ def get(url, content_type):
403446
'image/jpeg'
404447
'image/png'
405448
'text/html'
449+
'head'
406450
407451
url must start with:
408452
"https://www.trle.net/"

database/ideas.txt

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,3 +255,26 @@ limit of around 2 GB.
255255

256256
This was a special walkthrough the script can't handle
257257
https://www.trle.net/sc/Levelwalk.php?lid=864
258+
259+
We should probably just look at the link first
260+
https://www.trle.net/walk/864.jpg
261+
https://www.trle.net/walk/666.htm
262+
263+
Security
264+
I have realized that we can validate the certificate carefully
265+
and have curl use certificate pinning
266+
267+
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
268+
curl_easy_setopt(curl, CURLOPT_PINNEDPUBLICKEY, "sha256//base64_encoded_hash_here"); // Pin the public key
269+
270+
and make sure the index database can't ever change the host name,
271+
unless someone uses a quantum computer or specialized illegal hardware, it could
272+
be difficult to tamper with the download of files. We must make sure we install
273+
in /usr/bin/ or in /opt/appname/bin or /usr/local/bin together with read only database
274+
we put in /usr/share /usr/local/share or /opt/appname/share as a base index or other data.
275+
276+
We cant control what happens in home, we can protect against obvious treats
277+
like another user on the computer trying to prank another user :)
278+
You're home is a dirty place and no one can help what happens there
279+
but we validate before we open any database there that is has sane
280+
permissions sanitized data in there.

database/index_view.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def print_trle_page(page):
1515
levels = page['levels']
1616

1717
# Column widths for even spacing
18-
column_widths = [20, 20, 70, 20, 15, 15, 10, 20]
18+
column_widths = [6, 20, 70, 17, 11, 16, 6, 10]
1919

2020
headers = ["ID", "Author", "Level Name", "Difficulty",
2121
"Duration", "Class", "Type", "Released"]
@@ -30,8 +30,8 @@ def print_trle_page(page):
3030
for idx, k in enumerate(row.keys()):
3131
cell = str(row[k]) # Convert each cell value to string
3232
width = column_widths[idx] # Get the correct column width
33-
truncated_text = cell[:width].ljust(width) # Truncate and pad the text
34-
cell_data.append(truncated_text)
33+
truncated_text = cell[:width-1].ljust(width-1) # Truncate and pad the text
34+
cell_data.append(truncated_text + ' ')
3535
print("".join(cell_data)) # Print the row in one line
3636

3737

database/scrape.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def get_soup(url):
272272
Returns:
273273
BeautifulSoup: A BeautifulSoup object representing the parsed HTML content.
274274
"""
275-
if validate_url(url) == None:
275+
if validate_url(url) is None:
276276
print(f"{url} had wrong domain")
277277
sys.exit(1)
278278
return BeautifulSoup(https.get(validate_url(url), 'text/html'), 'html.parser')
@@ -291,7 +291,7 @@ def get_image(url):
291291
Raises:
292292
SystemExit: If the file format is unsupported.
293293
"""
294-
if validate_url(url) == None:
294+
if validate_url(url) is None:
295295
print(f"{url} had wrong domain")
296296
sys.exit(1)
297297
ext = url_postfix(url).lower()
@@ -313,7 +313,7 @@ def get_jpg(url):
313313
Returns:
314314
bytes: The JPEG image content in bytes.
315315
"""
316-
if validate_url(url) == None:
316+
if validate_url(url) is None:
317317
print(f"{url} had wrong domain")
318318
sys.exit(1)
319319
return https.get(validate_url(url), 'image/jpeg')
@@ -329,7 +329,7 @@ def get_png(url):
329329
Returns:
330330
bytes: The PNG image content in bytes.
331331
"""
332-
if validate_url(url) == None:
332+
if validate_url(url) is None:
333333
print(f"{url} had wrong domain")
334334
sys.exit(1)
335335
return https.get(validate_url(url), 'image/png')
@@ -345,7 +345,7 @@ def get_json(url):
345345
Returns:
346346
dict: The JSON data parsed into a Python dictionary.
347347
"""
348-
if validate_url(url) == None:
348+
if validate_url(url) is None:
349349
print(f"{url} had wrong domain")
350350
sys.exit(1)
351351
return https.get(validate_url(url), 'application/json')
@@ -361,7 +361,7 @@ def get_zip(url):
361361
Returns:
362362
dict: The ZIP file content in a dictionary format, if applicable.
363363
"""
364-
if validate_url(url) == None:
364+
if validate_url(url) is None:
365365
print(f"{url} had wrong domain")
366366
sys.exit(1)
367367
return https.get(validate_url(url), 'application/zip')
@@ -714,7 +714,19 @@ def get_trle_walkthrough(level_soup):
714714

715715
# Fetches the walkthrough content from the extracted URL
716716
url = "https://www.trle.net" + iframe_src
717-
response = https.get(url, 'text/html')
717+
718+
# Check the type of "document"
719+
typ = url_postfix(url)
720+
if typ == 'jpg':
721+
# we should handle all images here but right now
722+
# we return "" this is a bit more complex
723+
# we can't just give a binary image as text to
724+
# qt, we need to implement our own html "document" through a filter
725+
response = ""
726+
# response = https.get(url, 'image/jpeg')
727+
else:
728+
response = https.get(url, 'text/html')
729+
718730
if response:
719731
return response
720732
return None

0 commit comments

Comments
 (0)