Skip to content

Commit efa037c

Browse files
committed
Fix #52
Replaced the APIKey logic with the new auth token logic
1 parent e2a624f commit efa037c

File tree

1 file changed

+130
-85
lines changed

1 file changed

+130
-85
lines changed

howlongtobeatpy/howlongtobeatpy/HTMLRequests.py

Lines changed: 130 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
# ---------------------------------------------------------------------
22
# IMPORTS
33

4-
import re
54
import json
5+
import re
6+
import time
67
from enum import Enum
7-
from bs4 import BeautifulSoup
8+
89
import aiohttp
910
import requests
11+
from bs4 import BeautifulSoup
1012
from fake_useragent import UserAgent
1113

1214
# ---------------------------------------------------------------------
@@ -26,62 +28,69 @@ class SearchModifiers(Enum):
2628

2729
class SearchInformations:
    """
    Search related information scraped from the content of a howlongtobeat script
    """
    # Endpoint to append to the base url (e.g. "api/search"), None when not found
    search_url = None

    # Matches: fetch("/api/<path>...", { ... method: "POST" ... })
    # Group 1 is the path after /api/ (e.g. "search" or "search/v2").
    # The options part is not allowed to run over another "fetch(" so that a
    # call made with another method cannot borrow the 'method: "POST"' of a
    # later call in the same script.
    _SEARCH_FETCH_PATTERN = re.compile(
        r'fetch\s*\(\s*["\']/api/([a-zA-Z0-9_/]+)[^"\']*["\']\s*,\s*\{'
        r'(?:(?!fetch\s*\().)*?method\s*:\s*["\']POST["\'][^}]*\}',
        re.DOTALL | re.IGNORECASE
    )

    def __init__(self, script_content: str):
        self.search_url = self.__extract_search_url_script(script_content)
        # Avoid a double slash when the url is appended to a base url ending with "/"
        if HTMLRequests.BASE_URL.endswith("/") and self.search_url is not None:
            self.search_url = self.search_url.lstrip("/")

    def __extract_search_url_script(self, script_content: str):
        """
        Function that finds the 'fetch' calls made with 'method: "POST"',
        takes the first segment of the path after /api/ and returns it
        as '/api/<segment>' (e.g., "/api/search" from "/api/search/v2").

        This avoids relying on the exact string "search": the endpoint is
        identified by the POST method used in the options of the same call.
        Calls whose first segment is "find" are skipped and the next POST
        call in the script is evaluated instead.

        @param script_content: The text of the script to inspect
        @return: The API endpoint string (e.g., "/api/search"), None if not found
        """
        if not script_content:
            # Nothing to scan (None or empty script)
            return None

        for match in self._SEARCH_FETCH_PATTERN.finditer(script_content):
            # Keep only the root of the path ("search" from "search/v2")
            base_path = match.group(1).split('/')[0]
            if base_path and base_path != "find":
                return f"/api/{base_path}"

        # Unable to find :(
        return None
8376

8477

78+
class SearchAuthToken:
    """
    Holder of the auth token returned by the search init endpoint
    """
    # Path, relative to the site base url, of the endpoint that returns the token
    search_url = "api/search/init"
    # Last token extracted, None until one has been read successfully
    auth_token = None

    def extract_auth_token_from_response(self, response_content: "requests.Response"):
        """
        Extract the auth token from the response of the init request
        @param response_content: The response object, its json() body is expected to be an object with a 'token' key
        @return: The auth token in the response json if present (also assigned to self.auth_token), None otherwise
        """
        try:
            data = response_content.json()
        except ValueError:
            # json() raises a ValueError subclass when the body is not valid JSON
            self.auth_token = None
            return None
        return self.extract_auth_token_from_json(data)

    def extract_auth_token_from_json(self, json_content):
        """
        Extract the auth token from the already decoded json of the init request
        @param json_content: The decoded json, expected to be a dict with a 'token' key
        @return: The value of 'token' (also assigned to self.auth_token), None if json_content is not a dict or has no 'token'
        """
        if isinstance(json_content, dict):
            self.auth_token = json_content.get('token')
        else:
            # None, list, string...: there is no token to read, do not keep a stale one
            self.auth_token = None
        return self.auth_token
93+
8594
class HTMLRequests:
8695
BASE_URL = 'https://howlongtobeat.com/'
8796
REFERER_HEADER = BASE_URL
@@ -90,7 +99,7 @@ class HTMLRequests:
9099
SEARCH_URL = BASE_URL + "api/s/"
91100

92101
@staticmethod
93-
def get_search_request_headers():
102+
def get_search_request_headers(auth_token = None):
94103
"""
95104
Generate the headers for the search request
96105
@return: The headers object for the request
@@ -102,10 +111,14 @@ def get_search_request_headers():
102111
'User-Agent': ua.random.strip(),
103112
'referer': HTMLRequests.REFERER_HEADER
104113
}
114+
115+
if auth_token is not None:
116+
headers['x-auth-token'] = str(auth_token)
117+
105118
return headers
106119

107120
@staticmethod
108-
def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, page: int, search_info: SearchInformations):
121+
def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, page: int):
109122
"""
110123
Generate the data payload for the search request
111124
@param game_name: The name of the game to search
@@ -154,10 +167,6 @@ def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, p
154167
'useCache': True
155168
}
156169

157-
# If api_key is passed add it to the dict
158-
if search_info is not None and search_info.api_key is not None:
159-
payload['searchOptions']['users']['id'] = search_info.api_key
160-
161170
return json.dumps(payload)
162171

163172
@staticmethod
@@ -170,23 +179,16 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
170179
@param page: The page to explore of the research, unknown if this is actually used
171180
@return: The HTML code of the research if the request returned 200(OK), None otherwise
172181
"""
173-
headers = HTMLRequests.get_search_request_headers()
182+
auth_token = HTMLRequests.send_website_get_auth_token()
183+
headers = HTMLRequests.get_search_request_headers(auth_token)
174184
search_info_data = HTMLRequests.send_website_request_getcode(False)
175-
if search_info_data is None or search_info_data.api_key is None:
185+
if search_info_data is None or search_info_data.search_url is None:
176186
search_info_data = HTMLRequests.send_website_request_getcode(True)
177187
# Make the request
178188
if search_info_data.search_url is not None:
179189
HTMLRequests.SEARCH_URL = HTMLRequests.BASE_URL + search_info_data.search_url
180-
# The main method currently is the call to the API search URL
181-
search_url_with_key = HTMLRequests.SEARCH_URL + search_info_data.api_key
182-
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, None)
183-
resp = requests.post(search_url_with_key, headers=headers, data=payload, timeout=60)
184-
if resp.status_code == 200:
185-
return resp.text
186-
# Try to call with the standard url adding the api key to the user
187-
search_url = HTMLRequests.SEARCH_URL
188-
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, search_info_data)
189-
resp = requests.post(search_url, headers=headers, data=payload, timeout=60)
190+
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page)
191+
resp = requests.post(HTMLRequests.SEARCH_URL, headers=headers, data=payload, timeout=60)
190192
if resp.status_code == 200:
191193
return resp.text
192194
return None
@@ -201,27 +203,22 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
201203
@param page: The page to explore of the research, unknown if this is actually used
202204
@return: The HTML code of the research if the request returned 200(OK), None otherwise
203205
"""
204-
headers = HTMLRequests.get_search_request_headers()
206+
auth_token = await HTMLRequests.async_send_website_get_auth_token()
207+
headers = HTMLRequests.get_search_request_headers(auth_token)
205208
search_info_data = HTMLRequests.send_website_request_getcode(False)
206-
if search_info_data is None or search_info_data.api_key is None:
209+
if search_info_data is None or search_info_data.search_url is None:
207210
search_info_data = HTMLRequests.send_website_request_getcode(True)
208211
# Make the request
209212
if search_info_data.search_url is not None:
210213
HTMLRequests.SEARCH_URL = HTMLRequests.BASE_URL + search_info_data.search_url
211-
# The main method currently is the call to the API search URL
212-
search_url_with_key = HTMLRequests.SEARCH_URL + search_info_data.api_key
213-
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, None)
214+
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page)
215+
timeout = aiohttp.ClientTimeout(total=60)
214216
async with aiohttp.ClientSession() as session:
215-
async with session.post(search_url_with_key, headers=headers, data=payload) as resp_with_key:
217+
async with session.post(HTMLRequests.SEARCH_URL, headers=headers, data=payload, timeout=timeout) as resp_with_key:
216218
if resp_with_key is not None and resp_with_key.status == 200:
217219
return await resp_with_key.text()
218220
else:
219-
search_url = HTMLRequests.SEARCH_URL
220-
payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page, search_info_data)
221-
async with session.post(search_url, headers=headers, data=payload) as resp_user_id:
222-
if resp_user_id is not None and resp_user_id.status == 200:
223-
return await resp_user_id.text()
224-
return None
221+
return None
225222

226223
@staticmethod
227224
def __cut_game_title(page_source: str):
@@ -296,8 +293,9 @@ async def async_get_game_title(game_id: int):
296293
headers = HTMLRequests.get_title_request_headers()
297294

298295
# Request and extract title
296+
timeout = aiohttp.ClientTimeout(total=60)
299297
async with aiohttp.ClientSession() as session:
300-
async with session.post(HTMLRequests.GAME_URL, params=params, headers=headers) as resp:
298+
async with session.post(HTMLRequests.GAME_URL, params=params, headers=headers, timeout=timeout) as resp:
301299
if resp is not None and resp.status == 200:
302300
text = await resp.text()
303301
return HTMLRequests.__cut_game_title(text)
@@ -306,8 +304,8 @@ async def async_get_game_title(game_id: int):
306304
@staticmethod
307305
def send_website_request_getcode(parse_all_scripts: bool):
308306
"""
309-
Function that send a request to howlongtobeat to scrape the API key
310-
@return: The string key to use
307+
Function that send a request to howlongtobeat to scrape the correct search url
308+
@return: The search informations to use in the request
311309
"""
312310
# Make the post request and return the result if is valid
313311
headers = HTMLRequests.get_title_request_headers()
@@ -326,21 +324,21 @@ def send_website_request_getcode(parse_all_scripts: bool):
326324
script_resp = requests.get(script_url, headers=headers, timeout=60)
327325
if script_resp.status_code == 200 and script_resp.text is not None:
328326
search_info = SearchInformations(script_resp.text)
329-
if search_info.api_key is not None:
330-
# The api key is necessary
327+
if search_info.search_url is not None:
331328
return search_info
332329
return None
333330

334331
@staticmethod
335332
async def async_send_website_request_getcode(parse_all_scripts: bool):
336333
"""
337-
Function that send a request to howlongtobeat to scrape the key used in the search URL
338-
@return: The string key to use
334+
Function that send a request to howlongtobeat to scrape the correct search url
335+
@return: The search informations to use in the request
339336
"""
340337
# Make the post request and return the result if is valid
341338
headers = HTMLRequests.get_title_request_headers()
339+
timeout = aiohttp.ClientTimeout(total=60)
342340
async with aiohttp.ClientSession() as session:
343-
async with session.get(HTMLRequests.BASE_URL, headers=headers) as resp:
341+
async with session.get(HTMLRequests.BASE_URL, headers=headers, timeout=timeout) as resp:
344342
if resp is not None and resp.status == 200:
345343
resp_text = await resp.text()
346344
# Parse the HTML content using BeautifulSoup
@@ -354,14 +352,61 @@ async def async_send_website_request_getcode(parse_all_scripts: bool):
354352
for script_url in matching_scripts:
355353
script_url = HTMLRequests.BASE_URL + script_url
356354
async with aiohttp.ClientSession() as session:
357-
async with session.get(script_url, headers=headers) as script_resp:
355+
async with session.get(script_url, headers=headers, timeout=timeout) as script_resp:
358356
if script_resp is not None and resp.status == 200:
359357
script_resp_text = await script_resp.text()
360358
search_info = SearchInformations(script_resp_text)
361-
if search_info.api_key is not None:
359+
if search_info.search_url is not None:
362360
# The api key is necessary
363361
return search_info
364362
else:
365363
return None
366364
else:
367365
return None
366+
367+
@staticmethod
368+
def get_auth_token_request_params():
369+
"""
370+
Generate the params for the auth token request
371+
@return: The params object for the request
372+
"""
373+
timestamp = int(time.time() * 1000)
374+
params = {
375+
't': timestamp
376+
}
377+
return params
378+
379+
@staticmethod
def send_website_get_auth_token():
    """
    Function that send a GET request to howlongtobeat to obtain the token to put in the x-auth-token header of the search request
    @return: The auth token to use, None if the request did not return 200(OK) or the body does not hold a json object
    """
    headers = HTMLRequests.get_title_request_headers()
    params = HTMLRequests.get_auth_token_request_params()
    auth_token = SearchAuthToken()
    auth_token_url = HTMLRequests.BASE_URL + auth_token.search_url
    # Make the get request and return the token if the result is valid
    resp = requests.get(auth_token_url, params=params, headers=headers, timeout=60)
    if resp.status_code != 200:
        return None
    try:
        data = resp.json()
    except ValueError:
        # A 200 with a body that is not JSON: no token available
        return None
    if not isinstance(data, dict):
        # Only a json object can carry the 'token' key
        return None
    return auth_token.extract_auth_token_from_json(data)
394+
395+
@staticmethod
async def async_send_website_get_auth_token():
    """
    Function that send a GET request to howlongtobeat to obtain the token to put in the x-auth-token header of the search request
    @return: The auth token to use, None if the request did not return 200(OK) or the body does not hold a json object
    """
    headers = HTMLRequests.get_title_request_headers()
    params = HTMLRequests.get_auth_token_request_params()
    auth_token = SearchAuthToken()
    auth_token_url = HTMLRequests.BASE_URL + auth_token.search_url
    timeout = aiohttp.ClientTimeout(total=60)
    # Make the get request and return the token if the result is valid
    async with aiohttp.ClientSession() as session:
        async with session.get(auth_token_url, params=params, headers=headers, timeout=timeout) as resp:
            if resp.status != 200:
                return None
            try:
                json_data = await resp.json()
            except (aiohttp.ContentTypeError, ValueError):
                # Unexpected content type or a body that is not valid JSON: no token available
                return None
    if not isinstance(json_data, dict):
        # Only a json object can carry the 'token' key
        return None
    return auth_token.extract_auth_token_from_json(json_data)

0 commit comments

Comments
 (0)