11# ---------------------------------------------------------------------
22# IMPORTS
33
4- import re
54import json
5+ import re
6+ import time
67from enum import Enum
7- from bs4 import BeautifulSoup
8+
89import aiohttp
910import requests
11+ from bs4 import BeautifulSoup
1012from fake_useragent import UserAgent
1113
1214# ---------------------------------------------------------------------
@@ -26,62 +28,69 @@ class SearchModifiers(Enum):
2628
class SearchInformations:
    """
    Holder for the search endpoint scraped from one of the website scripts.

    After construction, search_url contains the '/api/<name>' path found in
    the script (without the leading '/' when HTMLRequests.BASE_URL already
    ends with one), or None when no suitable endpoint was found.
    """
    search_url = None

    # Matches fetch("/api/<path>...", { ... method: "POST" ... }).
    # Group 1 is the path after /api/ (e.g. "search" or "search/v2").
    # NOTE(review): with re.DOTALL the lazy '.*?' may run past the closing
    # brace of the options object, so a non-POST fetch followed later in the
    # script by any 'method: "POST"' can still match - confirm this is acceptable.
    _SEARCH_FETCH_PATTERN = re.compile(
        r'fetch\s*\(\s*["\']\/api\/([a-zA-Z0-9_/]+)[^"\']*["\']\s*,\s*{.*?\s*method:\s*["\']POST["\'].*?}',
        re.DOTALL | re.IGNORECASE
    )

    def __init__(self, script_content: str):
        """
        Extract the search url from the given script content
        @param script_content: The text of the script to inspect
        """
        self.search_url = self.__extract_search_url_script(script_content)
        if HTMLRequests.BASE_URL.endswith("/") and self.search_url is not None:
            # BASE_URL already ends with the separator, drop ours to avoid "//"
            self.search_url = self.search_url.lstrip("/")

    def __extract_search_url_script(self, script_content: str):
        """
        Function that finds the first 'fetch' call to '/api/...' that is
        followed by 'method: "POST"' and returns '/api/' plus the first
        segment of its path (e.g. "/api/search" for "/api/search/v2").

        This avoids relying on the exact string "search"; the "find"
        endpoint is explicitly rejected.

        @param script_content: The text of the script to inspect
        @return: The API endpoint string (e.g. "/api/search") or None.
        """
        if not script_content:
            # Nothing to scan (None or empty string)
            return None

        match = self._SEARCH_FETCH_PATTERN.search(script_content)
        if match is None:
            return None

        # Keep only the root of the captured path ("search" from "search/v2")
        base_path = match.group(1).split('/')[0]
        if base_path == "find":
            # "find" is not the search endpoint; report "not found" explicitly
            return None

        return f"/api/{base_path}"
8376
8477
class SearchAuthToken:
    """
    Holder for the auth token read from the search init endpoint response.
    """
    # Path of the token endpoint, appended to the site base url by the callers
    search_url = "api/search/init"
    # Last token extracted; None until an extraction finds one
    auth_token = None

    def extract_auth_token_from_response(self, response_content: "requests.Response"):
        """
        Extract the auth token from the response of the token request
        @param response_content: The response object, its json() body is parsed
        @return: The auth token in the response json if present, None otherwise;
                 the same value is also assigned to self.auth_token
        """
        try:
            data = response_content.json()
        except ValueError:
            # The body is not valid JSON: no token available
            self.auth_token = None
            return None
        return self.extract_auth_token_from_json(data)

    def extract_auth_token_from_json(self, json_content):
        """
        Extract the auth token from an already parsed json body
        @param json_content: The parsed json, expected to be a dict with a 'token' key
        @return: The value of 'token' if present, None otherwise;
                 the same value is also assigned to self.auth_token
        """
        if isinstance(json_content, dict):
            self.auth_token = json_content.get('token')
        else:
            # Anything other than a json object cannot hold the 'token' key
            self.auth_token = None
        return self.auth_token
93+
8594class HTMLRequests :
8695 BASE_URL = 'https://howlongtobeat.com/'
8796 REFERER_HEADER = BASE_URL
@@ -90,7 +99,7 @@ class HTMLRequests:
9099 SEARCH_URL = BASE_URL + "api/s/"
91100
92101 @staticmethod
93- def get_search_request_headers ():
102+ def get_search_request_headers (auth_token = None ):
94103 """
95104 Generate the headers for the search request
96105 @return: The headers object for the request
@@ -102,10 +111,14 @@ def get_search_request_headers():
102111 'User-Agent' : ua .random .strip (),
103112 'referer' : HTMLRequests .REFERER_HEADER
104113 }
114+
115+ if auth_token is not None :
116+ headers ['x-auth-token' ] = str (auth_token )
117+
105118 return headers
106119
107120 @staticmethod
108- def get_search_request_data (game_name : str , search_modifiers : SearchModifiers , page : int , search_info : SearchInformations ):
121+ def get_search_request_data (game_name : str , search_modifiers : SearchModifiers , page : int ):
109122 """
110123 Generate the data payload for the search request
111124 @param game_name: The name of the game to search
@@ -154,10 +167,6 @@ def get_search_request_data(game_name: str, search_modifiers: SearchModifiers, p
154167 'useCache' : True
155168 }
156169
157- # If api_key is passed add it to the dict
158- if search_info is not None and search_info .api_key is not None :
159- payload ['searchOptions' ]['users' ]['id' ] = search_info .api_key
160-
161170 return json .dumps (payload )
162171
163172 @staticmethod
@@ -170,23 +179,16 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
170179 @param page: The page to explore of the research, unknown if this is actually used
171180 @return: The HTML code of the research if the request returned 200(OK), None otherwise
172181 """
173- headers = HTMLRequests .get_search_request_headers ()
182+ auth_token = HTMLRequests .send_website_get_auth_token ()
183+ headers = HTMLRequests .get_search_request_headers (auth_token )
174184 search_info_data = HTMLRequests .send_website_request_getcode (False )
175- if search_info_data is None or search_info_data .api_key is None :
185+ if search_info_data is None or search_info_data .search_url is None :
176186 search_info_data = HTMLRequests .send_website_request_getcode (True )
177187 # Make the request
178188 if search_info_data .search_url is not None :
179189 HTMLRequests .SEARCH_URL = HTMLRequests .BASE_URL + search_info_data .search_url
180- # The main method currently is the call to the API search URL
181- search_url_with_key = HTMLRequests .SEARCH_URL + search_info_data .api_key
182- payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , None )
183- resp = requests .post (search_url_with_key , headers = headers , data = payload , timeout = 60 )
184- if resp .status_code == 200 :
185- return resp .text
186- # Try to call with the standard url adding the api key to the user
187- search_url = HTMLRequests .SEARCH_URL
188- payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , search_info_data )
189- resp = requests .post (search_url , headers = headers , data = payload , timeout = 60 )
190+ payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page )
191+ resp = requests .post (HTMLRequests .SEARCH_URL , headers = headers , data = payload , timeout = 60 )
190192 if resp .status_code == 200 :
191193 return resp .text
192194 return None
@@ -201,27 +203,22 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
201203 @param page: The page to explore of the research, unknown if this is actually used
202204 @return: The HTML code of the research if the request returned 200(OK), None otherwise
203205 """
204- headers = HTMLRequests .get_search_request_headers ()
206+ auth_token = await HTMLRequests .async_send_website_get_auth_token ()
207+ headers = HTMLRequests .get_search_request_headers (auth_token )
205208 search_info_data = HTMLRequests .send_website_request_getcode (False )
206- if search_info_data is None or search_info_data .api_key is None :
209+ if search_info_data is None or search_info_data .search_url is None :
207210 search_info_data = HTMLRequests .send_website_request_getcode (True )
208211 # Make the request
209212 if search_info_data .search_url is not None :
210213 HTMLRequests .SEARCH_URL = HTMLRequests .BASE_URL + search_info_data .search_url
211- # The main method currently is the call to the API search URL
212- search_url_with_key = HTMLRequests .SEARCH_URL + search_info_data .api_key
213- payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , None )
214+ payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page )
215+ timeout = aiohttp .ClientTimeout (total = 60 )
214216 async with aiohttp .ClientSession () as session :
215- async with session .post (search_url_with_key , headers = headers , data = payload ) as resp_with_key :
217+ async with session .post (HTMLRequests . SEARCH_URL , headers = headers , data = payload , timeout = timeout ) as resp_with_key :
216218 if resp_with_key is not None and resp_with_key .status == 200 :
217219 return await resp_with_key .text ()
218220 else :
219- search_url = HTMLRequests .SEARCH_URL
220- payload = HTMLRequests .get_search_request_data (game_name , search_modifiers , page , search_info_data )
221- async with session .post (search_url , headers = headers , data = payload ) as resp_user_id :
222- if resp_user_id is not None and resp_user_id .status == 200 :
223- return await resp_user_id .text ()
224- return None
221+ return None
225222
226223 @staticmethod
227224 def __cut_game_title (page_source : str ):
@@ -296,8 +293,9 @@ async def async_get_game_title(game_id: int):
296293 headers = HTMLRequests .get_title_request_headers ()
297294
298295 # Request and extract title
296+ timeout = aiohttp .ClientTimeout (total = 60 )
299297 async with aiohttp .ClientSession () as session :
300- async with session .post (HTMLRequests .GAME_URL , params = params , headers = headers ) as resp :
298+ async with session .post (HTMLRequests .GAME_URL , params = params , headers = headers , timeout = timeout ) as resp :
301299 if resp is not None and resp .status == 200 :
302300 text = await resp .text ()
303301 return HTMLRequests .__cut_game_title (text )
@@ -306,8 +304,8 @@ async def async_get_game_title(game_id: int):
306304 @staticmethod
307305 def send_website_request_getcode (parse_all_scripts : bool ):
308306 """
309- Function that send a request to howlongtobeat to scrape the API key
310- @return: The string key to use
307+ Function that send a request to howlongtobeat to scrape the correct search url
308+ @return: The search informations to use in the request
311309 """
312310 # Make the post request and return the result if is valid
313311 headers = HTMLRequests .get_title_request_headers ()
@@ -326,21 +324,21 @@ def send_website_request_getcode(parse_all_scripts: bool):
326324 script_resp = requests .get (script_url , headers = headers , timeout = 60 )
327325 if script_resp .status_code == 200 and script_resp .text is not None :
328326 search_info = SearchInformations (script_resp .text )
329- if search_info .api_key is not None :
330- # The api key is necessary
327+ if search_info .search_url is not None :
331328 return search_info
332329 return None
333330
334331 @staticmethod
335332 async def async_send_website_request_getcode (parse_all_scripts : bool ):
336333 """
337- Function that send a request to howlongtobeat to scrape the key used in the search URL
338- @return: The string key to use
334+ Function that send a request to howlongtobeat to scrape the correct search url
335+ @return: The search informations to use in the request
339336 """
340337 # Make the post request and return the result if is valid
341338 headers = HTMLRequests .get_title_request_headers ()
339+ timeout = aiohttp .ClientTimeout (total = 60 )
342340 async with aiohttp .ClientSession () as session :
343- async with session .get (HTMLRequests .BASE_URL , headers = headers ) as resp :
341+ async with session .get (HTMLRequests .BASE_URL , headers = headers , timeout = timeout ) as resp :
344342 if resp is not None and resp .status == 200 :
345343 resp_text = await resp .text ()
346344 # Parse the HTML content using BeautifulSoup
@@ -354,14 +352,61 @@ async def async_send_website_request_getcode(parse_all_scripts: bool):
354352 for script_url in matching_scripts :
355353 script_url = HTMLRequests .BASE_URL + script_url
356354 async with aiohttp .ClientSession () as session :
357- async with session .get (script_url , headers = headers ) as script_resp :
355+ async with session .get (script_url , headers = headers , timeout = timeout ) as script_resp :
358356 if script_resp is not None and resp .status == 200 :
359357 script_resp_text = await script_resp .text ()
360358 search_info = SearchInformations (script_resp_text )
361- if search_info .api_key is not None :
359+ if search_info .search_url is not None :
362360 # The api key is necessary
363361 return search_info
364362 else :
365363 return None
366364 else :
367365 return None
366+
@staticmethod
def get_auth_token_request_params():
    """
    Generate the params for the auth token request
    @return: The params dict for the request, with the current time in
             milliseconds (as an integer) under the key 't'
    """
    # time.time() is in seconds: scale to milliseconds and truncate
    timestamp = int(time.time() * 1000)
    return {'t': timestamp}
378+
@staticmethod
def send_website_get_auth_token():
    """
    Function that send a GET request to howlongtobeat to obtain the token
    used as x-auth-token in the search request
    @return: The auth token to use, None if the response status is not 200,
             the body is not valid json or no token is present
    """
    headers = HTMLRequests.get_title_request_headers()
    params = HTMLRequests.get_auth_token_request_params()
    auth_token = SearchAuthToken()
    auth_token_url = HTMLRequests.BASE_URL + auth_token.search_url
    # Make the get request and return the token if the response is valid
    resp = requests.get(auth_token_url, params=params, headers=headers, timeout=60)
    if resp.status_code == 200 and resp.text is not None:
        try:
            return auth_token.extract_auth_token_from_response(resp)
        except ValueError:
            # The body could not be parsed as json: treat it as "no token"
            return None
    return None
394+
@staticmethod
async def async_send_website_get_auth_token():
    """
    Async function that send a GET request to howlongtobeat to obtain the
    token used as x-auth-token in the search request
    @return: The auth token to use, None if the response status is not 200,
             the body cannot be read as json or no token is present
    """
    headers = HTMLRequests.get_title_request_headers()
    params = HTMLRequests.get_auth_token_request_params()
    auth_token = SearchAuthToken()
    auth_token_url = HTMLRequests.BASE_URL + auth_token.search_url
    timeout = aiohttp.ClientTimeout(total=60)
    # Make the get request and return the token if the response is valid
    async with aiohttp.ClientSession() as session:
        async with session.get(auth_token_url, params=params, headers=headers, timeout=timeout) as resp:
            if resp is None or resp.status != 200:
                return None
            try:
                json_data = await resp.json()
            except (aiohttp.ContentTypeError, ValueError):
                # Unexpected content type or invalid json: treat it as "no token"
                return None
            return auth_token.extract_auth_token_from_json(json_data)
0 commit comments