import re
import html
import json
+from bs4 import BeautifulSoup
from enum import Enum
import aiohttp
import requests
@@ -23,7 +24,7 @@ class SearchModifiers(Enum):
class HTMLRequests:
    BASE_URL = 'https://howlongtobeat.com/'
    REFERER_HEADER = BASE_URL
-    SEARCH_URL = BASE_URL + "api/search" + "/4b4cbe570602c88660f7df8ea0cb6b6e"
+    SEARCH_URL = BASE_URL + "api/search"
    GAME_URL = BASE_URL + "game"

    @staticmethod
@@ -94,8 +95,12 @@ def send_web_request(game_name: str, search_modifiers: SearchModifiers = SearchM
        """
        headers = HTMLRequests.get_search_request_headers()
        payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page)
+        api_key_result = HTMLRequests.send_website_request_getcode(False)
+        if api_key_result is None:
+            api_key_result = HTMLRequests.send_website_request_getcode(True)
        # Make the post request and return the result if it is valid
-        resp = requests.post(HTMLRequests.SEARCH_URL, headers=headers, data=payload)
+        search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
+        resp = requests.post(search_url_with_key, headers=headers, data=payload)
        if resp.status_code == 200:
            return resp.text
        return None
@@ -112,9 +117,13 @@ async def send_async_web_request(game_name: str, search_modifiers: SearchModifie
        """
        headers = HTMLRequests.get_search_request_headers()
        payload = HTMLRequests.get_search_request_data(game_name, search_modifiers, page)
+        api_key_result = await HTMLRequests.async_send_website_request_getcode(False)
+        if api_key_result is None:
+            api_key_result = await HTMLRequests.async_send_website_request_getcode(True)
        # Make the post request and return the result if it is valid
+        search_url_with_key = HTMLRequests.SEARCH_URL + "/" + api_key_result
        async with aiohttp.ClientSession() as session:
-            async with session.post(HTMLRequests.SEARCH_URL, headers=headers, data=payload) as resp:
+            async with session.post(search_url_with_key, headers=headers, data=payload) as resp:
                if resp is not None and str(resp.status) == "200":
                    return await resp.text()
        return None
@@ -195,3 +204,66 @@ async def async_get_game_title(game_id: int):
                    text = await resp.text()
                    return HTMLRequests.__cut_game_title(text)
        return None
+
+    @staticmethod
+    def send_website_request_getcode(parse_all_scripts: bool):
+        """
+        Function that sends a request to howlongtobeat to scrape the /api/search key
+        @return: The string key to use on /api/search
+        """
+        # Make the GET request and return the scraped key if the response is valid
+        headers = HTMLRequests.get_title_request_headers()
+        resp = requests.get(HTMLRequests.BASE_URL, headers=headers)
+        if resp.status_code == 200 and resp.text is not None:
+            # Parse the HTML content using BeautifulSoup
+            soup = BeautifulSoup(resp.text, 'html.parser')
+            # Find all <script> tags that have a src attribute
+            scripts = soup.find_all('script', src=True)
+            if parse_all_scripts:
+                matching_scripts = [script['src'] for script in scripts]
+            else:
+                matching_scripts = [script['src'] for script in scripts if '_app-' in script['src']]
+            for script_url in matching_scripts:
+                script_url = HTMLRequests.BASE_URL + script_url
+                script_resp = requests.get(script_url, headers=headers)
+                if script_resp.status_code == 200 and script_resp.text is not None:
+                    pattern = r'"/api/search/".concat\("([a-zA-Z0-9]+)"\)'
+                    matches = re.findall(pattern, script_resp.text)
+                    for match in matches:
+                        return match
+        return None
+
+    @staticmethod
+    async def async_send_website_request_getcode(parse_all_scripts: bool):
+        """
+        Function that sends a request to howlongtobeat to scrape the /api/search key
+        @return: The string key to use on /api/search
+        """
+        # Make the GET request and return the scraped key if the response is valid
+        headers = HTMLRequests.get_title_request_headers()
+        async with aiohttp.ClientSession() as session:
+            async with session.get(HTMLRequests.BASE_URL, headers=headers) as resp:
+                if resp is not None and str(resp.status) == "200":
+                    resp_text = await resp.text()
+                    # Parse the HTML content using BeautifulSoup
+                    soup = BeautifulSoup(resp_text, 'html.parser')
+                    # Find all <script> tags that have a src attribute
+                    scripts = soup.find_all('script', src=True)
+                    if parse_all_scripts:
+                        matching_scripts = [script['src'] for script in scripts]
+                    else:
+                        matching_scripts = [script['src'] for script in scripts if '_app-' in script['src']]
+                    for script_url in matching_scripts:
+                        script_url = HTMLRequests.BASE_URL + script_url
+                        async with aiohttp.ClientSession() as script_session:
+                            async with script_session.get(script_url, headers=headers) as script_resp:
+                                if script_resp is not None and str(script_resp.status) == "200":
+                                    script_resp_text = await script_resp.text()
+                                    pattern = r'"/api/search/".concat\("([a-zA-Z0-9]+)"\)'
+                                    matches = re.findall(pattern, script_resp_text)
+                                    for match in matches:
+                                        return match
+                                else:
+                                    return None
+                else:
+                    return None
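
For reference, here is a minimal sketch of how the scraping flow introduced in this diff could be exercised by a caller. It assumes the patched module is importable as howlongtobeatpy.HTMLRequests; the game name "Celeste" and the "data" field in the parsed response are illustrative assumptions, not part of this diff.

import json
from howlongtobeatpy.HTMLRequests import HTMLRequests

# Scrape the /api/search key from the site's "_app-" bundle first,
# falling back to scanning every <script> tag if that fails.
api_key = HTMLRequests.send_website_request_getcode(False)
if api_key is None:
    api_key = HTMLRequests.send_website_request_getcode(True)
print("Scraped API key:", api_key)

# send_web_request repeats the same lookup internally and posts to
# SEARCH_URL + "/" + key, so a plain search still works in one call.
response_text = HTMLRequests.send_web_request("Celeste")  # game name is illustrative
if response_text is not None:
    payload = json.loads(response_text)
    # The "data" field is an assumption about the response shape.
    print("Results returned:", len(payload.get("data", [])))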