Skip to content
This repository was archived by the owner on Jul 15, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 70 additions & 20 deletions src/searches.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,64 @@
from selenium.webdriver.common.by import By

from src.browser import Browser
from src.utils import makeRequestsSession


class Searches:
def __init__(self, browser: Browser):
    """
    Keep a reference to the browser wrapper and to its webdriver.

    Args:
        browser: Browser wrapper; its ``webdriver`` attribute is read here.
    """
    self.browser = browser
    # Same object as ``browser.webdriver``; stored for direct access.
    self.webdriver = self.browser.webdriver

def getGoogleTrends(self, wordsCount: int) -> list:
    """
    Collect lower-cased search terms from the Google daily-trends endpoint.

    Starting with yesterday and stepping one day further back per
    iteration, the daily-trends JSON is downloaded and every topic title
    plus its related queries are gathered. Duplicates are dropped after
    each day, and fetching stops once at least ``wordsCount`` unique terms
    are available.

    Args:
        wordsCount: Number of search terms wanted.

    Returns:
        At most ``wordsCount`` unique lower-cased terms.
    """
    collected: list[str] = []
    daysBack = 0
    while len(collected) < wordsCount:
        daysBack += 1
        response = requests.get(
            f'https://trends.google.com/trends/api/dailytrends?hl={self.browser.localeLang}&ed={(date.today() - timedelta(days=daysBack)).strftime("%Y%m%d")}&geo={self.browser.localeGeo}&ns=15'
        )
        # The first 6 characters of the body are skipped before JSON parsing.
        payload = json.loads(response.text[6:])
        dayTopics = payload["default"]["trendingSearchesDays"][0]["trendingSearches"]
        for entry in dayTopics:
            collected.append(entry["title"]["query"].lower())
            for related in entry["relatedQueries"]:
                collected.append(related["query"].lower())
        # De-duplicate after every day so the loop condition counts unique terms.
        collected = list(set(collected))
    return collected[:wordsCount]
def getGoogleTrends(self, words_count: int) -> list[str]:
    """
    Retrieve Google Trends search terms via the batchexecute API (last 48 hours).

    The first element of every entry in the parsed response is taken as a
    search term, lower-cased, and de-duplicated while keeping the order in
    which the response listed the entries.

    Args:
        words_count: Maximum number of terms to return; values below zero
            are treated as zero.

    Returns:
        Up to ``words_count`` unique lower-cased terms. An empty list is
        returned when the request fails or the response cannot be parsed.
    """
    logging.debug("Starting Google Trends fetch (last 48 hours)...")
    session = makeRequestsSession()

    url = "https://trends.google.com/_/TrendsUi/data/batchexecute"
    payload = f'f.req=[[[i0OFE,"[null, null, \\"{self.browser.localeGeo}\\", 0, null, 48]"]]]'
    headers = {"Content-Type": "application/x-www-form-urlencoded;charset=UTF-8"}

    logging.debug(f"Sending POST request to {url}")
    try:
        response = session.post(url, headers=headers, data=payload)
        response.raise_for_status()
        logging.debug("Response received from Google Trends API")
    except requests.RequestException as e:
        logging.error(f"Error fetching Google Trends: {e}")
        return []

    trends_data = self.extract_json_from_response(response.text)
    if not trends_data:
        logging.error("Failed to extract JSON from Google Trends response")
        return []

    logging.debug("JSON successfully extracted. Processing root terms...")

    # Process only the first element in each item
    root_terms: list[str] = []
    for item in trends_data:
        try:
            topic = item[0]
        except (IndexError, KeyError, TypeError) as e:
            logging.warning(f"Error processing an item: {e}")
            continue
        if not isinstance(topic, str):
            # A non-string entry would break ``.lower()`` below and abort
            # the whole fetch, so skip it like any other malformed item.
            logging.warning(
                f"Error processing an item: unexpected term {topic!r}"
            )
            continue
        root_terms.append(topic)

    logging.debug(f"Extracted {len(root_terms)} root trend entries")

    # Convert to lowercase and remove duplicates. ``dict.fromkeys`` keeps
    # first-seen order, so the truncation below is deterministic instead of
    # depending on set iteration order.
    search_terms = list(dict.fromkeys(term.lower() for term in root_terms))
    logging.debug(f"Found {len(search_terms)} unique search terms")

    # A negative count must not slice from the end of the list.
    limit = max(words_count, 0)
    if limit < len(search_terms):
        logging.debug(f"Limiting search terms to {words_count} items")
        search_terms = search_terms[:limit]

    logging.debug("Google Trends fetch complete")
    return search_terms

def getRelatedTerms(self, word: str) -> list:
try:
Expand Down Expand Up @@ -88,3 +119,22 @@ def bingSearch(self, word: str):
logging.error("[BING] " + "Timeout, retrying in 5 seconds...")
time.sleep(5)
continue

def extract_json_from_response(self, text: str):
    """
    Pull the nested JSON payload out of a multi-line API response.

    Each line is stripped and, if it starts with ``[`` and ends with ``]``,
    parsed as JSON. The string at ``[0][2]`` of that outer value is parsed
    as JSON again, and element ``1`` of the inner value is returned. Lines
    that fail at any of these steps are logged and skipped.

    Args:
        text: Raw response body.

    Returns:
        Element ``1`` of the inner JSON value from the first line that
        parses successfully, or ``None`` if no line does.
    """
    logging.debug("Extracting JSON from API response")
    candidates = (raw.strip() for raw in text.splitlines())
    for candidate in candidates:
        looks_like_array = candidate.startswith('[') and candidate.endswith(']')
        if not looks_like_array:
            continue
        try:
            envelope = json.loads(candidate)
            inner = json.loads(envelope[0][2])
            logging.debug("JSON extraction successful")
            return inner[1]
        except Exception as e:
            # Best effort: a malformed line must not stop the scan.
            logging.warning(f"Error parsing JSON: {e}")
    logging.error("No valid JSON found in response")
    return None
24 changes: 24 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,37 @@
from pathlib import Path

import requests
from requests import Session
from requests.adapters import HTTPAdapter
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.wait import WebDriverWait
from urllib3 import Retry

from .constants import BASE_URL


def makeRequestsSession(session: "Session | None" = None) -> Session:
    """
    Return a requests session that retries on selected server errors.

    An ``HTTPAdapter`` carrying a ``Retry`` policy (``total=4``,
    ``backoff_factor=1``, status codes 500/502/503/504) is mounted for both
    ``https://`` and ``http://``.

    Args:
        session: Session to configure. When omitted, a new session is
            created for this call.

    Returns:
        The configured session (the one passed in, if any).
    """
    if session is None:
        # Create the session per call. A ``requests.session()`` default
        # argument is evaluated once at import time, so every caller would
        # share (and keep re-mounting adapters on) one global session.
        session = requests.session()
    retry = Retry(
        total=4,
        backoff_factor=1,
        status_forcelist=[500, 502, 503, 504],
    )
    # See https://stackoverflow.com/a/35504626/4164390 to finetune
    for scheme in ("https://", "http://"):
        session.mount(scheme, HTTPAdapter(max_retries=retry))
    return session


class Utils:
def __init__(self, webdriver: WebDriver):
    """
    Store the webdriver this helper operates on.

    Args:
        webdriver: Driver instance kept on ``self.webdriver``.
    """
    self.webdriver = webdriver
Expand Down Expand Up @@ -249,3 +272,4 @@ def saveBrowserConfig(sessionPath: Path, config: dict):
configFile = sessionPath.joinpath("config.json")
with open(configFile, "w") as f:
json.dump(config, f)