-
-
Notifications
You must be signed in to change notification settings - Fork 8.6k
Closed
Labels
C-py (Python Bindings), D-chrome, G-chromedriver (Requires fixes in ChromeDriver), OS-linux
Description
Description
Hi, I created a script that parses a site. It works correctly, but after 1-2 days of running I get this error:
ERROR:root:Error in vacancy checker: Message: session not created
from unknown error: cannot create default profile directory; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#sessionnotcreatedexception
I can't figure out what's wrong, because everything works for a few days and then it just dies. Restarting the server makes the problem disappear for a while, but not for long.
Reproducible Code
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import os
import time
import logging
import platform
import tempfile
import uuid
import re
import shutil
import atexit
import psutil
import subprocess
import gc
class ParserConfig:
    """Headless Chrome/Chromium Selenium session for freelancehunt.com.

    On construction the class creates a private Chrome profile directory,
    starts a WebDriver and opens ``url``.  It can then log in with the
    ``LOGIN`` / ``PASSWORD`` / ``SECRET_KEY`` environment variables, list
    projects, read project details and place bids.

    The instance owns OS resources (a chromedriver process, browser
    processes and a temporary profile directory), so always release it via
    ``with ParserConfig() as parser: ...``, ``close_driver()`` or
    ``cleanup()``.  ``cleanup()`` is idempotent and is also registered with
    ``atexit`` as a last resort.
    """

    # ``platform.machine()`` values treated as ARM.
    _ARM_MACHINES = ("aarch64", "arm64", "armv7l")

    # Install locations probed on Linux, in priority order.
    _CHROMIUM_PATHS = (
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
        "/snap/bin/chromium",
    )
    _CHROMEDRIVER_PATHS = (
        "/usr/bin/chromedriver",
        "/usr/local/bin/chromedriver",
        "/snap/bin/chromedriver",
    )
    _ARM_CHROMEDRIVER_PATHS = _CHROMEDRIVER_PATHS + ("/usr/bin/chromium-chromedriver",)

    # Banner the site adds to automatically translated project descriptions.
    _AUTO_TRANSLATED_RE = re.compile(
        r'\s*(?:Проєкт|Проект)\s+перекладено\s+автоматично\.\s*Показати\s+оригінал\s*',
        flags=re.IGNORECASE
    )

    def __init__(self, url: str = "https://freelancehunt.com/my"):
        """Start the browser and open ``url``.

        Raises:
            Exception: whatever driver start-up or the first page load
                raised; resources acquired so far are released first.
        """
        self.url = url
        self.LOGIN = os.getenv("LOGIN")
        self.PASSWORD = os.getenv("PASSWORD")
        self.SECRET_KEY = os.getenv("SECRET_KEY")
        self.driver = None
        self.process_pid = None
        self.user_data_dir = None
        # Last-resort hook; cleanup() unregisters it again so finished
        # instances are not kept alive for the lifetime of the process.
        atexit.register(self.cleanup)
        try:
            # mkdtemp creates a fresh, uniquely named directory atomically.
            self.user_data_dir = tempfile.mkdtemp(prefix="chrome_user_data_")
            chrome_options = self._build_options()
            self.driver = self._create_driver(chrome_options)
            self.driver.set_page_load_timeout(10)
            self.driver.implicitly_wait(10)
            self.driver.get(self.url)
            self.wait_for_page_load()
        except Exception as e:
            print(f"Error initializing driver: {e}")
            self.cleanup()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()

    # ------------------------------------------------------------------
    # driver construction helpers
    # ------------------------------------------------------------------
    @classmethod
    def _is_arm(cls) -> bool:
        """Return True when the interpreter reports an ARM machine type."""
        return platform.machine().lower() in cls._ARM_MACHINES

    @staticmethod
    def _first_existing(paths):
        """Return the first path in ``paths`` that exists, or None."""
        return next((path for path in paths if os.path.exists(path)), None)

    def _build_options(self):
        """Build the Chrome options: headless, anti-detection and profile flags."""
        chrome_options = Options()
        arguments = []
        if self._is_arm():
            # Extra settings used only on ARM boards.
            arguments += [
                "--disable-plugins",
                "--no-first-run",
                "--disable-default-apps",
                "--disable-background-timer-throttling",
                "--memory-pressure-off",
                # NOTE(review): looks like a V8/Node flag rather than a
                # Chrome switch — confirm it has any effect here.
                "--max_old_space_size=2048",
            ]
        arguments += [
            # Headless settings
            "--headless=new",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--window-size=1280,720",
            "--disable-gpu",
            # Arguments for bypassing site protection
            "--disable-web-security",
            "--disable-features=VizDisplayCompositor",
            "--disable-site-isolation-trials",
            "--disable-backgrounding-occluded-windows",
            "--disable-renderer-backgrounding",
            # Browser masking
            "--disable-blink-features=AutomationControlled",
            # SSL and network
            '--ignore-certificate-errors',
            '--allow-running-insecure-content',
            '--disable-extensions',
            # User-Agent
            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            f"--user-data-dir={self.user_data_dir}",
        ]
        for argument in arguments:
            chrome_options.add_argument(argument)
        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
        chrome_options.add_experimental_option('useAutomationExtension', False)
        return chrome_options

    def _create_driver(self, chrome_options):
        """Create the WebDriver, preferring a system chromedriver on Linux.

        On ARM Linux a system Chromium binary is required.  If no system
        chromedriver is found (or on other platforms) the driver is obtained
        through ``ChromeDriverManager``.

        Raises:
            RuntimeError: on ARM Linux when no Chromium binary is installed.
        """
        driver_candidates = ()
        if platform.system() == "Linux":
            if self._is_arm():
                print("Detected ARM architecture, using Chromium...")
                chromium_path = self._first_existing(self._CHROMIUM_PATHS)
                if not chromium_path:
                    raise RuntimeError("Chromium not found. Please install: sudo apt install chromium-browser")
                print(f"Found Chromium at: {chromium_path}")
                chrome_options.binary_location = chromium_path
                driver_candidates = self._ARM_CHROMEDRIVER_PATHS
            else:
                driver_candidates = self._CHROMEDRIVER_PATHS

        chromedriver_path = self._first_existing(driver_candidates)
        if chromedriver_path:
            print(f"Found ChromeDriver at: {chromedriver_path}")
            service = Service(chromedriver_path)
        else:
            service = Service(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=chrome_options)

    # ------------------------------------------------------------------
    # cleanup of chrome directories and processes
    # ------------------------------------------------------------------
    def cleanup(self):
        """Quit the driver, kill leftover browser processes, delete the profile.

        Safe to call repeatedly: once nothing is left to release the call
        returns immediately.  Errors are printed and never raised.
        """
        # Drop the atexit reference so this instance can be garbage collected.
        atexit.unregister(self.cleanup)
        try:
            driver = getattr(self, "driver", None)
            user_data_dir = getattr(self, "user_data_dir", None)
            if not driver and not user_data_dir:
                return

            if driver:
                try:
                    # Remember the chromedriver pid before quit() clears it.
                    process = getattr(getattr(driver, "service", None), "process", None)
                    if process is not None:
                        self.process_pid = process.pid
                except Exception as e:
                    print(f"Error reading driver process: {e}")
                try:
                    # quit() closes every window and stops chromedriver.
                    driver.quit()
                    time.sleep(1)
                except Exception as e:
                    print(f"Error quitting driver: {e}")
                self.driver = None
                time.sleep(0.5)

            self._force_kill_chrome_processes()
            self.process_pid = None

            if user_data_dir:
                if os.path.exists(user_data_dir):
                    time.sleep(1)
                    shutil.rmtree(user_data_dir, ignore_errors=True)
                if os.path.exists(user_data_dir):
                    # Dying browser processes may still hold files; retry once.
                    time.sleep(2)
                    shutil.rmtree(user_data_dir, ignore_errors=True)
                if os.path.exists(user_data_dir):
                    # Keep the path so a later cleanup() call can retry.
                    print(f"Error removing user data dir: {user_data_dir}")
                else:
                    self.user_data_dir = None
            gc.collect()
        except Exception as e:
            print(f"Error during cleanup: {e}")

    # ------------------------------------------------------------------
    # killing chrome processes
    # ------------------------------------------------------------------
    def _force_kill_chrome_processes(self):
        """Terminate chrome/chromium processes that belong to this instance.

        A process is targeted when its name contains ``chrome`` or
        ``chromium`` and either its command line mentions this instance's
        profile directory or its pid is the recorded chromedriver pid.
        """
        try:
            user_data_dir = getattr(self, "user_data_dir", None)
            driver_pid = getattr(self, "process_pid", None)
            targets = []
            for proc in psutil.process_iter(['pid', 'name', 'cmdline']):
                try:
                    name = (proc.info['name'] or "").lower()
                    if 'chrome' not in name and 'chromium' not in name:
                        continue
                    cmdline = proc.info['cmdline'] or []
                    owns_profile = bool(user_data_dir) and any(
                        user_data_dir in str(arg) for arg in cmdline
                    )
                    is_driver = bool(driver_pid) and proc.info['pid'] == driver_pid
                    if owns_profile or is_driver:
                        proc.terminate()
                        targets.append(proc)
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    continue

            if targets:
                # Wait for all targets together instead of one by one.
                _, alive = psutil.wait_procs(targets, timeout=3)
                for proc in alive:
                    try:
                        proc.kill()
                    except (psutil.NoSuchProcess, psutil.AccessDenied):
                        continue
                _, alive = psutil.wait_procs(alive, timeout=2)
                for proc in alive:
                    print(f"Process {proc.pid} may still be running")

            if user_data_dir:
                # Fallback for processes that appeared after the scan above.
                profile_name = os.path.basename(user_data_dir)
                try:
                    for browser in ("chromium", "chrome"):
                        subprocess.run(
                            ['pkill', '-f', f'{browser}.*{profile_name}'],
                            check=False, timeout=5
                        )
                except subprocess.TimeoutExpired:
                    print("Timeout executing system kill commands")
                except Exception as e:
                    print(f"Error executing system kill commands: {e}")

            if targets:
                print(f"Killed Chrome processes: {[proc.pid for proc in targets]}")
                time.sleep(1)
        except Exception as e:
            print(f"Error force killing chrome processes: {e}")

    # ------------------------------------------------------------------
    # stripping boilerplate text from a description
    # ------------------------------------------------------------------
    def clear_description(self, description: str) -> str:
        """Remove the "translated automatically / show original" banner.

        Non-string input is returned unchanged.
        """
        if not isinstance(description, str):
            return description
        return self._AUTO_TRANSLATED_RE.sub('', description).strip()

    # ------------------------------------------------------------------
    # waiting for the page to load
    # ------------------------------------------------------------------
    def wait_for_page_load(self, timeout=10):
        """Wait up to ``timeout`` seconds for ``document.readyState == "complete"``.

        A timeout (or any driver error) is printed, not raised.
        """
        try:
            WebDriverWait(self.driver, timeout).until(
                lambda driver: driver.execute_script("return document.readyState") == "complete"
            )
            print("Page loaded successfully")
        except Exception as e:
            print(f"Page load timeout: {e}")

    # ------------------------------------------------------------------
    # closing the browser
    # ------------------------------------------------------------------
    def close_driver(self):
        """Alias for :meth:`cleanup`."""
        self.cleanup()

    # ------------------------------------------------------------------
    # waiting for an element
    # ------------------------------------------------------------------
    def wait_for_element(self, by: str, value: str, timeout: int = 5):
        """Return the element once it is visible, or None if it never appears.

        ``by`` is a locator strategy such as ``By.ID`` or ``By.XPATH``.
        """
        try:
            return WebDriverWait(self.driver, timeout).until(
                EC.visibility_of_element_located((by, value))
            )
        except Exception as e:
            print(f"Element not found: {by} = {value}, error: {e}")
            return None

    # ------------------------------------------------------------------
    # checking whether the login page is showing
    # ------------------------------------------------------------------
    def check_page_loaded(self) -> bool:
        """Return True when the current page shows any login-form indicator."""
        indicators = [
            "//input[@name='login']",
            "//input[@type='email']",
            "//form[contains(@action, 'login')]",
            "//a[contains(@href, 'login')]",
            "//div[contains(@class, 'login')]",
            "//input[contains(@placeholder, 'логін') or contains(@placeholder, 'email')]"
        ]
        try:
            # One XPath union instead of six lookups: with the implicit wait
            # set, every failed lookup would block for the full wait.
            return bool(self.driver.find_elements(By.XPATH, " | ".join(indicators)))
        except Exception as e:
            print(f"Error checking page load: {e}")
            return False

    # ------------------------------------------------------------------
    # logging in to the account
    # ------------------------------------------------------------------
    def login(self) -> bool:
        """Submit the login form (and the confirmation code, if configured).

        Returns:
            True when the form was submitted without an error, False
            otherwise (including when LOGIN/PASSWORD are not configured).
        """
        if not self.LOGIN or not self.PASSWORD:
            print("Error during login: LOGIN/PASSWORD environment variables are not set")
            return False
        try:
            self.wait_for_page_load()
            if not self.check_page_loaded():
                self.driver.get("https://freelancehunt.com/login")
                self.wait_for_page_load()
            self.driver.find_element(By.NAME, "login").send_keys(self.LOGIN)
            self.driver.find_element(By.NAME, "password").send_keys(self.PASSWORD + Keys.ENTER)
            time.sleep(2)
            if self.SECRET_KEY:
                try:
                    self.driver.find_element(By.NAME, "confirmation_code").send_keys(self.SECRET_KEY + Keys.ENTER)
                except Exception as e:
                    # A failure here is not treated as a failed login.
                    print(f"Confirmation code step skipped: {e}")
            time.sleep(2)
            return True
        except Exception as e:
            print(f"Error during login: {e}")
            return False

    # ------------------------------------------------------------------
    # searching for vacancies
    # ------------------------------------------------------------------
    @staticmethod
    def _unique_vacancies(vacancies) -> list:
        """Drop entries lacking a title or url, then de-duplicate by url.

        Filtering happens first so an untitled link cannot shadow a later,
        titled link to the same project.  Input order is preserved.
        """
        seen_urls = set()
        unique = []
        for vacancy in vacancies:
            title = vacancy["title"]
            url = vacancy["url"]
            if not (title and url) or url in seen_urls:
                continue
            seen_urls.add(url)
            unique.append({"title": title, "url": url})
        return unique

    def _collect_links(self, css_selector: str) -> list:
        """Return ``{"url", "title"}`` dicts for every element matching the selector."""
        return [
            {"url": element.get_attribute("href"), "title": element.text}
            for element in self.driver.find_elements(By.CSS_SELECTOR, css_selector)
        ]

    def parsing_vacancies(self) -> list:
        """Log in and collect projects from the dashboard and the search page.

        Returns:
            A list of ``{"title", "url"}`` dicts, unique by url; an empty
            list when login or scraping fails.
        """
        try:
            if not self.login():
                return []
            WebDriverWait(self.driver, 5).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "table.table-borderless"))
            )
            first_vacancies_data = self._collect_links(
                "table.table-borderless a.visitable[href*='/project/']"
            )
            self.driver.find_element(By.LINK_TEXT, "Знайти проєкт").click()
            time.sleep(2)
            find_vacancies_data = self._collect_links("table.project-list a.visitable")
            return self._unique_vacancies(first_vacancies_data + find_vacancies_data)
        except Exception as e:
            logging.error(f"Error during parsing: {e}")
            return []

    # ------------------------------------------------------------------
    # fetching vacancy details
    # ------------------------------------------------------------------
    def get_vacancy_details(self, url):
        """Open a project page and scrape its details.

        Returns:
            A dict with ``url``, ``title``, ``description``, ``price`` and
            ``country``, or None when the page could not be scraped.
        """
        try:
            self.login()
            self.driver.get(url)
            self.wait_for_page_load()
            title = self.driver.title.split("•")[0].strip()
            description = self.clear_description(
                self.driver.find_element(By.ID, "project-description").text.strip()
            )
            customer_country = self.driver.find_element(By.CLASS_NAME, "flag").get_attribute("alt").strip()
            try:
                price = self.driver.find_element(By.CLASS_NAME, "price").text.strip()
            except Exception:
                # No price element found: use the "budget not specified" placeholder.
                price = "<b>❌ Бюджет не вказано ❌</b>"
            return {
                "url": url,
                "title": title,
                "description": description,
                "price": price,
                "country": customer_country
            }
        except Exception as e:
            print(f"Error getting vacancy details: {e}")
            return None

    # ------------------------------------------------------------------
    # placing a bid
    # ------------------------------------------------------------------
    def take_rate(self, vacancy_url: str, text: str, price: str, deliver: str) -> str:
        """Submit a bid on ``vacancy_url``.

        Args:
            vacancy_url: project page to bid on.
            text: bid comment.
            price: bid amount, typed into the ``amount`` field.
            deliver: number of days, typed into ``days_to_deliver``.

        Returns:
            An HTML status message (success or failure); never raises.
        """
        try:
            self.login()
            self.driver.get(vacancy_url)
            self.wait_for_page_load()
            time.sleep(5)
            self.driver.find_element(By.ID, "add-bid").click()
            time.sleep(2)
            self.driver.find_element(By.NAME, "comment").send_keys(text)
            self.driver.find_element(By.NAME, "days_to_deliver").send_keys(deliver)
            self.driver.find_element(By.NAME, "amount").send_keys(price + Keys.ENTER)
            time.sleep(2)
            return f"<b><a href='{vacancy_url}'>✅ Ставка подана успішно ✅</a></b>"
        except Exception as e:
            print(f"Error taking rate: {e}")
            return "<b>❌ Помилка при подачі ставки ❌</b>"
Debugging Logs
Detected ARM architecture, using Chromium...
Found Chromium at: /usr/bin/chromium-browser
Found ChromeDriver at: /usr/bin/chromedriver
Error initializing driver: Message: session not created
from unknown error: cannot create default profile directory; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#sessionnotcreatedexception
Stacktrace:
#0 0xaaaac45e06c8 <unknown>
#1 0xaaaac40cfc6c <unknown>
#2 0xaaaac4107394 <unknown>
#3 0xaaaac41024e8 <unknown>
#4 0xaaaac41000bc <unknown>
#5 0xaaaac4145368 <unknown>
#6 0xaaaac4144d40 <unknown>
#7 0xaaaac410b9d4 <unknown>
#8 0xaaaac45a963c <unknown>
#9 0xaaaac45acd4c <unknown>
#10 0xaaaac459337c <unknown>
#11 0xaaaac45ad39c <unknown>
#12 0xaaaac457cb1c <unknown>
#13 0xaaaac45ce234 <unknown>
#14 0xaaaac45ce410 <unknown>
#15 0xaaaac45df3a0 <unknown>
#16 0xffff9b6dd5b4 <unknown>
#17 0xffff9b745ed8 <unknown>
ERROR:root:Error in vacancy checker: Message: session not created
from unknown error: cannot create default profile directory; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#sessionnotcreatedexception
Stacktrace:
#0 0xaaaac45e06c8 <unknown>
#1 0xaaaac40cfc6c <unknown>
#2 0xaaaac4107394 <unknown>
#3 0xaaaac41024e8 <unknown>
#4 0xaaaac41000bc <unknown>
#5 0xaaaac4145368 <unknown>
#6 0xaaaac4144d40 <unknown>
#7 0xaaaac410b9d4 <unknown>
#8 0xaaaac45a963c <unknown>
#9 0xaaaac45acd4c <unknown>
#10 0xaaaac459337c <unknown>
#11 0xaaaac45ad39c <unknown>
#12 0xaaaac457cb1c <unknown>
#13 0xaaaac45ce234 <unknown>
#14 0xaaaac45ce410 <unknown>
#15 0xaaaac45df3a0 <unknown>
#16 0xffff9b6dd5b4 <unknown>
#17 0xffff9b745ed8 <unknown>
INFO:aiogram.event:Update id=756488704 is handled. Duration 3513 ms by bot id=8097199920
ℹ️ Last known working version: 4.34.2
Metadata
Metadata
Assignees
Labels
C-py (Python Bindings), D-chrome, G-chromedriver (Requires fixes in ChromeDriver), OS-linux