@@ -5,6 +5,7 @@ import logger from '../logger.js';
55import fs from 'fs' ;
66import os from 'os' ;
77import path from 'path' ;
8+ import { URL } from 'url' ;
89
910puppeteer . use ( StealthPlugin ( ) ) ;
1011
@@ -27,23 +28,97 @@ export default async function execute(url, waitForSelector, options) {
2728 removeUserDataDir = true ;
2829 }
2930
31+ const launchArgs = [
32+ '--no-sandbox' ,
33+ '--disable-gpu' ,
34+ '--disable-setuid-sandbox' ,
35+ '--disable-dev-shm-usage' ,
36+ '--disable-crash-reporter' ,
37+ '--no-first-run' ,
38+ '--no-default-browser-check' ,
39+ ] ;
40+ if ( options ?. proxyUrl ) {
41+ launchArgs . push ( `--proxy-server=${ options . proxyUrl } ` ) ;
42+ }
43+
3044 browser = await puppeteer . launch ( {
31- headless : options . puppeteerHeadless ?? true ,
32- args : [
33- '--no-sandbox' ,
34- '--disable-gpu' ,
35- '--disable-setuid-sandbox' ,
36- '--disable-dev-shm-usage' ,
37- '--disable-crash-reporter' ,
38- ] ,
39- timeout : options . puppeteerTimeout || 30_000 ,
45+ headless : options ?. puppeteerHeadless ?? true ,
46+ args : launchArgs ,
47+ timeout : options ?. puppeteerTimeout || 30_000 ,
4048 userDataDir,
49+ executablePath : options ?. executablePath , // allow using system Chrome
4150 } ) ;
51+
4252 page = await browser . newPage ( ) ;
43- await page . setExtraHTTPHeaders ( DEFAULT_HEADER ) ;
53+
54+ // Derive domain-specific defaults
55+ const { hostname } = new URL ( url ) ;
56+
57+ // Set a realistic modern user agent unless provided
58+ const userAgent =
59+ options ?. userAgent ||
60+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36' ;
61+ await page . setUserAgent ( userAgent ) ;
62+
63+ // Viewport and device scale for typical desktop
64+ await page . setViewport ( { width : 1366 , height : 768 , deviceScaleFactor : 1 } ) ;
65+
66+ // Extra HTTP headers with localized Accept-Language
67+ const acceptLanguage = options ?. acceptLanguage || 'de-DE,de;q=0.9,en-US;q=0.7,en;q=0.5' ;
68+ const headers = {
69+ ...DEFAULT_HEADER ,
70+ 'Accept-Language' : acceptLanguage ,
71+ 'User-Agent' : userAgent ,
72+ Referer : options ?. referer || `https://${ hostname } /` ,
73+ Connection : 'keep-alive' ,
74+ DNT : '1' ,
75+ } ;
76+ await page . setExtraHTTPHeaders ( headers ) ;
77+
78+ // Timezone and locale tweaks to look German when needed
79+ try {
80+ const tz = options ?. timezone || 'Europe/Berlin' ;
81+ if ( tz ) await page . emulateTimezone ( tz ) ;
82+ } catch {
83+ //noop
84+ }
85+
86+ // Harden navigator properties (stealth already covers many, but we ensure critical ones)
87+ await page . evaluateOnNewDocument ( ( ) => {
88+ Object . defineProperty ( navigator , 'webdriver' , { get : ( ) => undefined } ) ;
89+ // Plugins and languages (navigator.mimeTypes is not overridden here)
90+ // @ts-ignore
91+ Object . defineProperty ( navigator , 'plugins' , { get : ( ) => [ 1 , 2 , 3 ] } ) ;
92+ // @ts-ignore
93+ Object . defineProperty ( navigator , 'languages' , {
94+ get : ( ) => ( window . localStorage . getItem ( '__LANGS__' ) || 'de-DE,de' ) . split ( ',' ) ,
95+ } ) ;
96+ } ) ;
97+ // Provide languages value before navigation
98+ await page . evaluateOnNewDocument ( ( langs ) => {
99+ try {
100+ window . localStorage . setItem ( '__LANGS__' , langs ) ;
101+ } catch {
102+ //noop
103+ }
104+ } , acceptLanguage . split ( ';' ) [ 0 ] ) ;
105+
106+ // Optional cookies
107+ if ( Array . isArray ( options ?. cookies ) && options . cookies . length > 0 ) {
108+ await page . setCookie ( ...options . cookies ) ;
109+ }
110+
111+ // Navigation
44112 const response = await page . goto ( url , {
45- waitUntil : 'domcontentloaded' ,
113+ waitUntil : options ?. waitUntil || 'domcontentloaded' ,
46114 } ) ;
115+
116+ // Unless options.humanDelay is false, wait a random 200-599 ms to mimic human rendering time
117+ if ( options ?. humanDelay !== false ) {
118+ const delay = 200 + Math . floor ( Math . random ( ) * 400 ) ;
119+ await new Promise ( ( res ) => setTimeout ( res , delay ) ) ;
120+ }
121+
47122 let pageSource ;
48123 // if we're extracting data from a SPA, we must wait for the selector
49124 if ( waitForSelector != null ) {
@@ -57,7 +132,7 @@ export default async function execute(url, waitForSelector, options) {
57132 pageSource = await page . content ( ) ;
58133 }
59134
60- const statusCode = response . status ( ) ;
135+ const statusCode = response ? .status ?. ( ) ?? 200 ;
61136
62137 if ( botDetected ( pageSource , statusCode ) ) {
63138 logger . warn ( 'We have been detected as a bot :-/ Tried url: => ' , url ) ;
0 commit comments