@@ -28,8 +28,9 @@ export class SearchEngine {
2828 const enableQualityCheck = process . env . ENABLE_RELEVANCE_CHECKING !== 'false' ;
2929 const qualityThreshold = parseFloat ( process . env . RELEVANCE_THRESHOLD || '0.3' ) ;
3030 const forceMultiEngine = process . env . FORCE_MULTI_ENGINE_SEARCH === 'true' ;
31+ const debugBrowsers = process . env . DEBUG_BROWSER_LIFECYCLE === 'true' ;
3132
32- console . log ( `[SearchEngine] Quality checking: ${ enableQualityCheck } , threshold: ${ qualityThreshold } , multi-engine: ${ forceMultiEngine } ` ) ;
33+ console . log ( `[SearchEngine] Quality checking: ${ enableQualityCheck } , threshold: ${ qualityThreshold } , multi-engine: ${ forceMultiEngine } , debug: ${ debugBrowsers } ` ) ;
3334
3435 // Try multiple approaches to get search results, starting with most reliable
3536 const approaches = [
@@ -42,8 +43,11 @@ export class SearchEngine {
4243 let bestEngine = 'None' ;
4344 let bestQuality = 0 ;
4445
45- for ( const approach of approaches ) {
46+ for ( let i = 0 ; i < approaches . length ; i ++ ) {
47+ const approach = approaches [ i ] ;
4648 try {
49+ console . log ( `[SearchEngine] Attempting ${ approach . name } (${ i + 1 } /${ approaches . length } )...` ) ;
50+
4751 // Use more aggressive timeouts for faster fallback
4852 const approachTimeout = Math . min ( timeout / 3 , 4000 ) ; // Max 4 seconds per approach for faster fallback
4953 const results = await approach . method ( sanitizedQuery , numResults , approachTimeout ) ;
@@ -74,7 +78,7 @@ export class SearchEngine {
7478 }
7579
7680 // If this is the last engine or quality is acceptable, prepare to return
77- if ( approach === approaches [ approaches . length - 1 ] ) {
81+ if ( i === approaches . length - 1 ) {
7882 if ( bestQuality >= qualityThreshold || ! enableQualityCheck ) {
7983 console . log ( `[SearchEngine] Using best results from ${ bestEngine } (quality: ${ bestQuality . toFixed ( 2 ) } )` ) ;
8084 return { results : bestResults , engine : bestEngine } ;
@@ -88,6 +92,9 @@ export class SearchEngine {
8892 }
8993 } catch ( error ) {
9094 console . error ( `[SearchEngine] ${ approach . name } approach failed:` , error ) ;
95+
96+ // Handle browser-specific errors (no cleanup needed since each engine uses dedicated browsers)
97+ await this . handleBrowserError ( error , approach . name ) ;
9198 }
9299 }
93100
@@ -111,9 +118,52 @@ export class SearchEngine {
111118
112119
/**
 * Runs a Brave search in a Firefox instance launched solely for this call.
 *
 * Makes at most two attempts. Each attempt launches its own browser, hands it
 * to `tryBrowserBraveSearchInternal`, and closes it afterwards regardless of
 * the outcome. A failed first attempt is followed by a 500 ms pause; a failed
 * second attempt re-throws the error it hit.
 *
 * @param query - Search query, forwarded unchanged to the internal search.
 * @param numResults - Result count, forwarded unchanged to the internal search.
 * @param timeout - Timeout value, forwarded unchanged to the internal search.
 * @returns Whatever `tryBrowserBraveSearchInternal` resolves with.
 * @throws The error raised by the second failed attempt.
 */
private async tryBrowserBraveSearch(query: string, numResults: number, timeout: number): Promise<SearchResult[]> {
  console.log(`[SearchEngine] Trying browser-based Brave search with dedicated browser...`);

  let attempt = 0;
  while (attempt < 2) {
    attempt += 1;
    let dedicatedBrowser;
    try {
      // Playwright is loaded lazily, inside the try, so an import failure is retried like any other error.
      const playwright = await import('playwright');
      // Headless unless BROWSER_HEADLESS is exactly the string 'false'.
      const headless = process.env.BROWSER_HEADLESS !== 'false';
      // NOTE(review): these look like Chromium-style switches being passed to Firefox — confirm they are accepted.
      const launchArgs = ['--no-sandbox', '--disable-dev-shm-usage'];
      dedicatedBrowser = await playwright.firefox.launch({ headless, args: launchArgs });

      console.log(`[SearchEngine] Brave search attempt ${ attempt } /2 with fresh browser`);
      // Awaited inside the try so the finally block runs only after the search has settled.
      return await this.tryBrowserBraveSearchInternal(dedicatedBrowser, query, numResults, timeout);
    } catch (error) {
      console.error(`[SearchEngine] Brave search attempt ${ attempt } /2 failed:`, error);
      const isFinalAttempt = attempt === 2;
      if (isFinalAttempt) {
        throw error;
      }
      // Brief pause before the retry; the failed browser is closed after this wait, in finally.
      await new Promise(resolve => setTimeout(resolve, 500));
    } finally {
      if (dedicatedBrowser) {
        try {
          await dedicatedBrowser.close();
        } catch (closeError) {
          // A failed close is logged only, so it never replaces the search result or error.
          console.log(`[SearchEngine] Error closing Brave browser:`, closeError);
        }
      }
    }
  }

  // Not expected to be reached: the second attempt either returns or throws.
  throw new Error('All Brave search attempts failed');
}
161+
162+ private async tryBrowserBraveSearchInternal ( browser : any , query : string , numResults : number , timeout : number ) : Promise < SearchResult [ ] > {
163+ // Validate browser is still functional before proceeding
164+ if ( ! browser . isConnected ( ) ) {
165+ throw new Error ( 'Browser is not connected' ) ;
166+ }
117167
118168 try {
119169 const context = await browser . newContext ( {
@@ -123,45 +173,95 @@ export class SearchEngine {
123173 timezoneId : 'America/New_York' ,
124174 } ) ;
125175
126- const page = await context . newPage ( ) ;
127-
128- // Navigate to Brave search
129- const searchUrl = `https://search.brave.com/search?q=${ encodeURIComponent ( query ) } &source=web` ;
130- console . log ( `[SearchEngine] Browser navigating to Brave: ${ searchUrl } ` ) ;
131-
132- await page . goto ( searchUrl , {
133- waitUntil : 'domcontentloaded' ,
134- timeout : timeout
135- } ) ;
136-
137- // Wait for search results to load
138176 try {
139- await page . waitForSelector ( '[data-type="web"]' , { timeout : 3000 } ) ;
140- } catch {
141- console . log ( `[SearchEngine] Browser Brave results selector not found, proceeding anyway` ) ;
142- }
177+ const page = await context . newPage ( ) ;
178+
179+ // Navigate to Brave search
180+ const searchUrl = `https://search.brave.com/search?q=${ encodeURIComponent ( query ) } &source=web` ;
181+ console . log ( `[SearchEngine] Browser navigating to Brave: ${ searchUrl } ` ) ;
182+
183+ await page . goto ( searchUrl , {
184+ waitUntil : 'domcontentloaded' ,
185+ timeout : timeout
186+ } ) ;
143187
144- // Get the page content
145- const html = await page . content ( ) ;
146-
147- await context . close ( ) ;
148-
149- console . log ( `[SearchEngine] Browser Brave got HTML with length: ${ html . length } ` ) ;
150-
151- const results = this . parseBraveResults ( html , numResults ) ;
152- console . log ( `[SearchEngine] Browser Brave parsed ${ results . length } results` ) ;
153-
154- return results ;
188+ // Wait for search results to load
189+ try {
190+ await page . waitForSelector ( '[data-type="web"]' , { timeout : 3000 } ) ;
191+ } catch {
192+ console . log ( `[SearchEngine] Browser Brave results selector not found, proceeding anyway` ) ;
193+ }
194+
195+ // Get the page content
196+ const html = await page . content ( ) ;
197+
198+ console . log ( `[SearchEngine] Browser Brave got HTML with length: ${ html . length } ` ) ;
199+
200+ const results = this . parseBraveResults ( html , numResults ) ;
201+ console . log ( `[SearchEngine] Browser Brave parsed ${ results . length } results` ) ;
202+
203+ await context . close ( ) ;
204+ return results ;
205+ } catch ( error ) {
206+ // Ensure context is closed even on error
207+ await context . close ( ) ;
208+ throw error ;
209+ }
155210 } catch ( error ) {
156211 console . error ( `[SearchEngine] Browser Brave search failed:` , error ) ;
157212 throw error ;
158213 }
159214 }
160215
/**
 * Runs a Bing search in a Chromium instance launched solely for this call.
 *
 * Makes at most two attempts. Each attempt launches its own browser, hands it
 * to `tryBrowserBingSearchInternal`, and closes it afterwards regardless of
 * the outcome. A failed first attempt is followed by a 500 ms pause; a failed
 * second attempt re-throws the error it hit.
 *
 * @param query - Search query, forwarded unchanged to the internal search.
 * @param numResults - Result count, forwarded unchanged to the internal search.
 * @param timeout - Timeout value, forwarded unchanged to the internal search.
 * @returns Whatever `tryBrowserBingSearchInternal` resolves with.
 * @throws The error raised by the second failed attempt.
 */
private async tryBrowserBingSearch(query: string, numResults: number, timeout: number): Promise<SearchResult[]> {
  console.log(`[SearchEngine] Trying browser-based Bing search with dedicated browser...`);

  let attempt = 0;
  while (attempt < 2) {
    attempt += 1;
    let dedicatedBrowser;
    try {
      // Playwright is loaded lazily, inside the try, so an import failure is retried like any other error.
      const playwright = await import('playwright');
      // Headless unless BROWSER_HEADLESS is exactly the string 'false'.
      const headless = process.env.BROWSER_HEADLESS !== 'false';
      const launchArgs = [
        '--no-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--disable-dev-shm-usage',
        '--disable-gpu',
      ];
      dedicatedBrowser = await playwright.chromium.launch({ headless, args: launchArgs });

      console.log(`[SearchEngine] Bing search attempt ${ attempt } /2 with fresh browser`);
      // Awaited inside the try so the finally block runs only after the search has settled.
      return await this.tryBrowserBingSearchInternal(dedicatedBrowser, query, numResults, timeout);
    } catch (error) {
      console.error(`[SearchEngine] Bing search attempt ${ attempt } /2 failed:`, error);
      const isFinalAttempt = attempt === 2;
      if (isFinalAttempt) {
        throw error;
      }
      // Brief pause before the retry; the failed browser is closed after this wait, in finally.
      await new Promise(resolve => setTimeout(resolve, 500));
    } finally {
      if (dedicatedBrowser) {
        try {
          await dedicatedBrowser.close();
        } catch (closeError) {
          // A failed close is logged only, so it never replaces the search result or error.
          console.log(`[SearchEngine] Error closing Bing browser:`, closeError);
        }
      }
    }
  }

  // Not expected to be reached: the second attempt either returns or throws.
  throw new Error('All Bing search attempts failed');
}
259+
260+ private async tryBrowserBingSearchInternal ( browser : any , query : string , numResults : number , timeout : number ) : Promise < SearchResult [ ] > {
261+ // Validate browser is still functional before proceeding
262+ if ( ! browser . isConnected ( ) ) {
263+ throw new Error ( 'Browser is not connected' ) ;
264+ }
165265
166266 try {
167267 // Enhanced browser context with more realistic fingerprinting
@@ -188,18 +288,24 @@ export class SearchEngine {
188288
189289 const page = await context . newPage ( ) ;
190290
191- // Try enhanced Bing search with proper web interface flow
192291 try {
193- const results = await this . tryEnhancedBingSearch ( page , query , numResults , timeout ) ;
194- await context . close ( ) ;
195- return results ;
196- } catch ( enhancedError ) {
197- console . log ( `[SearchEngine] Enhanced Bing search failed, trying fallback: ${ enhancedError instanceof Error ? enhancedError . message : 'Unknown error' } ` ) ;
198-
199- // Fallback to direct URL approach with enhanced parameters
200- const results = await this . tryDirectBingSearch ( page , query , numResults , timeout ) ;
292+ // Try enhanced Bing search with proper web interface flow
293+ try {
294+ const results = await this . tryEnhancedBingSearch ( page , query , numResults , timeout ) ;
295+ await context . close ( ) ;
296+ return results ;
297+ } catch ( enhancedError ) {
298+ console . log ( `[SearchEngine] Enhanced Bing search failed, trying fallback: ${ enhancedError instanceof Error ? enhancedError . message : 'Unknown error' } ` ) ;
299+
300+ // Fallback to direct URL approach with enhanced parameters
301+ const results = await this . tryDirectBingSearch ( page , query , numResults , timeout ) ;
302+ await context . close ( ) ;
303+ return results ;
304+ }
305+ } catch ( error ) {
306+ // Ensure context is closed even on error
201307 await context . close ( ) ;
202- return results ;
308+ throw error ;
203309 }
204310 } catch ( error ) {
205311 console . error ( `[SearchEngine] Browser Bing search failed:` , error ) ;
@@ -885,6 +991,51 @@ export class SearchEngine {
885991 return averageScore ;
886992 }
887993
/**
 * Probes a browser instance and reports whether it is usable.
 *
 * The browser counts as healthy when `isConnected()` is truthy and a throwaway
 * context can be opened and closed without throwing. Any exception during the
 * probe is logged and reported as unhealthy; this method does not throw.
 *
 * Progress messages are printed only when the DEBUG_BROWSER_LIFECYCLE
 * environment variable is exactly 'true'; the failure message is always printed.
 *
 * @param browser - Object exposing `isConnected()` and `newContext()`.
 * @returns `true` when the probe succeeds, otherwise `false`.
 */
private async validateBrowserHealth(browser: any): Promise<boolean> {
  const verbose = process.env.DEBUG_BROWSER_LIFECYCLE === 'true';
  const trace = (message: string): void => {
    if (verbose) console.log(message);
  };

  try {
    trace(`[SearchEngine] Validating browser health...`);

    if (browser.isConnected()) {
      // Opening and closing an empty context checks that the browser still answers requests.
      const probeContext = await browser.newContext();
      await probeContext.close();

      trace(`[SearchEngine] Browser health check passed`);
      return true;
    }

    trace(`[SearchEngine] Browser is not connected`);
    return false;
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown error';
    console.log(`[SearchEngine] Browser health check failed: ${ reason } `);
    return false;
  }
}
1017+
/**
 * Logs a failure from a search engine and, when the message indicates that a
 * browser session was closed, calls `closeAll()` on the browser pool.
 *
 * Failures of the pool call are logged and swallowed, so this method does not
 * reject because of them.
 *
 * NOTE(review): the caller's comment says engines use dedicated browsers and
 * need no cleanup, yet this still closes the shared pool — confirm that is
 * intended and cannot affect other users of the pool.
 *
 * @param error - The caught value; anything that is not an `Error` is reported as 'Unknown error'.
 * @param engineName - Engine label used in the log lines.
 * @param attemptNumber - Attempt counter shown in the log line (defaults to 1).
 */
private async handleBrowserError(error: any, engineName: string, attemptNumber: number = 1): Promise<void> {
  const errorMessage = error instanceof Error ? error.message : 'Unknown error';
  console.error(`[SearchEngine] ${ engineName } browser error (attempt ${ attemptNumber } ): ${ errorMessage } `);

  // Message fragments treated as "the browser session went away".
  const closureSignatures = [
    'Target page, context or browser has been closed',
    'Browser has been closed',
    'Session has been closed',
  ];
  const sessionClosed = closureSignatures.some(signature => errorMessage.includes(signature));
  if (!sessionClosed) {
    return;
  }

  console.log(`[SearchEngine] Detected browser session closure, attempting to refresh browser pool`);

  try {
    await this.browserPool.closeAll();
    console.log(`[SearchEngine] Browser pool refreshed for ${ engineName } `);
  } catch (refreshError) {
    const reason = refreshError instanceof Error ? refreshError.message : 'Unknown error';
    console.error(`[SearchEngine] Failed to refresh browser pool: ${ reason } `);
  }
}
1038+
/**
 * Delegates to `closeAll()` on this engine's browser pool and waits for it to finish.
 * Any rejection from the pool propagates to the caller.
 */
async closeAll(): Promise<void> {
  const pool = this.browserPool;
  await pool.closeAll();
}
0 commit comments