@@ -24,21 +24,67 @@ export class SearchEngine {
2424 return await this . rateLimiter . execute ( async ( ) => {
2525 console . log ( `[SearchEngine] Starting search with multiple engines...` ) ;
2626
27+ // Configuration from environment variables
28+ const enableQualityCheck = process . env . ENABLE_RELEVANCE_CHECKING !== 'false' ;
29+ const qualityThreshold = parseFloat ( process . env . RELEVANCE_THRESHOLD || '0.3' ) ;
30+ const forceMultiEngine = process . env . FORCE_MULTI_ENGINE_SEARCH === 'true' ;
31+
32+ console . log ( `[SearchEngine] Quality checking: ${ enableQualityCheck } , threshold: ${ qualityThreshold } , multi-engine: ${ forceMultiEngine } ` ) ;
33+
2734 // Try multiple approaches to get search results, starting with most reliable
2835 const approaches = [
2936 { method : this . tryBrowserBingSearch . bind ( this ) , name : 'Browser Bing' } ,
3037 { method : this . tryBrowserBraveSearch . bind ( this ) , name : 'Browser Brave' } ,
3138 { method : this . tryDuckDuckGoSearch . bind ( this ) , name : 'Axios DuckDuckGo' }
3239 ] ;
3340
41+ let bestResults : SearchResult [ ] = [ ] ;
42+ let bestEngine = 'None' ;
43+ let bestQuality = 0 ;
44+
3445 for ( const approach of approaches ) {
3546 try {
36- // Use shorter timeout per approach to allow trying all methods
37- const approachTimeout = Math . min ( timeout / 2 , 6000 ) ; // Max 6 seconds per approach
47+ // Use more aggressive timeouts for faster fallback
48+ const approachTimeout = Math . min ( timeout / 3 , 4000 ) ; // Max 4 seconds per approach for faster fallback
3849 const results = await approach . method ( sanitizedQuery , numResults , approachTimeout ) ;
3950 if ( results . length > 0 ) {
4051 console . log ( `[SearchEngine] Found ${ results . length } results with ${ approach . name } ` ) ;
41- return { results, engine : approach . name } ;
52+
53+ // Validate result quality to detect irrelevant results
54+ const qualityScore = enableQualityCheck ? this . assessResultQuality ( results , sanitizedQuery ) : 1.0 ;
55+ console . log ( `[SearchEngine] ${ approach . name } quality score: ${ qualityScore . toFixed ( 2 ) } /1.0` ) ;
56+
57+ // Track the best results so far
58+ if ( qualityScore > bestQuality ) {
59+ bestResults = results ;
60+ bestEngine = approach . name ;
61+ bestQuality = qualityScore ;
62+ }
63+
64+ // If quality is excellent, return immediately (unless forcing multi-engine)
65+ if ( qualityScore >= 0.8 && ! forceMultiEngine ) {
66+ console . log ( `[SearchEngine] Excellent quality results from ${ approach . name } , returning immediately` ) ;
67+ return { results, engine : approach . name } ;
68+ }
69+
70+ // If quality is acceptable and this isn't Bing (first engine), return
71+ if ( qualityScore >= qualityThreshold && approach . name !== 'Browser Bing' && ! forceMultiEngine ) {
72+ console . log ( `[SearchEngine] Good quality results from ${ approach . name } , using as primary` ) ;
73+ return { results, engine : approach . name } ;
74+ }
75+
76+ // If this is the last engine or quality is acceptable, prepare to return
77+ if ( approach === approaches [ approaches . length - 1 ] ) {
78+ if ( bestQuality >= qualityThreshold || ! enableQualityCheck ) {
79+ console . log ( `[SearchEngine] Using best results from ${ bestEngine } (quality: ${ bestQuality . toFixed ( 2 ) } )` ) ;
80+ return { results : bestResults , engine : bestEngine } ;
81+ } else if ( bestResults . length > 0 ) {
82+ console . log ( `[SearchEngine] Warning: Low quality results from all engines, using best available from ${ bestEngine } ` ) ;
83+ return { results : bestResults , engine : bestEngine } ;
84+ }
85+ } else {
86+ console . log ( `[SearchEngine] ${ approach . name } results quality: ${ qualityScore . toFixed ( 2 ) } , continuing to try other engines...` ) ;
87+ }
4288 }
4389 } catch ( error ) {
4490 console . error ( `[SearchEngine] ${ approach . name } approach failed:` , error ) ;
@@ -118,48 +164,134 @@ export class SearchEngine {
118164 const browser = await this . browserPool . getBrowser ( ) ;
119165
120166 try {
167+ // Enhanced browser context with more realistic fingerprinting
121168 const context = await browser . newContext ( {
122169 userAgent : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' ,
123170 viewport : { width : 1366 , height : 768 } ,
124171 locale : 'en-US' ,
125172 timezoneId : 'America/New_York' ,
173+ colorScheme : 'light' ,
174+ deviceScaleFactor : 1 ,
175+ hasTouch : false ,
176+ isMobile : false ,
177+ extraHTTPHeaders : {
178+ 'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' ,
179+ 'Accept-Language' : 'en-US,en;q=0.9' ,
180+ 'Accept-Encoding' : 'gzip, deflate, br' ,
181+ 'DNT' : '1' ,
182+ 'Upgrade-Insecure-Requests' : '1' ,
183+ 'Sec-Fetch-Dest' : 'document' ,
184+ 'Sec-Fetch-Mode' : 'navigate' ,
185+ 'Sec-Fetch-Site' : 'none'
186+ }
126187 } ) ;
127188
128189 const page = await context . newPage ( ) ;
129190
130- // Navigate to Bing search
131- const searchUrl = `https://www.bing.com/search?q=${ encodeURIComponent ( query ) } &count=${ Math . min ( numResults , 10 ) } ` ;
132- console . log ( `[SearchEngine] Browser navigating to Bing: ${ searchUrl } ` ) ;
133-
134- await page . goto ( searchUrl , {
135- waitUntil : 'domcontentloaded' ,
136- timeout : timeout
137- } ) ;
138-
139- // Wait for search results to load
191+ // Try enhanced Bing search with proper web interface flow
140192 try {
141- await page . waitForSelector ( '.b_algo, .b_result' , { timeout : 3000 } ) ;
142- } catch {
143- console . log ( `[SearchEngine] Browser Bing results selector not found, proceeding anyway` ) ;
193+ const results = await this . tryEnhancedBingSearch ( page , query , numResults , timeout ) ;
194+ await context . close ( ) ;
195+ return results ;
196+ } catch ( enhancedError ) {
197+ console . log ( `[SearchEngine] Enhanced Bing search failed, trying fallback: ${ enhancedError instanceof Error ? enhancedError . message : 'Unknown error' } ` ) ;
198+
199+ // Fallback to direct URL approach with enhanced parameters
200+ const results = await this . tryDirectBingSearch ( page , query , numResults , timeout ) ;
201+ await context . close ( ) ;
202+ return results ;
144203 }
145-
146- // Get the page content
147- const html = await page . content ( ) ;
148-
149- await context . close ( ) ;
150-
151- console . log ( `[SearchEngine] Browser Bing got HTML with length: ${ html . length } ` ) ;
152-
153- const results = this . parseBingResults ( html , numResults ) ;
154- console . log ( `[SearchEngine] Browser Bing parsed ${ results . length } results` ) ;
155-
156- return results ;
157204 } catch ( error ) {
158205 console . error ( `[SearchEngine] Browser Bing search failed:` , error ) ;
159206 throw error ;
160207 }
161208 }
162209
/**
 * Searches Bing by driving its web interface instead of requesting a results URL directly.
 *
 * Flow: open the Bing homepage, pause briefly, type the query into the search
 * box, click the search icon and wait for the resulting navigation, then parse
 * whatever HTML the page holds.
 *
 * @param page       Browser page used for navigation (typed `any` by the caller).
 * @param query      Search text entered into the search box.
 * @param numResults Result cap forwarded to `parseBingResults`.
 * @param timeout    Budget in ms: half is given to the homepage load, the full
 *                   value to the post-submit navigation.
 * @returns The results produced by `parseBingResults` for the final page HTML.
 * @throws Re-throws any error raised while locating, filling or submitting the
 *         search form, as well as homepage navigation errors.
 */
private async tryEnhancedBingSearch(page: any, query: string, numResults: number, timeout: number): Promise<SearchResult[]> {
  const searchBoxSelector = '#sb_form_q';
  const submitSelector = '#search_icon';
  const resultsSelector = '.b_algo, .b_result';

  console.log(`[SearchEngine] Trying enhanced Bing search via web interface...`);

  // Load the homepage before searching (the original intent: establish a session first).
  const homepageOptions = { waitUntil: 'domcontentloaded', timeout: timeout / 2 };
  await page.goto('https://www.bing.com', homepageOptions);

  // Short fixed pause so the page can settle before the form is touched.
  await page.waitForTimeout(500);

  // Type the query and submit through the page's own form.
  try {
    await page.waitForSelector(searchBoxSelector, { timeout: 2000 });
    await page.fill(searchBoxSelector, query);

    // The navigation wait is registered before the click so the navigation is not missed.
    const navigationOptions = { waitUntil: 'domcontentloaded', timeout: timeout };
    await Promise.all([
      page.waitForNavigation(navigationOptions),
      page.click(submitSelector)
    ]);
  } catch (submitError) {
    console.log(`[SearchEngine] Search form submission failed, falling back to URL navigation`);
    throw submitError;
  }

  // A missing results selector is tolerated: parsing is attempted regardless.
  try {
    await page.waitForSelector(resultsSelector, { timeout: 3000 });
  } catch {
    console.log(`[SearchEngine] Enhanced Bing results selector not found, proceeding anyway`);
  }

  const pageHtml = await page.content();
  console.log(`[SearchEngine] Enhanced Bing got HTML with length: ${pageHtml.length} `);

  const parsedResults = this.parseBingResults(pageHtml, numResults);
  console.log(`[SearchEngine] Enhanced Bing parsed ${parsedResults.length} results`);

  return parsedResults;
}
253+
/**
 * Searches Bing by navigating straight to a results URL.
 *
 * The URL carries the encoded query, a result count capped at 10, a freshly
 * generated `cvid`, and the fixed `form`, `sp` and `qs` values the original
 * author took from successful manual searches.
 *
 * The query string is assembled from a parameter list joined with `&`, so no
 * whitespace can leak into the parameter values.
 *
 * @param page       Browser page used for navigation (typed `any` by the caller).
 * @param query      Search text; URL-encoded with `encodeURIComponent`.
 * @param numResults Requested result count; the URL asks for at most 10, and
 *                   the same value is forwarded to `parseBingResults`.
 * @param timeout    Navigation timeout in ms for `page.goto`.
 * @returns The results produced by `parseBingResults` for the loaded page HTML.
 * @throws Propagates navigation errors from `page.goto`.
 */
private async tryDirectBingSearch(page: any, query: string, numResults: number, timeout: number): Promise<SearchResult[]> {
  console.log(`[SearchEngine] Trying direct Bing search with enhanced parameters...`);

  // Identifier in the 32-hex-character shape produced by generateConversationId().
  const cvid = this.generateConversationId();

  // Build the query string piecewise; joining with '&' keeps it free of stray spaces.
  const queryParameters = [
    `q=${encodeURIComponent(query)}`,
    `count=${Math.min(numResults, 10)}`,
    'form=QBLH',
    'sp=-1',
    'qs=n',
    `cvid=${cvid}`
  ];
  const searchUrl = `https://www.bing.com/search?${queryParameters.join('&')}`;
  console.log(`[SearchEngine] Browser navigating to enhanced Bing URL: ${searchUrl} `);

  await page.goto(searchUrl, {
    waitUntil: 'domcontentloaded',
    timeout: timeout
  });

  // A missing results selector is tolerated: parsing is attempted regardless.
  try {
    await page.waitForSelector('.b_algo, .b_result', { timeout: 3000 });
  } catch {
    console.log(`[SearchEngine] Direct Bing results selector not found, proceeding anyway`);
  }

  const html = await page.content();
  console.log(`[SearchEngine] Direct Bing got HTML with length: ${html.length} `);

  const results = this.parseBingResults(html, numResults);
  console.log(`[SearchEngine] Direct Bing parsed ${results.length} results`);

  return results;
}
284+
/**
 * Builds a random identifier of 32 uppercase hexadecimal characters, used as
 * the `cvid` query parameter of a Bing search URL.
 *
 * Digits are drawn with `Math.random`, so the value is not suitable for
 * anything security-sensitive.
 *
 * @returns A 32-character string over the alphabet `0-9A-F`.
 */
private generateConversationId(): string {
  const hexDigits = '0123456789ABCDEF';
  const idLength = 32;

  // One independent random digit per position, concatenated in order.
  const pickDigit = (): string => hexDigits[Math.floor(Math.random() * hexDigits.length)];
  return Array.from({ length: idLength }, pickDigit).join('');
}
294+
163295
164296 private async tryDuckDuckGoSearch ( query : string , numResults : number , timeout : number ) : Promise < SearchResult [ ] > {
165297 console . log ( `[SearchEngine] Trying DuckDuckGo as fallback...` ) ;
@@ -665,6 +797,94 @@ export class SearchEngine {
665797 return url ;
666798 }
667799
/**
 * Scores how well a set of search results matches the query, from 0 to 1.
 *
 * Per result, over the lower-cased title, description and URL:
 *   - keyword ratio: fraction of query keywords found as substrings;
 *   - phrase bonus: +0.3 for each adjacent keyword pair (plus the leading
 *     keyword triple) found verbatim;
 *   - the sum is capped at 1.0;
 *   - penalty: -0.2 for each off-topic term found as a whole word, floored at 0.
 * The return value is the mean of the per-result scores.
 *
 * Off-topic terms are matched on word boundaries, so "car" does not fire on
 * "career". A term that also occurs in the query is not penalised, because
 * the user asked about it.
 *
 * @param results       Results to score; missing title/description/url fields
 *                      are treated as empty strings.
 * @param originalQuery The query the results were fetched for.
 * @returns 0 for an empty result list, 0.5 when the query contains no usable
 *          keywords, otherwise the average score in [0, 1].
 */
private assessResultQuality(results: SearchResult[], originalQuery: string): number {
  if (!results || results.length === 0) return 0;

  const queryText = originalQuery || '';

  // Words ignored when extracting keywords from the query.
  // NOTE(review): 'group' and 'members' are not generic stop words; presumably
  // added while tuning a specific query - confirm they should stay.
  const commonWords = new Set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'group', 'members']);
  const queryWords = queryText.toLowerCase()
    .replace(/[^\w\s]/g, ' ')
    .split(/\s+/)
    .filter(word => word.length > 2 && !commonWords.has(word));

  if (queryWords.length === 0) return 0.5; // Default score if no meaningful keywords

  console.log(`[SearchEngine] Quality assessment - Query keywords: [${queryWords.join(', ')} ]`);

  // Phrases depend only on the query, so build them once: every adjacent
  // keyword pair, plus the first three keywords when there are at least three.
  const queryPhrases: string[] = [];
  for (let i = 0; i < queryWords.length - 1; i++) {
    queryPhrases.push(`${queryWords[i]} ${queryWords[i + 1]}`);
  }
  if (queryWords.length >= 3) {
    queryPhrases.push(queryWords.slice(0, 3).join(' '));
  }

  // Terms that usually signal an off-topic result. Each is matched as a whole
  // word with an optional plural suffix; two-word terms accept optional spacing.
  const irrelevantTerms = [
    'recipe', 'cooking', 'food', 'restaurant', 'menu',
    'weather', 'temperature', 'forecast',
    'shopping', 'sale', 'price', 'buy', 'store',
    'movie', 'film', 'tv\\s*show', 'entertainment',
    'sports', 'game', 'score', 'team',
    'fashion', 'clothing', 'style',
    'travel', 'hotel', 'flight', 'vacation',
    'car', 'vehicle', 'automotive',
    'real\\s*estate', 'property', 'house', 'apartment'
  ];
  const irrelevantPatterns = irrelevantTerms
    .map(term => new RegExp(`\\b${term}(?:s|es)?\\b`, 'i'))
    // A topic the query itself mentions is relevant by definition.
    .filter(pattern => !pattern.test(queryText));

  let totalScore = 0;

  for (const result of results) {
    const title = (result && result.title) || '';
    const description = (result && result.description) || '';
    const url = (result && result.url) || '';
    const combinedText = `${title.toLowerCase()} ${description.toLowerCase()} ${url.toLowerCase()} `;

    const phraseMatches = queryPhrases.filter(phrase => combinedText.includes(phrase)).length;
    const keywordMatches = queryWords.filter(keyword => combinedText.includes(keyword)).length;

    // Keyword coverage plus phrase bonus, capped at 1.0.
    const keywordRatio = keywordMatches / queryWords.length;
    const phraseBonus = phraseMatches * 0.3;
    const resultScore = Math.min(1.0, keywordRatio + phraseBonus);

    // 0.2 off for every distinct off-topic term present in the result.
    let penalty = 0;
    for (const pattern of irrelevantPatterns) {
      if (pattern.test(combinedText)) {
        penalty += 0.2;
      }
    }

    const finalScore = Math.max(0, resultScore - penalty);

    console.log(`[SearchEngine] Result "${title.substring(0, 50)} ..." - Score: ${finalScore.toFixed(2)} (keywords: ${keywordMatches} /${queryWords.length} , phrases: ${phraseMatches} , penalty: ${penalty.toFixed(2)} )`);

    totalScore += finalScore;
  }

  // results is non-empty here, so the division is safe.
  return totalScore / results.length;
}
887+
/**
 * Shuts down by delegating to the browser pool's `closeAll()` and waiting for
 * it to finish.
 *
 * @returns A promise that resolves once the pool's `closeAll()` has settled.
 */
async closeAll(): Promise<void> {
  const pool = this.browserPool;
  await pool.closeAll();
}
0 commit comments