@@ -1816,7 +1816,7 @@ def websearch(query):
18161816 global websearch_lastresponse
18171817 if not query or query == "" :
18181818 return []
1819- query = query [:300 ] # only search first 300 chars, due to search engine limits
1819+ query = query [:499 ] # only search first 499 chars, due to search engine limits
18201820 if query == websearch_lastquery :
18211821 print ("Returning cached websearch..." )
18221822 return websearch_lastresponse
@@ -1825,24 +1825,28 @@ def websearch(query):
18251825 import difflib
18261826 from html .parser import HTMLParser
18271827 from concurrent .futures import ThreadPoolExecutor
1828- num_results = 3
1828+ num_results = 10
18291829 searchresults = []
18301830 if args .debugmode != - 1 and not args .quiet :
18311831 print ("Performing new websearch..." )
18321832
18331833 def fetch_searched_webpage (url ):
1834- if args .debugmode :
1834+ if args .debugmode != - 1 and not args . quiet :
18351835 utfprint (f"WebSearch URL: { url } " )
18361836 try :
18371837 req = urllib .request .Request (url , headers = {'User-Agent' : 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' })
18381838 with urllib .request .urlopen (req , timeout = 15 ) as response :
18391839 html_content = response .read ().decode ('utf-8' , errors = 'ignore' )
1840+ # if args.debugmode != -1 and not args.quiet:
1841+ # print(f"Returning results with Googlebot compatible agent: {html_content}")
18401842 return html_content
18411843 except urllib .error .HTTPError : #we got blocked? try 1 more time with a different user agent
18421844 try :
18431845 req = urllib .request .Request (url , headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36' })
18441846 with urllib .request .urlopen (req , timeout = 15 ) as response :
18451847 html_content = response .read ().decode ('utf-8' , errors = 'ignore' )
1848+ # if args.debugmode != -1 and not args.quiet:
1849+ # print(f"Returning results with AppleWebKit/KHTML/Gecko compatible agent: {html_content}")
18461850 return html_content
18471851 except Exception as e :
18481852 if args .debugmode != - 1 and not args .quiet :
@@ -1856,11 +1860,16 @@ def fetch_webpages_parallel(urls):
18561860 with ThreadPoolExecutor () as executor :
18571861 # Submit tasks and gather results
18581862 results = list (executor .map (fetch_searched_webpage , urls ))
1863+ if args .debugmode != - 1 and not args .quiet :
1864+ print (f"Returning results: { urls } " )
1865+ # print(f"Returning results: {results}")
18591866 return results
18601867
# Cleans up visible text extracted from a fetched web page: removes stray
# whitespace that HTML extraction leaves before punctuation marks.
# NOTE(review): these lines are diff-hunk content with fused commit line-number
# prefixes (e.g. "18611868", "1871+"); code bytes are left untouched.
18611868 def normalize_page_text (text ):
18621869 text = re .sub (r'\s+([.,!?])' , r'\1' , text ) # Remove spaces before punctuation
18631870 # text = re.sub(r'([.,!?])([^\s])', r'\1 \2', text) # Ensure a single space follows punctuation, if not at the end of a line
1871+ # if args.debugmode != -1 and not args.quiet:
1872+ # print(f"Returning text: {text}")
18641873 return text
18651874
18661875 class VisibleTextParser (HTMLParser ):
@@ -1953,7 +1962,7 @@ def handle_data(self, data):
19531962 if matches .size > 100 and desclen - matches .size < 100 : #good enough match
19541963 # expand description by some chars both sides
19551964 expandamtbefore = 200
1956- expandamtafter = 800
1965+ expandamtafter = 3500
19571966 startpt = matches .a - expandamtbefore
19581967 startpt = 0 if startpt < 0 else startpt
19591968 endpt = matches .a + expandamtafter + desclen
0 commit comments