11
22### MODULES
3-
3+ import re
44import urllib .request
55import dateparser , copy
6- from bs4 import BeautifulSoup as Soup
6+ from bs4 import BeautifulSoup as Soup , ResultSet
77from dateutil .parser import parse
88
99### METHODS
@@ -86,6 +86,21 @@ def search(self, key):
8686 self .__key = urllib .request .quote (self .__key .encode (self .__encode ))
8787 self .get_page ()
8888
def build_response(self):
    """
    Fetches self.url, parses the page and returns the news cards it contains.

    Side effects: sets self.req, self.response, self.page and self.content,
    and sets self.__totalcount when a result count can be read from the
    "result-stats" element.

    Returns:
        The "g-card" elements found inside the div with id "search", or an
        empty list when the page has no result statistics, no readable
        count, or no search container. An empty list keeps callers that do
        `for item in result` working instead of failing on None.
    """
    self.req = urllib.request.Request(self.url, headers=self.headers)
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(self.req) as response:
        self.response = response
        self.page = response.read()
    self.content = Soup(self.page, "html.parser")

    stats = self.content.find_all("div", id="result-stats")
    if not stats:
        # TODO might want to add output for user to know no data was found
        return []

    # Only look at the text before the first "(", which drops a trailing
    # "(0.45 seconds)"-style timing note if one is present.
    # NOTE(review): assumes text shaped like "About 1,234,000 results (…)"
    # or "Page 2 of about 1,234,000 results (…)" - confirm against live pages.
    stats_text = stats[0].text.split("(", 1)[0]
    # Digit runs may contain thousands separators; a bare r'\d+' would stop
    # at the first comma and report 1 instead of 1234000.
    numbers = re.findall(r"\d[\d,.]*", stats_text)
    if not numbers:
        return []
    # The total is taken from the last number so a leading page index
    # ("Page 2 of about ...") is not mistaken for the count.
    self.__totalcount = int(re.sub(r"\D", "", numbers[-1]))

    search_container = self.content.find("div", id="search")
    if search_container is None:
        return []
    return search_container.find_all("g-card")
103+
89104 def page_at (self , page = 1 ):
90105 """
91106 Retrieves a specific page from google.com in the news sections into __results.
@@ -104,13 +119,7 @@ def page_at(self, page=1):
104119 except AttributeError :
105120 raise AttributeError ("You need to run a search() before using get_page()." )
106121 try :
107- self .req = urllib .request .Request (self .url , headers = self .headers )
108- self .response = urllib .request .urlopen (self .req )
109- self .page = self .response .read ()
110- self .content = Soup (self .page , "html.parser" )
111- stats = self .content .find_all ("div" , id = "result-stats" )[0 ].text
112- self .__totalcount = int (stats [stats .find ('bout' )+ 5 :stats .find ('results' )- 1 ].replace (',' , '' ))
113- result = self .content .find_all ("div" , id = "search" )[0 ].find_all ("g-card" )
122+ result = self .build_response ()
114123 for item in result :
115124 try :
116125 tmp_text = item .find ("div" , {"role" : "heading" }).text .replace ("\n " ,"" )
@@ -164,13 +173,7 @@ def get_page(self, page=1):
164173 except AttributeError :
165174 raise AttributeError ("You need to run a search() before using get_page()." )
166175 try :
167- self .req = urllib .request .Request (self .url , headers = self .headers )
168- self .response = urllib .request .urlopen (self .req )
169- self .page = self .response .read ()
170- self .content = Soup (self .page , "html.parser" )
171- stats = self .content .find_all ("div" , id = "result-stats" )[0 ].text
172- self .__totalcount = int (stats [stats .find ('bout' )+ 5 :stats .find ('results' )- 1 ].replace (',' , '' ))
173- result = self .content .find_all ("div" , id = "search" )[0 ].find_all ("g-card" )
176+ result = self .build_response ()
174177 for item in result :
175178 try :
176179 tmp_text = item .find ("div" , {"role" : "heading" }).text .replace ("\n " ,"" )
0 commit comments