Skip to content

Commit 9a7745d

Browse files
authored
Merge pull request #50 from Iceloof/dev
Dev
2 parents 459a7a3 + fda5550 commit 9a7745d

File tree

1 file changed

+19
-16
lines changed

1 file changed

+19
-16
lines changed

GoogleNews/__init__.py

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11

22
### MODULES
3-
3+
import re
44
import urllib.request
55
import dateparser, copy
6-
from bs4 import BeautifulSoup as Soup
6+
from bs4 import BeautifulSoup as Soup, ResultSet
77
from dateutil.parser import parse
88

99
### METHODS
@@ -86,6 +86,21 @@ def search(self, key):
8686
self.__key = urllib.request.quote(self.__key.encode(self.__encode))
8787
self.get_page()
8888

89+
def build_response(self):
    """Fetch ``self.url`` and parse the Google News result page.

    Side effects: stores the request/response/raw page on ``self`` and
    sets ``self.__totalcount`` from the "result-stats" div when present.

    Returns:
        The list of ``g-card`` result elements, or ``None`` when the page
        reports no results (or lacks the expected markup).
    """
    self.req = urllib.request.Request(self.url, headers=self.headers)
    self.response = urllib.request.urlopen(self.req)
    self.page = self.response.read()
    self.content = Soup(self.page, "html.parser")
    stats = self.content.find_all("div", id="result-stats")
    # find_all always returns a ResultSet, so truthiness is the only check needed.
    if stats:
        match = re.search(r'\d+', stats[0].text)
        if match is None:
            # Stats div exists but contains no digits — treat as no data
            # rather than crashing on match.group().
            # TODO might want to add output for user to know no data was found
            return
        self.__totalcount = int(match.group())
    else:
        # TODO might want to add output for user to know no data was found
        return
    search_divs = self.content.find_all("div", id="search")
    if not search_divs:
        # No "search" container on the page — avoid IndexError on [0].
        return
    result = search_divs[0].find_all("g-card")
    return result
103+
89104
def page_at(self, page=1):
90105
"""
91106
Retrieves a specific page from google.com in the news sections into __results.
@@ -104,13 +119,7 @@ def page_at(self, page=1):
104119
except AttributeError:
105120
raise AttributeError("You need to run a search() before using get_page().")
106121
try:
107-
self.req = urllib.request.Request(self.url, headers=self.headers)
108-
self.response = urllib.request.urlopen(self.req)
109-
self.page = self.response.read()
110-
self.content = Soup(self.page, "html.parser")
111-
stats = self.content.find_all("div", id="result-stats")[0].text
112-
self.__totalcount = int(stats[stats.find('bout')+5:stats.find('results')-1].replace(',', ''))
113-
result = self.content.find_all("div", id="search")[0].find_all("g-card")
122+
result = self.build_response()
114123
for item in result:
115124
try:
116125
tmp_text = item.find("div", {"role" : "heading"}).text.replace("\n","")
@@ -164,13 +173,7 @@ def get_page(self, page=1):
164173
except AttributeError:
165174
raise AttributeError("You need to run a search() before using get_page().")
166175
try:
167-
self.req = urllib.request.Request(self.url, headers=self.headers)
168-
self.response = urllib.request.urlopen(self.req)
169-
self.page = self.response.read()
170-
self.content = Soup(self.page, "html.parser")
171-
stats = self.content.find_all("div", id="result-stats")[0].text
172-
self.__totalcount = int(stats[stats.find('bout')+5:stats.find('results')-1].replace(',', ''))
173-
result = self.content.find_all("div", id="search")[0].find_all("g-card")
176+
result = self.build_response()
174177
for item in result:
175178
try:
176179
tmp_text = item.find("div", {"role" : "heading"}).text.replace("\n","")

0 commit comments

Comments
 (0)