Skip to content
This repository was archived by the owner on May 17, 2022. It is now read-only.

Commit 3beb4e5

Browse files
committed
using beautiful soup extract method in L332
1 parent ba04287 commit 3beb4e5

File tree

1 file changed

+16
-16
lines changed

1 file changed

+16
-16
lines changed

main.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -203,29 +203,29 @@ def get_feeds(self):
203203
with urlopen(self.feed_configs['source']) as f:
204204
return f.read().decode('utf-8')
205205

206-
def summarize(self, soup:Soup, length, read_more):
207-
offset = len(read_more)
206+
def summarize(self, soup:Soup, max_length, read_more):
207+
trim = len(read_more)
208208
len_ = len(str(soup))
209-
if len_>length:
210-
offset += len_ - length
209+
if len_>max_length:
210+
trim += len_ - max_length
211211
removed = 0
212212
for element in reversed(list(soup.descendants)):
213-
if (not element.name) and len(str(element))>offset-removed:
213+
if (not element.name) and len(str(element))>trim-removed:
214214
s = str(element)
215-
wrap_index = s.rfind(' ',0 , offset-removed)
215+
wrap_index = s.rfind(' ',0 , trim-removed)
216216
if wrap_index == -1:
217-
element.replace_with(s[:-offset+removed])
218-
removed = offset
217+
element.replace_with(s[:-trim+removed])
218+
removed = trim
219219
else:
220220
element.replace_with(s[:wrap_index])
221-
removed = offset
221+
removed = trim
222222
else:
223223
element.replace_with('')
224224
removed += len(str(element))
225-
if removed >= offset:
225+
if removed >= trim:
226226
break
227227
soup.append(read_more)
228-
return str(soup), len_>length
228+
return str(soup), len_>max_length
229229

230230
# In this version the feed reader uses CSS selectors to get feeds.
231231
#
@@ -251,7 +251,7 @@ def read_feed(self, index=0):
251251
soup_page = Soup(feeds_page, self.feed_configs.get('feed-format', 'xml'))
252252
feeds_list = soup_page.select(self.feed_configs['feeds-selector'])
253253
title, link, content, time = None, None, None, None
254-
for feed in feeds_list[index::-1]:
254+
for feed in feeds_list[index:]:
255255
try:
256256
if self.__skip_field == 'feed':
257257
if self.__skip(feed):
@@ -326,10 +326,10 @@ def render_feed(self, feed: dict, header: str):
326326
try:
327327
if content:
328328
#Remove elements with selector
329-
remove_elem = self.feed_configs.get('remove-elements')
330-
if remove_elem:
331-
for elem in content.select():
332-
elem.replace_with('')
329+
remove_elem = self.feed_configs.get('remove-elements',[])
330+
for elem in remove_elem:
331+
for e in content.select(elem):
332+
e.extract()
333333
content = self.purge(content)
334334
images = content.find_all('img')
335335
first = True

0 commit comments

Comments
 (0)