Skip to content
This repository was archived by the owner on May 17, 2022. It is now read-only.

Commit 51e3d8d

Browse files
committed
bug-fix new feature
1 parent fe61414 commit 51e3d8d

File tree

2 files changed

+39
-14
lines changed

2 files changed

+39
-14
lines changed

config-example.jsonc

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,27 @@
1414
//}
1515
},
1616
"feed-configs":{
17-
"source": "https://pcworms.blog.ir/rss",
17+
"source": "https://pcworms.ir/rss",
1818
"parse": "xml",
1919
// FEEDS TEMPLATE: (set null to skip that property)
2020
// feeds-selector: css-selector for each feed item
21-
// time-selector: css-selector for date of feed
21+
// time-selector: css-selector for time of feed
22+
// time-attribute: if the time is stored in an attribute, specify the attribute name here
2223
// link-selector: css-selector for link of post
24+
// link-attribute: if the link is stored in an attribute, specify the attribute name here
2325
// title-selector: css-selector for title of a feed
26+
// title-attribute: if the title is stored in an attribute, specify the attribute name here
2427
// content-selector: css-selector for content of feed
2528
// feed-skip-condition: define a condition to skip a feed
26-
// format: feed/css-selector, content/css-selector, title/regex
29+
// format: feed/css-selector, content/css-selector, title/regex, link/regex
2730
// remove-elements-selector: hide any element that matches this css-selector
2831
"feeds-selector": "item",
29-
"date-selector": "pubDate",
32+
"time-selector": "pubDate",
33+
"time-attribute": null,
3034
"link-selector": "link",
35+
"link-attribute": null,
3136
"title-selector": "title",
37+
"title-attribute": null,
3238
"content-selector": "description",
3339
"feed-skip-condition": "content/[name=\"skip\"]",
3440
"remove-elements-selector": ".skip"

main.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def __init__(
125125
# - link-selector: how to get link of source
126126
# - content-selector: how to get content
127127
# - skip-condition: how to check skip condition
128-
# - format: feed/{selector}, content/{selector}, title/{regex}, none
128+
# - format: feed/{selector}, content/{selector}, title/{regex}, link/{regex}, none
129129

130130
self.__skip = lambda feed: False
131131
skip_condition = feed_configs.get('feed-skip-condition')
@@ -138,6 +138,9 @@ def __init__(
138138
elif self.__skip_field == 'title':
139139
match = re.compile(skip_condition).match
140140
self.__skip = lambda title: bool(match(title))
141+
elif self.__skip_field == 'link':
142+
match = re.compile(skip_condition).match
143+
self.__skip = lambda link: bool(match(link))
141144

142145
def log_bug(self, exc:Exception, msg='', report = True, disable_notification = False,**args):
143146
info = BugReporter.exception(msg, exc, report = self.bug_reporter and report)
@@ -247,7 +250,7 @@ def read_feed(self, index=0):
247250

248251
soup_page = Soup(feeds_page, self.feed_configs.get('feed-format', 'xml'))
249252
feeds_list = soup_page.select(self.feed_configs['feeds-selector'])
250-
title, link, content, date = None, None, None, None
253+
title, link, content, time = None, None, None, None
251254
for feed in feeds_list[index:]:
252255
try:
253256
if self.__skip_field == 'feed':
@@ -257,17 +260,34 @@ def read_feed(self, index=0):
257260
title_selector = self.feed_configs['title-selector']
258261
if title_selector:
259262
# title-selector could be None (null)
260-
title = str(feed.select(title_selector)[0].text)
263+
if self.feed_configs['title-attribute']:
264+
title = str(feed.select_one(title_selector).attrs[self.feed_configs['title-attribute']])
265+
else:
266+
title = str(feed.select_one(title_selector).text)
261267

262268
if self.__skip_field == 'title':
263269
if self.__skip(title):
264270
continue
265271

266272
link_selector = self.feed_configs['link-selector']
267273
if link_selector:
268-
link = str(feed.select(link_selector)[0].text)
274+
# link-selector could be None (null)
275+
if self.feed_configs['link-attribute']:
276+
link = str(feed.select_one(link_selector).attrs[self.feed_configs['link-attribute']])
277+
else:
278+
link = str(feed.select_one(link_selector).text)
269279

270-
date = str(feed.select(self.feed_configs['date-selector'])[0].text)
280+
if self.__skip_field == 'link':
281+
if self.__skip(link):
282+
continue
283+
284+
time_selector = self.feed_configs['time-selector']
285+
if time_selector:
286+
# time-selector could be None (null)
287+
if self.feed_configs['time-attribute']:
288+
time = str(feed.select_one(time_selector).attrs[self.feed_configs['time-attribute']])
289+
else:
290+
time = str(feed.select_one(time_selector).text)
271291

272292
content_selector = self.feed_configs['content-selector']
273293
if content_selector:
@@ -285,7 +305,7 @@ def read_feed(self, index=0):
285305
'title': title,
286306
'link': link,
287307
'content': content,
288-
'date': date
308+
'date': time
289309
}
290310

291311
def render_feed(self, feed: dict, header: str):
@@ -433,15 +453,14 @@ def iter_all_chats(self):
433453

434454
def check_new_feed(self):
435455
last_date = self.get_data('last-feed-date', DB = self.data_db)
436-
skip_date_check = not self.feed_configs.get('check-date',True)
437456
for feed in self.read_feed():
438457
feed_date = parse_date(feed['date'])
439-
if not last_date or last_date < feed_date: # if last_date not exist or last feed's date is older than the new one
458+
if feed_date and (not last_date or last_date < feed_date): # if feed_date is not None and last_date not exist or last feed's date is older than the new one
440459
self.set_data('last-feed-date', feed_date, DB = self.data_db)
441-
if skip_date_check or (last_date and last_date < feed_date):
460+
if not feed_date or (last_date and last_date < feed_date):
442461
messages = self.render_feed(feed, header= self.get_string('new-feed'))
443462
self.send_feed(messages, self.iter_all_chats())
444-
if skip_date_check:
463+
if not feed_date or not last_date:
445464
break #just send last feed
446465
if self.__check:
447466
self.check_thread = Timer(self.interval, self.check_new_feed)

0 commit comments

Comments
 (0)