Skip to content

Commit a82e876

Browse files
authored
Merge pull request #4 from Linusp/optimize
optimize command `filter` and `fetch-unread`
2 parents f2dda4e + 1393929 commit a82e876

File tree

5 files changed

+127
-81
lines changed

5 files changed

+127
-81
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# CHANGELOG
22

3+
## v0.2.1
4+
5+
Changed
6+
7+
- Added support for new output formats in command `fetch-unread`: `markdown` and `org-mode`
8+
- Changed command `filter`, see `examples/rules.example.yaml` for details
9+
310
## v0.2.0
411

512
Added

examples/rules.example.yaml

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,24 @@
1-
- field: title_or_content # title/content/title_or_content
2-
type: include_any # include_any/include_all/exclude
3-
rules:
4-
- 权威认证
5-
- 值得拥有
6-
- 潮牌
7-
- 新款
8-
- 求助
9-
- 招聘
10-
- 免费版
11-
- 包邮
12-
- 天猫
13-
- 淘宝
1+
- name: test
2+
folders:
3+
- inbox
4+
fields: ["title", "content"]
5+
filter:
6+
type: include_any # include_any/include_all/exclude
7+
rules:
8+
- 权威认证
9+
- 值得拥有
10+
- 潮牌
11+
- 新款
12+
- 求助
13+
- 招聘
14+
- 免费版
15+
- 包邮
16+
- 天猫
17+
- 淘宝
18+
actions:
19+
- type: mark_as_read
20+
- type: tag
21+
tags: tag1,tag2,tag3
22+
- type: like
23+
- type: star
24+
- type: broadcast

inoreader/filter.py

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,25 @@
11
import re
22

33

4-
class Filter(object):
5-
def __init__(self, *args, **kwargs):
6-
pass
7-
8-
@classmethod
9-
def from_config(cls, config):
10-
"""
11-
config: {
12-
'type': 'include_any',
13-
'rules': [],
14-
}
15-
"""
16-
cls_map = {
17-
sub_cls.name: sub_cls
18-
for sub_cls in cls.__subclasses__()
19-
}
20-
21-
sub_cls = config['type']
22-
rules = config['rules']
23-
return cls_map[sub_cls](rules)
4+
_FILTERS = {}
245

25-
def validate(self, text):
26-
raise NotImplementedError
276

7+
def register_filter(name, override=False):
8+
def wrap(cls):
9+
global _FILTERS
10+
if name not in _FILTERS or override:
11+
_FILTERS[name] = cls
12+
13+
return cls
2814

29-
class IncludeAnyFilter(Filter):
15+
return wrap
3016

31-
name = 'include_any'
17+
18+
@register_filter('include_any')
19+
class IncludeAnyFilter(object):
3220

3321
def __init__(self, rules):
34-
self.rules = [re.compile(regexp) for regexp in rules]
22+
self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules]
3523

3624
def validate(self, text):
3725
for regexp in self.rules:
@@ -41,12 +29,11 @@ def validate(self, text):
4129
return False
4230

4331

44-
class IncludeAllFilter(Filter):
45-
46-
name = 'include_all'
32+
@register_filter('include_all')
33+
class IncludeAllFilter(object):
4734

4835
def __init__(self, rules):
49-
self.rules = [re.compile(regexp) for regexp in rules]
36+
self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules]
5037

5138
def validate(self, text):
5239
for regexp in self.rules:
@@ -56,16 +43,25 @@ def validate(self, text):
5643
return True
5744

5845

59-
class ExcludeFilter(Filter):
60-
61-
name = 'exclude'
46+
@register_filter('exclude')
47+
class ExcludeFilter(object):
6248

6349
def __init__(self, rules):
64-
self.rules = [re.compile(regexp) for regexp in rules]
50+
self.rules = [re.compile(regexp, re.IGNORECASE) for regexp in rules]
6551

6652
def validate(self, text):
6753
for regexp in self.rules:
6854
if regexp.findall(text):
6955
return False
7056

7157
return True
58+
59+
60+
def get_filter(config):
61+
filter_type = config['type']
62+
if filter_type not in _FILTERS:
63+
raise ValueError("unsupported filter type: {}".format(filter_type))
64+
65+
filter_cls = _FILTERS[filter_type]
66+
params = {k: v for k, v in config.items() if k != 'type'}
67+
return filter_cls(**params)

inoreader/main.py

Lines changed: 66 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@
88
import codecs
99
import argparse
1010
from datetime import datetime
11+
from collections import defaultdict
1112
from configparser import ConfigParser
1213

1314
import yaml
1415
from inoreader import InoreaderClient
15-
from inoreader.filter import Filter
16+
from inoreader.filter import get_filter
1617

1718

1819
APPID_ENV_NAME = 'INOREADER_APP_ID'
@@ -141,7 +142,7 @@ def add_unread_fetch_parser(subparsers):
141142
parser.add_argument("-o", "--outfile", required=True, help="Filename to save articles")
142143
parser.add_argument(
143144
"--out-format",
144-
choices=['json', 'csv', 'plain'],
145+
choices=['json', 'csv', 'plain', 'markdown', 'org-mode'],
145146
default='json',
146147
help='Format of output file, default: json'
147148
)
@@ -166,6 +167,12 @@ def fetch_unread(folder, tags, outfile, out_format):
166167
print('TITLE: {}'.format(title), file=fout)
167168
print("CONTENT: {}".format(text), file=fout)
168169
print(file=fout)
170+
elif out_format == 'markdown':
171+
print('# {}\n'.format(title), file=fout)
172+
print(text + '\n', file=fout)
173+
elif out_format == 'org-mode':
174+
print('* {}\n'.format(title), file=fout)
175+
print(text + '\n', file=fout)
169176

170177
print("[{}] fetched {} articles and saved them in {}".format(datetime.now(), idx + 1, outfile))
171178

@@ -174,13 +181,7 @@ def fetch_unread(folder, tags, outfile, out_format):
174181

175182
def add_filter_parser(subparsers):
176183
parser = subparsers.add_parser('filter', help='Select articles and do something')
177-
parser.add_argument("-f", "--folder", required=True, help='Folder which articles belong to')
178184
parser.add_argument("-r", "--rules", required=True, help='YAML file with your rules')
179-
parser.add_argument("-a", "--action", default='read',
180-
choices=['read', 'like', 'tag', 'broadcast', 'star'],
181-
help='Action you want to perform, default: read')
182-
parser.add_argument("-t", "--tags",
183-
help="Tag(s) to be used when action is 'tag', seprate with comma")
184185

185186

186187
def apply_action(articles, client, action, tags):
@@ -190,7 +191,7 @@ def apply_action(articles, client, action, tags):
190191

191192
for article in articles:
192193
print("Add tags [{}] on article: {}".format(tags, article.title))
193-
elif action == 'read':
194+
elif action == 'mark_as_read':
194195
client.mark_as_read(articles)
195196
for article in articles:
196197
print("Mark article as read: {}".format(article.title))
@@ -208,35 +209,70 @@ def apply_action(articles, client, action, tags):
208209
print("Starred article: {}".format(article.title))
209210

210211

211-
def filter_articles(folder, rules_file, action, tags):
212+
def filter_articles(rules_file):
212213
client = get_client()
213214
filters = []
214215
for rule in yaml.load(open(rules_file)):
216+
name = rule.get('name')
217+
folders = rule['folders']
218+
219+
fields = []
220+
# only 'title' or 'content' is supported now
221+
for field in rule.get('fields', ['title', 'content']):
222+
if field not in ('title', 'content'):
223+
continue
224+
fields.append(field)
225+
cur_filter = get_filter(rule['filter'])
226+
227+
actions = []
228+
# only 'mark_as_read', 'like', 'star', 'broadcast' and 'tag' are supported now
229+
for action in rule.get('actions', [{'type': 'mark_as_read'}]):
230+
if action['type'] not in ('mark_as_read', 'like', 'star', 'broadcast', 'tag'):
231+
continue
232+
actions.append(action)
233+
215234
filters.append({
216-
'field': rule.get('field', 'title'),
217-
'filter': Filter.from_config(rule),
235+
'name': name,
236+
'folders': folders,
237+
'fields': fields,
238+
'filter': cur_filter,
239+
'actions': actions
218240
})
219241

220-
matched_articles = []
221-
for idx, article in enumerate(client.fetch_unread(folder=folder)):
222-
matched = False
223-
for article_filter in filters:
224-
if article_filter['field'] in ('title', 'title_or_content') and \
225-
article_filter['filter'].validate(article.title):
242+
articles_by_foler = {} # folder -> articles
243+
matched_articles = defaultdict(list) # action -> articles
244+
for rule in filters:
245+
articles = []
246+
for folder in rule['folders']:
247+
if folder not in articles_by_foler:
248+
articles_by_foler[folder] = list(client.fetch_unread(folder=folder))
249+
250+
articles.extend(articles_by_foler[folder])
251+
252+
# FIXME: deduplicate
253+
count = 0
254+
for article in articles:
255+
matched = False
256+
if 'title' in rule['fields'] and rule['filter'].validate(article.title):
226257
matched = True
227-
break
228-
if article_filter['field'] in ('content', 'title_or_content') and \
229-
article_filter['filter'].validate(article.text):
258+
if 'content' in rule['fields'] and rule['filter'].validate(article.text):
230259
matched = True
231-
break
232-
if matched:
233-
matched_articles.append(article)
234-
if len(matched_articles) == 10:
235-
apply_action(matched_articles, client, action, tags)
236-
matched_articles = []
237260

238-
if matched_articles:
239-
apply_action(matched_articles, client, action, tags)
261+
if matched:
262+
for action in rule['actions']:
263+
matched_articles[action['type']].append((article, action))
264+
265+
count += 1
266+
print("[{}] matched {} articles with filter: {}".format(
267+
datetime.now(), count, rule['name']))
268+
269+
for action_name in matched_articles:
270+
articles, actions = zip(*matched_articles[action_name])
271+
if action_name != 'tag':
272+
apply_action(articles, client, action_name, None)
273+
else:
274+
for article, action in zip(articles, actions):
275+
apply_action([article], client, 'tag', action['tags'])
240276

241277

242278
def main():
@@ -263,11 +299,7 @@ def main():
263299
elif args.command == 'fetch-unread':
264300
fetch_unread(args.folder, args.tags, args.outfile, args.out_format)
265301
elif args.command == 'filter':
266-
if args.action == 'tag' and not args.tags:
267-
print("Need at least one tag when action is 'tag'!")
268-
sys.exit(1)
269-
270-
filter_articles(args.folder, args.rules, args.action, args.tags)
302+
filter_articles(args.rules)
271303

272304

273305
if __name__ == '__main__':

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from setuptools import setup, find_packages
55

66

7-
VERSION = '0.2.0'
7+
VERSION = '0.2.1'
88
REQS = [
99
'lxml',
1010
'requests',

0 commit comments

Comments
 (0)