66
77# useful for handling different item types with a single interface
88
9- from typing import Any , Union
9+ from typing import Any
1010
1111# don't remove
12- import v2ex_scrapy .insert_ignore
1312from v2ex_scrapy .DB import DB
1413from v2ex_scrapy .items import CommentItem , MemberItem , TopicItem , TopicSupplementItem
1514
16- ItemsType = Union [ TopicItem , CommentItem , MemberItem , TopicSupplementItem ]
15+ ItemsType = TopicItem | CommentItem | MemberItem | TopicSupplementItem
1716
1817
1918class TutorialScrapyPipeline :
@@ -31,22 +30,40 @@ def __init__(self):
3130
3231 def process_item (
3332 self ,
34- item : Union [ ItemsType , Any ] ,
33+ item : ItemsType | Any ,
3534 spider ,
3635 ):
3736 if isinstance (item , (TopicItem , CommentItem , MemberItem , TopicSupplementItem )):
3837 item_type = type (item )
3938 self .data [item_type ].append (item )
4039 if len (self .data [item_type ]) >= self .BATCH :
41- self .db . session . add_all (self .data [item_type ])
40+ self .process_it (self .data [item_type ])
4241 self .data [item_type ] = []
43- self .db .session .commit ()
4442 return item
4543
44+ def process_it (self , items : list [ItemsType ]):
45+ if len (items ) > 0 and isinstance (items [0 ], MemberItem ):
46+ self .process_members (items )
47+ else :
48+ self .db .session .add_all (items )
49+ self .db .session .commit ()
50+
51+ def process_members (self , items : list [MemberItem ]):
52+ for item in items :
53+ e = (
54+ self .db .session .query (MemberItem )
55+ .where (MemberItem .username == item .username )
56+ .first ()
57+ )
58+ if e is None :
59+ self .db .session .add_all ([item ])
60+ elif e .uid is None :
61+ e .uid = item .uid
62+ self .db .session .commit ()
63+
4664 def save_all (self ):
4765 for _ , v in self .data .items ():
48- self .db .session .add_all (v )
49- self .db .session .commit ()
66+ self .process_it (v )
5067
5168 def close_spider (self , spider ):
5269 self .save_all ()
0 commit comments