from django.utils import timezone as django_timezone
from django.core.files.base import ContentFile
from .models import RSSFeed, RSSItem
-import logging
-
-logger = logging.getLogger(__name__)

def crawl_all_rss_feeds():
    """Crawl all active RSS feeds."""
@@ -28,12 +25,10 @@ def crawl_all_rss_feeds():
            results['processed_feeds'] += 1
            results['new_items'] += result.get('new_items', 0)
            logfire.info(f"Successfully crawled feed {feed.name}: {result.get('new_items', 0)} new items")
-            logger.info(f"Successfully crawled feed {feed.name}: {result.get('new_items', 0)} new items")
        except Exception as e:
            error_msg = f"Error crawling feed {feed.name}: {str(e)}"
            results['errors'].append(error_msg)
            logfire.error(error_msg)
-            logger.error(error_msg)

    return results
@@ -46,15 +41,13 @@ def crawl_single_rss_feed(feed_id):
        raise Exception(f"RSS Feed with id {feed_id} not found")

    logfire.info(f"Starting to crawl RSS feed: {feed.name} ({feed.url})")
-    logger.info(f"Starting to crawl RSS feed: {feed.name} ({feed.url})")

    try:
        # Parse the RSS feed
        parsed_feed = feedparser.parse(feed.url)

        if parsed_feed.bozo:
            logfire.warning(f"RSS feed {feed.name} has parsing issues: {parsed_feed.bozo_exception}")
-            logger.warning(f"RSS feed {feed.name} has parsing issues: {parsed_feed.bozo_exception}")

        new_items_count = 0
@@ -65,7 +58,6 @@ def crawl_single_rss_feed(feed_id):
            if not guid and not link:
                logfire.warning(f"Skipping entry without GUID or link in feed {feed.name}")
-                logger.warning(f"Skipping entry without GUID or link in feed {feed.name}")
                continue

            # Check for duplicates
@@ -105,11 +97,9 @@ def crawl_single_rss_feed(feed_id):
                )
                new_items_count += 1
                logfire.debug(f"Created new RSS item: {rss_item.title}")
-                logger.debug(f"Created new RSS item: {rss_item.title}")

            except Exception as e:
                logfire.error(f"Error creating RSS item for {link}: {str(e)}")
-                logger.error(f"Error creating RSS item for {link}: {str(e)}")
                continue

        # Update the last crawled time
@@ -123,7 +113,6 @@ def crawl_single_rss_feed(feed_id):
        }

        logfire.info(f"Completed crawling {feed.name}: {new_items_count} new items out of {len(parsed_feed.entries)} total entries")
-        logger.info(f"Completed crawling {feed.name}: {new_items_count} new items out of {len(parsed_feed.entries)} total entries")
        return result

    except requests.RequestException as e:
@@ -136,15 +125,13 @@ def crawl_single_rss_feed(feed_id):
def crawl_rss():
    """RSS crawling task that runs every 10 minutes."""
    logfire.info("start to crawl rss")
-    logger.info("start to crawl rss")
    return crawl_all_rss_feeds()


@shared_task
def crawl_rss_item_content():
    """Task that crawls the body content of RSS items (runs every 10 minutes)."""
    logfire.info("Starting RSS item content crawling")
-    logger.info("Starting RSS item content crawling")

    # Fetch the most recent uncrawled item from within the last two weeks
    two_weeks_ago = django_timezone.now() - timedelta(days=14)
@@ -155,11 +142,9 @@ def crawl_rss_item_content():
    if not pending_item:
        logfire.info("No pending RSS items to crawl")
-        logger.info("No pending RSS items to crawl")
        return {"status": "no_items", "message": "No pending items to crawl"}

    logfire.info(f"Crawling RSS item: {pending_item.title} ({pending_item.link})")
-    logger.info(f"Crawling RSS item: {pending_item.title} ({pending_item.link})")

    # Change the crawling status to in progress (prevent concurrent processing)
    pending_item.crawling_status = 'completed'  # Set temporarily to prevent duplicate processing
@@ -186,7 +171,6 @@ def crawl_rss_item_content():
        pending_item.save()

        logfire.info(f"Successfully crawled RSS item: {pending_item.title}")
-        logger.info(f"Successfully crawled RSS item: {pending_item.title}")

        return {
            "status": "success",
@@ -202,7 +186,6 @@ def crawl_rss_item_content():
        pending_item.save(update_fields=['crawling_status', 'error_message'])

        logfire.error(error_msg)
-        logger.error(error_msg)

        return {
            "status": "failed",
@@ -217,7 +200,6 @@ def crawl_rss_item_content():
        pending_item.save(update_fields=['crawling_status', 'error_message'])

        logfire.error(error_msg)
-        logger.error(error_msg)

        return {
            "status": "failed",