11#! python3
22
3+ from collections import namedtuple
34from contextlib import contextmanager
45from pathlib import Path
56from threading import Lock
910import time
1011import json
1112
12- import enlighten
1313from worker import async_ , await_ , sleep , Defer
1414# from urllib3.util import is_fp_closed
1515from urllib3 .exceptions import IncompleteRead
2020from .profile import get as profile
2121from .session_manager import session_manager
2222from .filename_ext import get_ext
23+ from .channel import request_ch
2324
2425cooldown = {}
2526grabber_pool = {}
2627grabber_pool_lock = Lock ()
27- pb_manager = enlighten .get_manager ()
2828
2929@contextmanager
3030def get_request_lock (url ):
@@ -74,6 +74,8 @@ def grabber_log(obj):
7474 content = time .strftime ("%Y-%m-%dT%H:%M:%S%z" ) + "\n " + json .dumps (obj , indent = 2 , sort_keys = True ) + "\n \n "
7575 content_write (profile ("grabber.log" ), content , append = True )
7676
77+ inc_request_id = 1
78+
7779def grabber (url , * , referer = None , retry = False , done = None , proxy = None , ** kwargs ):
7880 """Request url, return text or bytes of the content."""
7981 s = session_manager .get (url )
@@ -91,6 +93,10 @@ def grabber(url, *, referer=None, retry=False, done=None, proxy=None, **kwargs):
9193 if done :
9294 done (s , r )
9395
96+ global inc_request_id
97+ r .request_id = inc_request_id
98+ inc_request_id += 1
99+
94100 return r
95101
96102RETRYABLE_HTTP_CODES = (423 , 429 , 503 )
@@ -149,6 +155,8 @@ def iter_content(r):
149155 """Iterate the content of the response."""
150156 yield from r .iter_content ()
151157
158+ RequestProgress = namedtuple ("RequestProgress" , "id loaded total hostname" , defaults = (None , None , None ))
159+
152160def grabimg (* args , on_opened = None , tempfile = None , headers = None , ** kwargs ):
153161 """Grab the image. Return ImgResult"""
154162 kwargs ["stream" ] = True
@@ -192,23 +200,23 @@ def grabimg(*args, on_opened=None, tempfile=None, headers=None, **kwargs):
192200 def _ ():
193201 nonlocal loaded
194202 u = urlparse (r .url )
195- with pb_manager .counter (total = total , unit = "b" , leave = False , desc = u .hostname ) as counter :
196- counter .update (loaded )
197- if tempfile :
198- Path (tempfile ).parent .mkdir (parents = True , exist_ok = True )
199- mode = "ab" if loaded else "wb"
200- with open (tempfile , mode = mode ) as f :
201- for chunk in iter_content (r ):
202- f .write (chunk )
203- counter .update (len (chunk ))
204- loaded += len (chunk )
205- else :
203+ request_ch .pub ("REQUEST_START" , data = RequestProgress (id = r .request_id , loaded = loaded , total = total , hostname = u .hostname ))
204+ if tempfile :
205+ Path (tempfile ).parent .mkdir (parents = True , exist_ok = True )
206+ mode = "ab" if loaded else "wb"
207+ with open (tempfile , mode = mode ) as f :
206208 for chunk in iter_content (r ):
207- content_list .append (chunk )
208- counter .update (len (chunk ))
209+ f .write (chunk )
209210 loaded += len (chunk )
211+ request_ch .pub ("REQUEST_PROGRESS" , data = RequestProgress (id = r .request_id , loaded = loaded ))
212+ else :
213+ for chunk in iter_content (r ):
214+ content_list .append (chunk )
215+ loaded += len (chunk )
216+ request_ch .pub ("REQUEST_PROGRESS" , data = RequestProgress (id = r .request_id , loaded = loaded ))
210217 finally :
211218 # FIXME: is it safe to always close the connection?
219+ request_ch .pub ("REQUEST_END" , data = RequestProgress (id = r .request_id ))
212220 r .close ()
213221 if total and loaded < total :
214222 raise IncompleteRead (loaded , total - loaded )
0 commit comments