11
11
12
12
from .errorcode import (ER_NO_ADDITIONAL_CHUNK , ER_CHUNK_DOWNLOAD_FAILED )
13
13
from .errors import (Error , OperationalError )
14
- from .network import (SnowflakeRestful , NO_TOKEN )
14
+ from .network import (SnowflakeRestful , NO_TOKEN , MAX_CONNECTION_POOL )
15
15
16
16
DEFAULT_REQUEST_TIMEOUT = 300
17
17
DEFAULT_CLIENT_RESULT_PREFETCH_SLOTS = 2
32
32
'ready' # True if ready to consume or False
33
33
])
34
34
35
+ logger = getLogger (__name__ )
36
+
35
37
36
38
class SnowflakeChunkDownloader (object ):
37
39
u"""
38
40
Large Result set chunk downloader class.
39
41
"""
40
42
41
- def __init__ (self , chunks , connection , cursor , qrmk , chunk_headers ,
42
- prefetch_slots = DEFAULT_CLIENT_RESULT_PREFETCH_SLOTS ,
43
- prefetch_threads = DEFAULT_CLIENT_RESULT_PREFETCH_THREADS ,
44
- use_ijson = False ):
45
- self .logger = getLogger (__name__ )
43
+ def _pre_init (self , chunks , connection , cursor , qrmk , chunk_headers ,
44
+ prefetch_slots = DEFAULT_CLIENT_RESULT_PREFETCH_SLOTS ,
45
+ prefetch_threads = DEFAULT_CLIENT_RESULT_PREFETCH_THREADS ,
46
+ use_ijson = False ):
46
47
self ._use_ijson = use_ijson
47
48
self ._session = None
48
49
@@ -70,22 +71,22 @@ def __init__(self, chunks, connection, cursor, qrmk, chunk_headers,
70
71
self ._chunk_size )
71
72
72
73
for idx , chunk in enumerate (chunks ):
73
- self . logger .info (u"queued chunk: url=%s, rowCount=%s" ,
74
- chunk [u'url' ], chunk [u'rowCount' ])
74
+ logger .info (u"queued chunk: url=%s, rowCount=%s" ,
75
+ chunk [u'url' ], chunk [u'rowCount' ])
75
76
self ._chunks [idx ] = SnowflakeChunk (
76
77
url = chunk [u'url' ],
77
78
result_data = None ,
78
79
ready = False ,
79
80
row_count = int (chunk [u'rowCount' ]))
80
81
81
- self . logger .debug (u'prefetch slots: %s, '
82
- u'prefetch threads: %s, '
83
- u'number of chunks: %s, '
84
- u'effective threads: %s' ,
85
- self ._prefetch_slots ,
86
- self ._prefetch_threads ,
87
- self ._chunk_size ,
88
- self ._effective_threads )
82
+ logger .debug (u'prefetch slots: %s, '
83
+ u'prefetch threads: %s, '
84
+ u'number of chunks: %s, '
85
+ u'effective threads: %s' ,
86
+ self ._prefetch_slots ,
87
+ self ._prefetch_threads ,
88
+ self ._chunk_size ,
89
+ self ._effective_threads )
89
90
90
91
self ._pool = ThreadPool (self ._effective_threads )
91
92
@@ -94,6 +95,15 @@ def __init__(self, chunks, connection, cursor, qrmk, chunk_headers,
94
95
95
96
self ._next_chunk_to_consume = 0
96
97
98
+ def __init__ (self , chunks , connection , cursor , qrmk , chunk_headers ,
99
+ prefetch_slots = DEFAULT_CLIENT_RESULT_PREFETCH_SLOTS ,
100
+ prefetch_threads = DEFAULT_CLIENT_RESULT_PREFETCH_THREADS ,
101
+ use_ijson = False ):
102
+ self ._pre_init (chunks , connection , cursor , qrmk , chunk_headers ,
103
+ prefetch_slots = prefetch_slots ,
104
+ prefetch_threads = prefetch_threads ,
105
+ use_ijson = use_ijson )
106
+ logger .info ('Chunk Downloader in memory' )
97
107
for idx in range (self ._num_chunks_to_prefetch ):
98
108
self ._pool .apply_async (self ._download_chunk , [idx ])
99
109
self ._chunk_locks [idx ] = Condition ()
@@ -103,47 +113,48 @@ def _download_chunk(self, idx):
103
113
"""
104
114
Downloads a chunk asynchronously
105
115
"""
106
- self . logger .debug (u'downloading chunk %s/%s' , idx , self ._chunk_size )
116
+ logger .debug (u'downloading chunk %s/%s' , idx + 1 , self ._chunk_size )
107
117
headers = {}
108
118
try :
109
119
if self ._chunk_headers is not None :
110
120
headers = self ._chunk_headers
111
- self . logger .debug (u'use chunk headers from result' )
121
+ logger .debug (u'use chunk headers from result' )
112
122
elif self ._qrmk is not None :
113
123
headers [SSE_C_ALGORITHM ] = SSE_C_AES
114
124
headers [SSE_C_KEY ] = self ._qrmk
115
125
116
- self . logger .debug (u"started getting the result set %s: %s" ,
117
- idx + 1 , self ._chunks [idx ].url )
126
+ logger .debug (u"started getting the result set %s: %s" ,
127
+ idx + 1 , self ._chunks [idx ].url )
118
128
result_data = self ._get_request (
119
129
self ._chunks [idx ].url ,
120
- headers )
121
- self . logger .debug (u"finished getting the result set %s: %s" ,
122
- idx + 1 , self ._chunks [idx ].url )
130
+ headers , max_connection_pool = self . _effective_threads )
131
+ logger .debug (u"finished getting the result set %s: %s" ,
132
+ idx + 1 , self ._chunks [idx ].url )
123
133
124
134
with self ._chunk_locks [idx ]:
125
135
self ._chunks [idx ] = self ._chunks [idx ]._replace (
126
136
result_data = result_data ,
127
137
ready = True )
128
138
self ._chunk_locks [idx ].notify ()
129
- self . logger .debug (
139
+ logger .debug (
130
140
u'added chunk %s/%s to a chunk list.' , idx + 1 ,
131
141
self ._chunk_size )
132
142
except Exception as e :
133
- self .logger .exception (
134
- u'Failed to fetch the large result set chunk' )
143
+ logger .exception (
144
+ u'Failed to fetch the large result set chunk %s/%s' ,
145
+ idx + 1 , self ._chunk_size )
135
146
self ._downloader_error = e
136
147
137
148
def next_chunk (self ):
138
149
"""
139
150
Gets the next chunk if ready
140
151
"""
141
- self . logger .debug (
152
+ logger .debug (
142
153
u'next_chunk_to_consume={next_chunk_to_consume}, '
143
154
u'next_chunk_to_download={next_chunk_to_download}, '
144
155
u'total_chunks={total_chunks}' .format (
145
- next_chunk_to_consume = self ._next_chunk_to_consume ,
146
- next_chunk_to_download = self ._next_chunk_to_download ,
156
+ next_chunk_to_consume = self ._next_chunk_to_consume + 1 ,
157
+ next_chunk_to_download = self ._next_chunk_to_download + 1 ,
147
158
total_chunks = self ._chunk_size ))
148
159
if self ._next_chunk_to_consume > 0 :
149
160
# clean up the previously fetched data and lock
@@ -169,12 +180,12 @@ def next_chunk(self):
169
180
raise self ._downloader_error
170
181
171
182
for attempt in range (MAX_RETRY_DOWNLOAD ):
172
- self . logger .debug (u'waiting for chunk %s/%s'
173
- u' in %s/%s download attempt' ,
174
- self ._next_chunk_to_consume + 1 ,
175
- self ._chunk_size ,
176
- attempt + 1 ,
177
- MAX_RETRY_DOWNLOAD )
183
+ logger .debug (u'waiting for chunk %s/%s'
184
+ u' in %s/%s download attempt' ,
185
+ self ._next_chunk_to_consume + 1 ,
186
+ self ._chunk_size ,
187
+ attempt + 1 ,
188
+ MAX_RETRY_DOWNLOAD )
178
189
done = False
179
190
for wait_counter in range (MAX_WAIT ):
180
191
with self ._chunk_locks [self ._next_chunk_to_consume ]:
@@ -184,16 +195,16 @@ def next_chunk(self):
184
195
self ._downloader_error is not None :
185
196
done = True
186
197
break
187
- self . logger .debug (u'chunk %s/%s is NOT ready to consume'
188
- u' in %s/%s(s)' ,
189
- self ._next_chunk_to_consume + 1 ,
190
- self ._chunk_size ,
191
- (wait_counter + 1 ) * WAIT_TIME_IN_SECONDS ,
192
- MAX_WAIT * WAIT_TIME_IN_SECONDS )
198
+ logger .debug (u'chunk %s/%s is NOT ready to consume'
199
+ u' in %s/%s(s)' ,
200
+ self ._next_chunk_to_consume + 1 ,
201
+ self ._chunk_size ,
202
+ (wait_counter + 1 ) * WAIT_TIME_IN_SECONDS ,
203
+ MAX_WAIT * WAIT_TIME_IN_SECONDS )
193
204
self ._chunk_locks [self ._next_chunk_to_consume ].wait (
194
205
WAIT_TIME_IN_SECONDS )
195
206
else :
196
- self . logger .debug (
207
+ logger .debug (
197
208
u'chunk %s/%s is still NOT ready. Restarting chunk '
198
209
u'downloader threads' ,
199
210
self ._next_chunk_to_consume + 1 ,
@@ -216,9 +227,9 @@ def next_chunk(self):
216
227
u'unknown reason.' ,
217
228
u'errno' : ER_CHUNK_DOWNLOAD_FAILED
218
229
})
219
- self . logger .debug (u'chunk %s/%s is ready to consume' ,
220
- self ._next_chunk_to_consume + 1 ,
221
- self ._chunk_size )
230
+ logger .debug (u'chunk %s/%s is ready to consume' ,
231
+ self ._next_chunk_to_consume + 1 ,
232
+ self ._chunk_size )
222
233
223
234
ret = self ._chunks [self ._next_chunk_to_consume ]
224
235
self ._next_chunk_to_consume += 1
@@ -243,7 +254,11 @@ def __del__(self):
243
254
# ignore all errors in the destructor
244
255
pass
245
256
246
- def _get_request (self , url , headers , retry = 10 ):
257
+ def _get_request (
258
+ self , url , headers ,
259
+ is_raw_binary_iterator = True ,
260
+ max_connection_pool = MAX_CONNECTION_POOL ,
261
+ retry = 10 ):
247
262
"""
248
263
GET request for Large Result set chunkloader
249
264
"""
@@ -254,7 +269,7 @@ def _get_request(self, url, headers, retry=10):
254
269
self ._connection .rest ._proxy_user ,
255
270
self ._connection .rest ._proxy_password )
256
271
257
- self . logger .debug (u'proxies=%s, url=%s' , proxies , url )
272
+ logger .debug (u'proxies=%s, url=%s' , proxies , url )
258
273
259
274
return SnowflakeRestful .access_url (
260
275
self ._connection ,
@@ -270,4 +285,6 @@ def _get_request(self, url, headers, retry=10):
270
285
retry = retry ,
271
286
token = NO_TOKEN ,
272
287
is_raw_binary = True ,
288
+ is_raw_binary_iterator = is_raw_binary_iterator ,
289
+ max_connection_pool = max_connection_pool ,
273
290
use_ijson = self ._use_ijson )
0 commit comments