Skip to content

Commit 0e9509d

Browse files
committed
SNOW-27306: removed generator and enumerate from row_to_python method to improve fetch performance. This change made fetch 60% faster.
1 parent 3da01f1 commit 0e9509d

File tree

6 files changed

+167
-120
lines changed

6 files changed

+167
-120
lines changed

converter.py

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,21 @@ def _generate_tzinfo_from_tzoffset(self, tzoffset_minutes):
8989
# FROM Snowflake to Python Objects
9090
#
9191
def to_python_method(self, type_name, row_type):
92+
ctx = {
93+
u'scale': row_type['scale'],
94+
}
9295
try:
9396
if self._use_numpy:
9497
return getattr(self, u'_{type_name}_numpy_to_python'.format(
95-
type_name=type_name)), None
98+
type_name=type_name)), ctx
9699
elif type_name == 'FIXED' and row_type['scale'] == 0:
97100
return self._FIXED_INT_to_python, None
98101
else:
99102
return getattr(self, u'_{type_name}_to_python'.format(
100-
type_name=type_name)), None
103+
type_name=type_name)), ctx
101104
except KeyError:
102105
# no type is defined
103-
return self._str_to_snowflake, None
106+
return self._TEXT_to_python, None
104107

105108
def _FIXED_INT_to_python(self, value, *_):
106109
return int(value)
@@ -144,10 +147,10 @@ def _DATE_numpy_to_python(self, value, *_):
144147
"""
145148
return numpy.datetime64(int(value), 'D')
146149

147-
def _extract_timestamp(self, value, col_desc, has_tz=False):
150+
def _extract_timestamp(self, value, ctx, has_tz=False):
148151
"""Extracts timestamp from raw data
149152
"""
150-
scale = col_desc[5]
153+
scale = ctx['scale']
151154
try:
152155
value1 = decimal.Decimal(value)
153156
big_int = int(value1.scaleb(scale)) # removed fraction
@@ -167,7 +170,7 @@ def _extract_timestamp(self, value, col_desc, has_tz=False):
167170
except decimal.InvalidOperation:
168171
return None, None, None
169172

170-
def _pre_TIMESTAMP_TZ_to_python(self, value, col_desc):
173+
def _pre_TIMESTAMP_TZ_to_python(self, value, ctx):
171174
u"""
172175
try to split value by space for handling new timestamp with timezone
173176
encoding format which has timezone index separate from the timestamp
@@ -182,7 +185,7 @@ def _pre_TIMESTAMP_TZ_to_python(self, value, col_desc):
182185
value = valueComponents[0]
183186

184187
tzoffset, microseconds, fraction_of_nanoseconds, nanoseconds = \
185-
self._extract_timestamp(value, col_desc,
188+
self._extract_timestamp(value, ctx,
186189
has_tz=(tzoffset_extracted is None))
187190

188191
if tzoffset_extracted is not None:
@@ -198,34 +201,34 @@ def _pre_TIMESTAMP_TZ_to_python(self, value, col_desc):
198201
t += tzinfo_value.utcoffset(t, is_dst=False)
199202
return t.replace(tzinfo=tzinfo_value), fraction_of_nanoseconds
200203

201-
def _TIMESTAMP_TZ_to_python(self, value, col_desc, *_):
204+
def _TIMESTAMP_TZ_to_python(self, value, ctx):
202205
"""
203206
TIMESTAMP TZ to datetime
204207
205208
The timezone offset is piggybacked.
206209
"""
207-
t, _ = self._pre_TIMESTAMP_TZ_to_python(value, col_desc)
210+
t, _ = self._pre_TIMESTAMP_TZ_to_python(value, ctx)
208211
return t
209212

210-
def _TIMESTAMP_TZ_numpy_to_python(self, value, col_desc, *_):
213+
def _TIMESTAMP_TZ_numpy_to_python(self, value, ctx):
211214
"""TIMESTAMP TZ to datetime
212215
213216
The timezone offset is piggybacked.
214217
"""
215218
t, fraction_of_nanoseconds = self._pre_TIMESTAMP_TZ_to_python(
216-
value, col_desc)
219+
value, ctx)
217220
ts = int(time.mktime(t.timetuple())) * 1000000000 + int(
218221
fraction_of_nanoseconds)
219222
return numpy.datetime64(ts, 'ns')
220223

221-
def _pre_TIMESTAMP_LTZ_to_python(self, value, col_desc):
224+
def _pre_TIMESTAMP_LTZ_to_python(self, value, ctx):
222225
u""" TIMESTAMP LTZ to datetime
223226
224227
This takes into consideration the session parameter TIMEZONE if
225228
available. If not, tzlocal is used
226229
"""
227230
tzoffset, microseconds, fraction_of_nanoseconds, nanoseconds = \
228-
self._extract_timestamp(value, col_desc)
231+
self._extract_timestamp(value, ctx)
229232
if tzoffset is None:
230233
return None
231234
try:
@@ -251,39 +254,39 @@ def _pre_TIMESTAMP_LTZ_to_python(self, value, col_desc):
251254
t = time.gmtime(microseconds / float(1000000))
252255
return t, fraction_of_nanoseconds
253256

254-
def _TIMESTAMP_LTZ_to_python(self, value, col_desc, *_):
255-
t, _ = self._pre_TIMESTAMP_LTZ_to_python(value, col_desc)
257+
def _TIMESTAMP_LTZ_to_python(self, value, ctx):
258+
t, _ = self._pre_TIMESTAMP_LTZ_to_python(value, ctx)
256259
return t
257260

258-
def _TIMESTAMP_LTZ_numpy_to_python(self, value, col_desc, *_):
261+
def _TIMESTAMP_LTZ_numpy_to_python(self, value, ctx):
259262
t, fraction_of_nanoseconds = self._pre_TIMESTAMP_LTZ_to_python(
260-
value, col_desc)
263+
value, ctx)
261264
ts = int(time.mktime(t.timetuple())) * 1000000000 + int(
262265
fraction_of_nanoseconds)
263266
return numpy.datetime64(ts, 'ns')
264267

265268
_TIMESTAMP_to_python = _TIMESTAMP_LTZ_to_python
266269

267-
def _pre_TIMESTAMP_NTZ_to_python(self, value, col_desc):
270+
def _pre_TIMESTAMP_NTZ_to_python(self, value, ctx):
268271
"""TIMESTAMP NTZ to datetime
269272
270273
No timezone info is attached.
271274
"""
272275
tzoffset, microseconds, fraction_of_nanoseconds, nanoseconds = \
273-
self._extract_timestamp(value, col_desc)
276+
self._extract_timestamp(value, ctx)
274277

275278
if tzoffset is None:
276279
return None, None, None
277280

278281
return nanoseconds, microseconds, fraction_of_nanoseconds
279282

280-
def _TIMESTAMP_NTZ_to_python(self, value, col_desc, *_):
283+
def _TIMESTAMP_NTZ_to_python(self, value, ctx):
281284
"""
282285
TIMESTAMP NTZ to datetime
283286
284287
No timezone info is attached.
285288
"""
286-
_, microseconds, _ = self._pre_TIMESTAMP_NTZ_to_python(value, col_desc)
289+
_, microseconds, _ = self._pre_TIMESTAMP_NTZ_to_python(value, ctx)
287290
if microseconds is None:
288291
return None
289292

@@ -292,23 +295,23 @@ def _TIMESTAMP_NTZ_to_python(self, value, col_desc, *_):
292295
t = ZERO_EPOCH + timedelta(seconds=(microseconds / float(1000000)))
293296
return t
294297

295-
def _TIMESTAMP_NTZ_numpy_to_python(self, value, col_desc, *_):
298+
def _TIMESTAMP_NTZ_numpy_to_python(self, value, ctx):
296299
"""
297300
TIMESTAMP NTZ to datetime64
298301
299302
No timezone info is attached.
300303
"""
301-
nanoseconds, _, _ = self._pre_TIMESTAMP_NTZ_to_python(value, col_desc)
304+
nanoseconds, _, _ = self._pre_TIMESTAMP_NTZ_to_python(value, ctx)
302305
return numpy.datetime64(nanoseconds, 'ns')
303306

304-
def _extract_time(self, value, col_desc):
307+
def _extract_time(self, value, ctx):
305308
u"""Extracts time from raw data
306309
307310
Returns a pair containing microseconds since midnight and nanoseconds
308311
since the last whole-number second. The last 6 digits of microseconds
309312
will be the same as the first 6 digits of nanoseconds.
310313
"""
311-
scale = col_desc[5]
314+
scale = ctx['scale']
312315
try:
313316
value1 = decimal.Decimal(value)
314317
big_int = int(value1.scaleb(scale)) # removed fraction
@@ -321,13 +324,13 @@ def _extract_time(self, value, col_desc):
321324
except decimal.InvalidOperation:
322325
return None, None
323326

324-
def _TIME_to_python(self, value, col_desc, *_):
327+
def _TIME_to_python(self, value, ctx):
325328
"""
326329
TIME to formatted string, SnowflakeDateTime, or datetime.time
327330
328331
No timezone is attached.
329332
"""
330-
microseconds, _ = self._extract_time(value, col_desc)
333+
microseconds, _ = self._extract_time(value, ctx)
331334
ts = ZERO_EPOCH + timedelta(seconds=(microseconds / float(1000000)))
332335
return ts.time()
333336

@@ -338,13 +341,13 @@ def _VARIANT_to_python(self, value, *_):
338341

339342
_VARIANT_numpy_to_python = _VARIANT_to_python
340343

341-
def _OBJECT_to_python(self, value, col_desc, *_):
342-
return self._VARIANT_to_python(value, col_desc)
344+
def _OBJECT_to_python(self, value, *_):
345+
return self._VARIANT_to_python(value)
343346

344347
_OBJECT_numpy_to_python = _OBJECT_to_python
345348

346-
def _ARRAY_to_python(self, value, col_desc, *_):
347-
return self._VARIANT_to_python(value, col_desc)
349+
def _ARRAY_to_python(self, value, *_):
350+
return self._VARIANT_to_python(value)
348351

349352
_ARRAY_numpy_to_python = _ARRAY_to_python
350353

converter_snowsql.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ def _get_format(self, type_name):
5656
# FROM Snowflake to Python objects
5757
#
5858
def to_python_method(self, type_name, row_type):
59+
ctx = {
60+
u'scale': row_type['scale'],
61+
}
5962
try:
6063
fmt = None
6164
if is_timestamp_type_name(type_name):
@@ -64,8 +67,9 @@ def to_python_method(self, type_name, row_type):
6467
datetime_class=SnowflakeDateTime)
6568
elif type_name == u'BINARY':
6669
fmt = SnowflakeBinaryFormat(self._get_format(type_name))
70+
ctx['fmt'] = fmt
6771
return getattr(self, u'_{type_name}_to_python'.format(
68-
type_name=type_name)), fmt
72+
type_name=type_name)), ctx
6973
except KeyError:
7074
# no type is defined, pass through it
7175
return self._TEXT_to_python, None
@@ -88,18 +92,19 @@ def _REAL_to_python(self, value, *_):
8892
"""
8993
return value
9094

91-
def _BINARY_to_python(self, value, _, fmt):
95+
def _BINARY_to_python(self, value, ctx):
9296
"""
9397
BINARY to a string formatted by BINARY_OUTPUT_FORMAT
9498
"""
95-
return fmt.format(binary_to_python(value))
99+
return ctx['fmt'].format(binary_to_python(value))
96100

97-
def _DATE_to_python(self, value, _, fmt):
101+
def _DATE_to_python(self, value, ctx):
98102
"""
99103
DATE to datetime
100104
101105
No timezone is attached.
102106
"""
107+
fmt = ctx['fmt']
103108
try:
104109
t = ZERO_EPOCH + timedelta(seconds=int(value) * (24 * 60 * 60))
105110
if fmt:
@@ -118,29 +123,29 @@ def _DATE_to_python(self, value, _, fmt):
118123
return u'{year:d}-{month:02d}-{day:02d}'.format(
119124
year=t.tm_year, month=t.tm_mon, day=t.tm_mday)
120125

121-
def _TIMESTAMP_TZ_to_python(self, value, col_desc, fmt):
126+
def _TIMESTAMP_TZ_to_python(self, value, ctx):
122127
"""
123128
TIMESTAMP TZ to datetime
124129
125130
The timezone offset is piggybacked.
126131
"""
127132
t, fraction_of_nanoseconds = self._pre_TIMESTAMP_TZ_to_python(
128-
value, col_desc)
129-
return _format_sftimestamp(fmt, t, fraction_of_nanoseconds)
133+
value, ctx)
134+
return _format_sftimestamp(ctx['fmt'], t, fraction_of_nanoseconds)
130135

131-
def _TIMESTAMP_LTZ_to_python(self, value, col_desc, fmt):
136+
def _TIMESTAMP_LTZ_to_python(self, value, ctx):
132137
t, fraction_of_nanoseconds = self._pre_TIMESTAMP_LTZ_to_python(
133-
value, col_desc)
134-
return _format_sftimestamp(fmt, t, fraction_of_nanoseconds)
138+
value, ctx)
139+
return _format_sftimestamp(ctx['fmt'], t, fraction_of_nanoseconds)
135140

136-
def _TIMESTAMP_NTZ_to_python(self, value, col_desc, fmt):
141+
def _TIMESTAMP_NTZ_to_python(self, value, ctx):
137142
"""
138143
TIMESTAMP NTZ to Snowflake Formatted String
139144
140145
No timezone info is attached.
141146
"""
142147
_, microseconds, fraction_of_nanoseconds = \
143-
self._pre_TIMESTAMP_NTZ_to_python(value, col_desc)
148+
self._pre_TIMESTAMP_NTZ_to_python(value, ctx)
144149
if microseconds is None:
145150
return None
146151
try:
@@ -151,16 +156,16 @@ def _TIMESTAMP_NTZ_to_python(self, value, col_desc, fmt):
151156
"ms). Falling back to use struct_time.",
152157
microseconds)
153158
t = time.gmtime(microseconds / float(1000000))
154-
return _format_sftimestamp(fmt, t, fraction_of_nanoseconds)
159+
return _format_sftimestamp(ctx['fmt'], t, fraction_of_nanoseconds)
155160

156-
def _TIME_to_python(self, value, col_desc, fmt):
161+
def _TIME_to_python(self, value, ctx):
157162
"""
158163
TIME to formatted string, SnowflakeDateTime, or datetime.time
159164
160165
No timezone is attached.
161166
"""
162167
microseconds, fraction_of_nanoseconds = \
163-
self._extract_time(value, col_desc)
168+
self._extract_time(value, ctx)
164169

165170
try:
166171
t = ZERO_EPOCH + timedelta(seconds=(microseconds / float(1000000)))
@@ -170,4 +175,4 @@ def _TIME_to_python(self, value, col_desc, fmt):
170175
"ms). Falling back to use struct_time.",
171176
microseconds)
172177
t = time.gmtime(microseconds / float(1000000))
173-
return _format_sftimestamp(fmt, t, fraction_of_nanoseconds)
178+
return _format_sftimestamp(ctx['fmt'], t, fraction_of_nanoseconds)

0 commit comments

Comments
 (0)