Commit 4b1d474

SNOW-749141: Fix uploading a file > 200M when the same filename already exists in a stage (#1470)
1 parent 0894b78 commit 4b1d474

3 files changed: +20 -4 lines changed

DESCRIPTION.md

Lines changed: 2 additions & 1 deletion

@@ -10,7 +10,8 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
 
 - v3.0.2(Unreleased)
 
-   - Fixed a memory leak in the logging module of the Cython extension
+   - Fixed a memory leak in the logging module of the Cython extension.
+   - Fixed a bug where the `put` command on AWS raised `AttributeError` when the file size was larger than 200M.
 
 - v3.0.1(February 28, 2023)
 
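To make the changelog entry concrete, the failing scenario looks roughly like the sketch below: a PUT of a local file larger than 200M into a stage that already holds a file with the same name, with overwrite left off. This is a minimal sketch, not code from the repository; the connection parameters, table name, and file path are placeholders.

```python
# Minimal reproduction sketch (hypothetical credentials, table, and file path).
# Before this commit, the second PUT below could raise AttributeError on AWS,
# because the multipart code path looked up the remote file header before the
# local SHA-256 digest had been computed.
import snowflake.connector

conn = snowflake.connector.connect(
    account="my_account",    # placeholder
    user="my_user",          # placeholder
    password="my_password",  # placeholder
    database="my_db",
    schema="public",
)
cur = conn.cursor()
cur.execute("create temporary table big_data_table (c1 string)")

# First upload: the file (> 200M) lands in the table stage.
cur.execute("put file:///tmp/big_file.csv @%big_data_table auto_compress=false")

# Second upload of the same file name, without overwrite=true: this is the
# path that previously failed during preprocess().
cur.execute("put file:///tmp/big_file.csv @%big_data_table auto_compress=false")

conn.close()
```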

src/snowflake/connector/storage_client.py

Lines changed: 1 addition & 2 deletions

@@ -185,8 +185,8 @@ def get_file_header(self, filename: str) -> FileHeader | None:
     def preprocess(self) -> None:
         meta = self.meta
         logger.debug(f"Preprocessing {meta.src_file_name}")
-
         if not meta.overwrite:
+            self.get_digest()  # self.get_file_header needs digest for multiparts upload when aws is used.
             self.get_file_header(meta.dst_file_name)  # Check if file exists on remote
             if meta.result_status == ResultStatus.UPLOADED:
                 # Skipped
@@ -202,7 +202,6 @@ def preprocess(self) -> None:
         if meta.require_compress:
             self.compress()
         self.get_digest()
-
         self.preprocessed = True
 
     def prepare_upload(self) -> None:
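The fix itself is an ordering constraint inside preprocess(): when overwrite is off, get_digest() must run before get_file_header(), because (per the new inline comment) the header lookup needs the digest for multipart uploads on AWS. The toy sketch below only illustrates that dependency; ToyMeta and ToyStorageClient are hypothetical stand-ins, not the connector's real classes.

```python
# Illustrative only: ToyMeta and ToyStorageClient are hypothetical stand-ins
# that mimic the ordering constraint enforced by this commit, not the
# connector's real classes.
from __future__ import annotations

import hashlib
from dataclasses import dataclass


@dataclass
class ToyMeta:
    src_data: bytes
    dst_file_name: str
    overwrite: bool = False
    sha256_digest: str | None = None


class ToyStorageClient:
    def __init__(self, meta: ToyMeta) -> None:
        self.meta = meta

    def get_digest(self) -> None:
        # Compute and cache the digest of the local payload.
        self.meta.sha256_digest = hashlib.sha256(self.meta.src_data).hexdigest()

    def get_file_header(self, filename: str) -> None:
        # Stand-in for the remote lookup; like the AWS multipart path, it
        # assumes the digest has already been computed.
        if self.meta.sha256_digest is None:
            raise AttributeError("digest must be computed before the header lookup")

    def preprocess(self) -> None:
        meta = self.meta
        if not meta.overwrite:
            self.get_digest()  # the fix: compute the digest first
            self.get_file_header(meta.dst_file_name)  # then check the remote copy


# With the fixed ordering this runs cleanly; swapping the two calls above
# reproduces the failure mode.
ToyStorageClient(ToyMeta(b"x" * 1024, "big_file.csv")).preprocess()
```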

test/integ/test_large_put.py

Lines changed: 17 additions & 1 deletion

@@ -56,10 +56,26 @@ def mocked_file_agent(*args, **kwargs):
         "snowflake.connector.cursor.SnowflakeFileTransferAgent",
         side_effect=mocked_file_agent,
     ):
+        # upload with auto compress = True
         cnx.cursor().execute(
-            f"put 'file://{files}' @%{db_parameters['name']}",
+            f"put 'file://{files}' @%{db_parameters['name']} auto_compress=True",
         )
         assert mocked_file_agent.agent._multipart_threshold == 10000
+        cnx.cursor().execute(f"remove @%{db_parameters['name']}")
+
+        # upload with auto compress = False
+        cnx.cursor().execute(
+            f"put 'file://{files}' @%{db_parameters['name']} auto_compress=False",
+        )
+        assert mocked_file_agent.agent._multipart_threshold == 10000
+
+        # Upload again. There was a bug when a large file is uploaded again while it already exists in a stage.
+        # Refer to preprocess(self) of storage_client.py.
+        # self.get_digest() needs to be called before self.get_file_header(meta.dst_file_name).
+        # SNOW-749141
+        cnx.cursor().execute(
+            f"put 'file://{files}' @%{db_parameters['name']} auto_compress=False",
+        )  # do not add `overwrite=True` because overwrite will skip the code path to extract file header.
 
     c = cnx.cursor()
     try:
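For reference, the mocked_file_agent helper named in the hunk header is not shown in this diff. Judging from the patch target (snowflake.connector.cursor.SnowflakeFileTransferAgent) and the _multipart_threshold == 10000 assertions, it presumably wraps the real transfer agent and lowers the multipart threshold so the multipart (large-file) path is exercised without an actually huge upload. A sketch under that assumption:

```python
# Assumed shape of the mocked_file_agent helper used above; its body is not
# part of this diff, so treat this as a sketch rather than the test's actual code.
from snowflake.connector.file_transfer_agent import SnowflakeFileTransferAgent


def mocked_file_agent(*args, **kwargs):
    # Forward to the real agent but force a tiny multipart threshold, and keep
    # a reference so the test can assert on agent._multipart_threshold.
    newkwargs = dict(kwargs, multipart_threshold=10000)
    agent = SnowflakeFileTransferAgent(*args, **newkwargs)
    mocked_file_agent.agent = agent
    return agent
```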
