Commit c3d5c74

Fix s3 test
1 parent 4a4a597 commit c3d5c74

File tree

1 file changed (+60, -48)

tests/processors/test_dump_to_s3.py

Lines changed: 60 additions & 48 deletions
@@ -298,51 +298,63 @@ def test_large_dump_s3():
     server = ThreadedMotoServer()
     server.start()
     os.environ["LAMINAR_S3_HOST"] = "http://localhost:5000"
-
-    # create bucket and put objects
-    conn = boto3.client("s3", endpoint_url="http://localhost:5000")
-    conn.create_bucket(Bucket="testing_bucket")
-    conn.create_bucket(Bucket="testing_dump_bucket")
-    f = io.StringIO()
-    writer = csv.writer(f)
-    header = [["a", "b", "c", "d"]]
-    num_rows = 1000000
-    rows = [[1, 2, 3, 4, 5]] * num_rows
-    writer.writerows(header + rows)
-    f.seek(0)
-    f = io.BytesIO(f.read().encode("utf-8"))
-
-    conn.upload_fileobj(f, "testing_bucket", "test.csv")
-
-    flows = [
-        load(
-            {
-                "from": "s3://testing_bucket/test.csv",
-                "name": "res",
-                "format": "csv",
-            }
-        ),
-        dump_to_s3(
-            {
-                "prefix": "test",
-                "force-format": True,
-                "format": "csv",
-                "save_pipeline_spec": True,
-                "temporal_format_property": "outputFormat",
-                "bucket_name": "testing_dump_bucket",
-                "data_manager": "test",
-            }
-        ),
-    ]
-
-    rows, datapackage, _ = Flow(*flows).results()
-    body = (
-        conn.get_object(Bucket="testing_dump_bucket", Key="test/res.csv")["Body"]
-        .read()
-        .decode("utf-8")
-    )
-
-    assert len(body) == 8000008
-    assert len(datapackage.resources) == 1
-    assert datapackage.descriptor["count_of_rows"] == num_rows
-    server.stop()
+
+    # Disable checksum validation to fix moto compatibility issues
+    # Monkey patch the checksum validation to avoid moto checksum mismatch
+    import botocore.httpchecksum
+    original_validate = botocore.httpchecksum.StreamingChecksumBody._validate_checksum
+    def mock_validate_checksum(self):
+        pass # Skip checksum validation for moto
+    botocore.httpchecksum.StreamingChecksumBody._validate_checksum = mock_validate_checksum
+
+    try:
+        # create bucket and put objects
+        conn = boto3.client("s3", endpoint_url="http://localhost:5000")
+        conn.create_bucket(Bucket="testing_bucket")
+        conn.create_bucket(Bucket="testing_dump_bucket")
+        f = io.StringIO()
+        writer = csv.writer(f)
+        header = [["a", "b", "c", "d"]]
+        num_rows = 1000000
+        rows = [[1, 2, 3, 4, 5]] * num_rows
+        writer.writerows(header + rows)
+        f.seek(0)
+        f = io.BytesIO(f.read().encode("utf-8"))
+
+        conn.upload_fileobj(f, "testing_bucket", "test.csv")
+
+        flows = [
+            load(
+                {
+                    "from": "s3://testing_bucket/test.csv",
+                    "name": "res",
+                    "format": "csv",
+                }
+            ),
+            dump_to_s3(
+                {
+                    "prefix": "test",
+                    "force-format": True,
+                    "format": "csv",
+                    "save_pipeline_spec": True,
+                    "temporal_format_property": "outputFormat",
+                    "bucket_name": "testing_dump_bucket",
+                    "data_manager": "test",
+                }
+            ),
+        ]
+
+        rows, datapackage, _ = Flow(*flows).results()
+        body = (
+            conn.get_object(Bucket="testing_dump_bucket", Key="test/res.csv")["Body"]
+            .read()
+            .decode("utf-8")
+        )
+
+        assert len(body) == 8000008
+        assert len(datapackage.resources) == 1
+        assert datapackage.descriptor["count_of_rows"] == num_rows
+    finally:
+        # Restore original checksum validation
+        botocore.httpchecksum.StreamingChecksumBody._validate_checksum = original_validate
+        server.stop()
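
Note on reuse: the try/finally above scopes the monkey patch to a single test. A minimal sketch of the same idea as a pytest fixture, so other moto-backed tests could share it; the fixture name and placing it in a conftest.py are assumptions, not part of this commit:

import botocore.httpchecksum
import pytest


@pytest.fixture
def no_checksum_validation():
    # Temporarily stub out botocore's streaming checksum check, which can
    # report false mismatches when the endpoint is a local moto server.
    original = botocore.httpchecksum.StreamingChecksumBody._validate_checksum
    botocore.httpchecksum.StreamingChecksumBody._validate_checksum = lambda self: None
    try:
        yield
    finally:
        # Restore the real validator so later tests run against stock botocore.
        botocore.httpchecksum.StreamingChecksumBody._validate_checksum = original

A test would then request no_checksum_validation as a parameter instead of carrying its own patch-and-restore block. On recent botocore releases it may also be possible to avoid patching entirely by constructing the client with botocore.config.Config(response_checksum_validation="when_required"); treat that option as an assumption to verify against the installed botocore version.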

0 commit comments