@@ -298,51 +298,63 @@ def test_large_dump_s3():
     server = ThreadedMotoServer()
     server.start()
     os.environ["LAMINAR_S3_HOST"] = "http://localhost:5000"
-
-    # create bucket and put objects
-    conn = boto3.client("s3", endpoint_url="http://localhost:5000")
-    conn.create_bucket(Bucket="testing_bucket")
-    conn.create_bucket(Bucket="testing_dump_bucket")
-    f = io.StringIO()
-    writer = csv.writer(f)
-    header = [["a", "b", "c", "d"]]
-    num_rows = 1000000
-    rows = [[1, 2, 3, 4, 5]] * num_rows
-    writer.writerows(header + rows)
-    f.seek(0)
-    f = io.BytesIO(f.read().encode("utf-8"))
-
-    conn.upload_fileobj(f, "testing_bucket", "test.csv")
-
-    flows = [
-        load(
-            {
-                "from": "s3://testing_bucket/test.csv",
-                "name": "res",
-                "format": "csv",
-            }
-        ),
-        dump_to_s3(
-            {
-                "prefix": "test",
-                "force-format": True,
-                "format": "csv",
-                "save_pipeline_spec": True,
-                "temporal_format_property": "outputFormat",
-                "bucket_name": "testing_dump_bucket",
-                "data_manager": "test",
-            }
-        ),
-    ]
-
-    rows, datapackage, _ = Flow(*flows).results()
-    body = (
-        conn.get_object(Bucket="testing_dump_bucket", Key="test/res.csv")["Body"]
-        .read()
-        .decode("utf-8")
-    )
-
-    assert len(body) == 8000008
-    assert len(datapackage.resources) == 1
-    assert datapackage.descriptor["count_of_rows"] == num_rows
-    server.stop()
+
+    # Work around a moto/botocore incompatibility: monkey-patch the
+    # streaming checksum validation so responses from the moto server
+    # are not rejected for a checksum mismatch.
+    import botocore.httpchecksum
+
+    original_validate = botocore.httpchecksum.StreamingChecksumBody._validate_checksum
+
+    def mock_validate_checksum(self):
+        pass  # skip checksum validation for moto
+
+    botocore.httpchecksum.StreamingChecksumBody._validate_checksum = mock_validate_checksum
+    try:
+        # create bucket and put objects
+        conn = boto3.client("s3", endpoint_url="http://localhost:5000")
+        conn.create_bucket(Bucket="testing_bucket")
+        conn.create_bucket(Bucket="testing_dump_bucket")
+        f = io.StringIO()
+        writer = csv.writer(f)
+        header = [["a", "b", "c", "d"]]
+        num_rows = 1000000
+        rows = [[1, 2, 3, 4, 5]] * num_rows
+        writer.writerows(header + rows)
+        f.seek(0)
+        f = io.BytesIO(f.read().encode("utf-8"))
+
+        conn.upload_fileobj(f, "testing_bucket", "test.csv")
+
+        flows = [
+            load(
+                {
+                    "from": "s3://testing_bucket/test.csv",
+                    "name": "res",
+                    "format": "csv",
+                }
+            ),
+            dump_to_s3(
+                {
+                    "prefix": "test",
+                    "force-format": True,
+                    "format": "csv",
+                    "save_pipeline_spec": True,
+                    "temporal_format_property": "outputFormat",
+                    "bucket_name": "testing_dump_bucket",
+                    "data_manager": "test",
+                }
+            ),
+        ]
+
+        rows, datapackage, _ = Flow(*flows).results()
+        body = (
+            conn.get_object(Bucket="testing_dump_bucket", Key="test/res.csv")["Body"]
+            .read()
+            .decode("utf-8")
+        )
+
+        assert len(body) == 8000008  # 1,000,001 lines (header + rows) x 8 bytes
+        assert len(datapackage.resources) == 1
+        assert datapackage.descriptor["count_of_rows"] == num_rows
+    finally:
+        # Restore original checksum validation
+        botocore.httpchecksum.StreamingChecksumBody._validate_checksum = original_validate
+        server.stop()
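
A note on the try/finally dance above: pytest's built-in `monkeypatch` fixture gives the same patch-and-restore semantics with less ceremony, since it undoes the patch at teardown even when an assertion fails. A minimal sketch, assuming pytest is in use and the same private botocore internals (`StreamingChecksumBody._validate_checksum`) that this change patches:

```python
# Sketch: the checksum workaround as a pytest fixture (hypothetical helper,
# not part of this change). monkeypatch restores the original method
# automatically when the test finishes, pass or fail.
import botocore.httpchecksum
import pytest


@pytest.fixture
def skip_moto_checksum(monkeypatch):
    # Replace the private validator so moto responses are never
    # rejected for a checksum mismatch.
    monkeypatch.setattr(
        botocore.httpchecksum.StreamingChecksumBody,
        "_validate_checksum",
        lambda self: None,
    )


def test_large_dump_s3(skip_moto_checksum):
    ...  # body unchanged from the version above, minus try/finally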
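```

If touching private internals is a concern, newer botocore releases (1.36 and later) expose public configuration for the default integrity protections that cause this mismatch in the first place. A sketch under that assumption; the option names below come from botocore's flexible-checksums config, not from this change:

```python
# Sketch: opt the client out of opportunistic checksums via public config
# (assumes botocore >= 1.36; earlier versions reject these options).
import boto3
from botocore.config import Config

conn = boto3.client(
    "s3",
    endpoint_url="http://localhost:5000",
    config=Config(
        request_checksum_calculation="when_required",
        response_checksum_validation="when_required",
    ),
)
```

With both set to "when_required", the client only computes or validates a checksum when the operation demands one, so moto's responses pass through without any monkey patch.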