@@ -390,41 +390,6 @@ def test_reconcile_collection_types(
390390# ---------------------------- PARQUET SPECIFICS ---------------------------------- #
391391
392392
393- @pytest .mark .parametrize ("validation" , ["warn" , "allow" , "forbid" , "skip" ])
394- @pytest .mark .parametrize ("lazy" , [True , False ])
395- @pytest .mark .parametrize (
396- "any_tmp_path" ,
397- ["tmp_path" , pytest .param ("s3_tmp_path" , marks = pytest .mark .s3 )],
398- indirect = True ,
399- )
400- def test_read_write_parquet_fallback_schema_json_success (
401- any_tmp_path : str , mocker : pytest_mock .MockerFixture , validation : Any , lazy : bool
402- ) -> None :
403- # In https://github.com/Quantco/dataframely/pull/107, the
404- # mechanism for storing collection metadata was changed.
405- # Prior to this change, the metadata was stored in a `schema.json` file.
406- # After this change, the metadata was moved into the parquet files.
407- # This test verifies that the change was implemented in a backward-compatible manner:
408- # The new code can still read parquet files that do not contain the metadata,
409- # and will not call `validate` if the `schema.json` file is present.
410-
411- # Arrange
412- tester = ParquetCollectionStorageTester ()
413- collection = MyCollection .create_empty ()
414- tester .write_untyped (collection , any_tmp_path , lazy )
415-
416- fs : AbstractFileSystem = url_to_fs (any_tmp_path )[0 ]
417- with fs .open (fs .sep .join ([any_tmp_path , "schema.json" ]), "w" ) as f :
418- f .write (collection .serialize ())
419-
420- # Act
421- spy = mocker .spy (MyCollection , "validate" )
422- tester .read (MyCollection , any_tmp_path , lazy , validation = validation )
423-
424- # Assert
425- spy .assert_not_called ()
426-
427-
428393@pytest .mark .parametrize ("validation" , ["allow" , "warn" ])
429394@pytest .mark .parametrize ("lazy" , [True , False ])
430395@pytest .mark .parametrize (
@@ -435,16 +400,17 @@ def test_read_write_parquet_fallback_schema_json_success(
435400def test_read_write_parquet_schema_json_fallback_corrupt (
436401 any_tmp_path : str , mocker : pytest_mock .MockerFixture , validation : Any , lazy : bool
437402) -> None :
438- """If the schema.json file is present, but corrupt, we should always fall back to
403+ """If schema information is present, but corrupt, we should always fall back to
439404 validating."""
440405 # Arrange
441406 collection = MyCollection .create_empty ()
442407 tester = ParquetCollectionStorageTester ()
443- tester .write_untyped (collection , any_tmp_path , lazy )
444-
445- fs : AbstractFileSystem = url_to_fs (any_tmp_path )[0 ]
446- with fs .open (fs .sep .join ([any_tmp_path , "schema.json" ]), "w" ) as f :
447- f .write ("} this is not a valid JSON {" )
408+ tester .write_untyped (
409+ collection ,
410+ any_tmp_path ,
411+ lazy ,
412+ metadata = {COLLECTION_METADATA_KEY : "} this is not a valid JSON {" },
413+ )
448414
449415 # Act
450416 spy = mocker .spy (MyCollection , "validate" )
0 commit comments