55
66import pytest
77
8+ import pandas as pd
9+ import pandas ._testing as tm
810
9- @pytest .fixture
11+
12+ @pytest .fixture (scope = "module" )
1013def directory_data ():
1114 return ["a" , "b" , "c" ], [
1215 {"first" : {"a" : 1 , "b" : 2 , "c" : 3 }},
@@ -28,10 +31,49 @@ def directory_data_to_file(tmp_path, directory_data):
2831 return tmp_path
2932
3033
@pytest.fixture
def s3_bucket_public_with_directory_data(s3_bucket_public, directory_data_to_file):
    """
    Upload every entry of ``directory_data_to_file`` to ``s3_bucket_public``.

    Each object is stored under the file's bare name with a ``text/csv``
    content type. The same bucket object is returned afterwards.
    """
    for file in directory_data_to_file.iterdir():
        # Open inside a ``with`` block so the handle is closed as soon as the
        # upload call returns, rather than being left open for the garbage
        # collector to clean up.
        with file.open("rb") as body:
            s3_bucket_public.put_object(
                Key=file.name,
                Body=body,
                ContentType="text/csv",
            )
    return s3_bucket_public
43+
44+
def test_directory_data(directory_data_to_file):
    """
    Check that the directory fixture holds three files whose first CSV row
    is ``a, b, c``.
    """
    csv_paths = list(directory_data_to_file.iterdir())
    assert len(csv_paths) == 3

    expected_header = ["a", "b", "c"]
    for csv_path in csv_paths:
        # Only the first row is read; the remaining rows are not inspected.
        with csv_path.open(encoding="utf-8") as handle:
            first_row = next(csv_reader(handle))
        assert first_row == expected_header
52+
53+
def test_read_directory_local(directory_data, directory_data_to_file):
    """
    Iterate over ``pd.read_csv`` called on the local directory and compare the
    combined frames with the fixture data.
    """
    field_names, data_list = directory_data
    expected_columns = set(field_names)

    frames = []
    for frame in pd.read_csv(directory_data_to_file):
        assert set(frame.columns) == expected_columns
        frames.append(frame)
    assert len(frames) == len(data_list)

    # Sort on every column so the comparison does not depend on the order in
    # which the individual frames were produced.
    combined = pd.concat(frames, ignore_index=True)
    sort_keys = list(combined.columns)
    combined = combined.sort_values(by=sort_keys).reset_index(drop=True)

    # NOTE(review): ``expected`` is built in fixture order and is not sorted;
    # this presumes the fixture rows are already in ascending order - confirm.
    expected_rows = []
    for data in data_list:
        expected_rows.extend(data.values())
    expected = pd.DataFrame(expected_rows)

    tm.assert_frame_equal(combined, expected)
65+
66+
def test_read_directory_s3(s3_bucket_public_with_directory_data, s3so, directory_data):
    """
    Read the bucket root through ``pd.read_csv`` and compare the combined
    frames with the fixture data.
    """
    _, data_list = directory_data
    df_list = list(
        pd.read_csv(
            f"s3://{s3_bucket_public_with_directory_data.name}/",
            storage_options=s3so,
        )
    )
    # Derive the expected count from the fixture data, as the local-directory
    # test does, instead of hard-coding the number of files.
    assert len(df_list) == len(data_list)

    # Sort on every column so the comparison does not depend on the order in
    # which the individual frames were produced.
    df_concat = pd.concat(df_list, ignore_index=True)
    df_concat = df_concat.sort_values(by=list(df_concat.columns)).reset_index(drop=True)
    expected = pd.DataFrame([value for data in data_list for value in data.values()])
    tm.assert_frame_equal(df_concat, expected)
0 commit comments