5
5
6
6
import pytest
7
7
8
+ import pandas as pd
9
+ import pandas ._testing as tm
8
10
9
- @pytest .fixture
11
+
12
+ @pytest .fixture (scope = "module" )
10
13
def directory_data ():
11
14
return ["a" , "b" , "c" ], [
12
15
{"first" : {"a" : 1 , "b" : 2 , "c" : 3 }},
@@ -28,10 +31,49 @@ def directory_data_to_file(tmp_path, directory_data):
28
31
return tmp_path
29
32
30
33
34
@pytest.fixture
def s3_bucket_public_with_directory_data(s3_bucket_public, directory_data_to_file):
    """Upload every file of the local CSV directory into the public bucket.

    Parameters
    ----------
    s3_bucket_public
        Bucket object exposing ``put_object`` (presumably a boto3 ``Bucket``
        resource supplied by another fixture -- confirm in conftest).
    directory_data_to_file
        Directory path whose entries are uploaded one by one.

    Returns
    -------
    The same ``s3_bucket_public`` object, now holding one object per file,
    keyed by the file's base name and tagged as ``text/csv``.
    """
    for file in directory_data_to_file.iterdir():
        # Open through a context manager so the handle is closed as soon as
        # the upload returns instead of lingering until garbage collection.
        with file.open("rb") as body:
            s3_bucket_public.put_object(
                Key=file.name,
                Body=body,
                ContentType="text/csv",
            )
    return s3_bucket_public
43
+
44
+
31
45
def test_directory_data(directory_data_to_file):
    """Sanity-check the directory fixture: three files, each with header a, b, c."""
    entries = list(directory_data_to_file.iterdir())
    assert len(entries) == 3

    for entry in entries:
        with entry.open(encoding="utf-8") as handle:
            # Only the first row (the header) is inspected.
            first_row = next(csv_reader(handle))
            assert first_row == ["a", "b", "c"]
52
+
53
+
54
def test_read_directory_local(directory_data, directory_data_to_file):
    """Read a local directory through ``pd.read_csv`` and compare with the fixture.

    ``directory_data`` supplies ``(field_names, data_list)``; the call under
    test is iterated and expected to yield one DataFrame per entry of
    ``data_list``, each carrying exactly the ``field_names`` columns.
    """
    field_names, data_list = directory_data

    df_list = []
    for df in pd.read_csv(directory_data_to_file):
        assert set(df.columns) == set(field_names)
        df_list.append(df)
    assert len(df_list) == len(data_list)

    def _normalize(frame):
        # Order rows by every column and renumber them so the comparison
        # does not depend on the order in which files were read.
        return frame.sort_values(by=list(frame.columns)).reset_index(drop=True)

    result = _normalize(pd.concat(df_list, ignore_index=True))
    # Normalise the expected frame the same way as the result; otherwise the
    # test silently relies on the fixture rows already being in sorted order.
    expected = _normalize(
        pd.DataFrame([value for data in data_list for value in data.values()])
    )
    tm.assert_frame_equal(result, expected)
65
+
66
+
67
def test_read_directory_s3(s3_bucket_public_with_directory_data, s3so, directory_data):
    """Read the bucket root through ``pd.read_csv`` and compare with the fixture.

    The bucket fixture is expected to already contain the uploaded CSV files;
    ``s3so`` is forwarded unchanged as ``storage_options``.
    """
    _, data_list = directory_data

    df_list = list(
        pd.read_csv(
            f"s3://{s3_bucket_public_with_directory_data.name}/",
            storage_options=s3so,
        )
    )
    # Derive the expected count from the fixture rather than hard-coding it,
    # matching the local-directory test.
    assert len(df_list) == len(data_list)

    def _normalize(frame):
        # Order rows by every column and renumber them so the comparison
        # does not depend on the order in which objects were listed.
        return frame.sort_values(by=list(frame.columns)).reset_index(drop=True)

    result = _normalize(pd.concat(df_list, ignore_index=True))
    # Normalise the expected frame the same way as the result so the test
    # does not rely on the fixture rows already being in sorted order.
    expected = _normalize(
        pd.DataFrame([value for data in data_list for value in data.values()])
    )
    tm.assert_frame_equal(result, expected)
0 commit comments