1111from sdgym .s3 import get_s3_client , is_s3_path , parse_s3_path
1212
1313
14- def read_file (path , aws_key , aws_secret ):
14+ def read_file (path , aws_access_key_id , aws_secret_access_key ):
1515 """Read file from path.
1616
1717 The path can either be a local path or an s3 directory.
1818
1919 Args:
2020 path (str):
2121 The path to the file.
22- aws_key (str):
22+ aws_access_key_id (str):
2323 The access key id that will be used to communicate with s3, if provided.
24- aws_secret (str):
24+ aws_secret_access_key (str):
2525 The secret access key that will be used to communicate with s3, if provided.
2626
2727 Returns:
2828 bytes:
2929 The content of the file in bytes.
3030 """
3131 if is_s3_path (path ):
32- s3 = get_s3_client (aws_key , aws_secret )
32+ s3 = get_s3_client (aws_access_key_id , aws_secret_access_key )
3333 bucket_name , key = parse_s3_path (path )
3434 obj = s3 .get_object (Bucket = bucket_name , Key = key )
3535 contents = obj ['Body' ].read ()
@@ -40,28 +40,28 @@ def read_file(path, aws_key, aws_secret):
4040 return contents
4141
4242
43- def read_csv (path , aws_key , aws_secret ):
43+ def read_csv (path , aws_access_key_id , aws_secret_access_key ):
4444 """Read csv file from path.
4545
4646 The path can either be a local path or an s3 directory.
4747
4848 Args:
4949 path (str):
5050 The path to the csv file.
51- aws_key (str):
51+ aws_access_key_id (str):
5252 The access key id that will be used to communicate with s3, if provided.
53- aws_secret (str):
53+ aws_secret_access_key (str):
5454 The secret access key that will be used to communicate with s3, if provided.
5555
5656 Returns:
5757 pandas.DataFrame:
5858 A DataFrame containing the contents of the csv file.
5959 """
60- contents = read_file (path , aws_key , aws_secret )
60+ contents = read_file (path , aws_access_key_id , aws_secret_access_key )
6161 return pd .read_csv (io .BytesIO (contents ))
6262
6363
64- def read_csv_from_path (path , aws_key , aws_secret ):
64+ def read_csv_from_path (path , aws_access_key_id , aws_secret_access_key ):
6565 """Read all csv content within a path.
6666
6767 All csv content within a path will be read and returned in a
@@ -70,9 +70,9 @@ def read_csv_from_path(path, aws_key, aws_secret):
7070 Args:
7171 path (str):
7272 The path to read from, which can be either local or an s3 path.
73- aws_key (str):
73+ aws_access_key_id (str):
7474 The access key id that will be used to communicate with s3, if provided.
75- aws_secret (str):
75+ aws_secret_access_key (str):
7676 The secret access key that will be used to communicate with s3, if provided.
7777
7878 Returns:
@@ -81,13 +81,17 @@ def read_csv_from_path(path, aws_key, aws_secret):
8181 """
8282 csv_contents = []
8383 if is_s3_path (path ):
84- s3 = get_s3_client (aws_key , aws_secret )
84+ s3 = get_s3_client (aws_access_key_id , aws_secret_access_key )
8585 bucket_name , key_prefix = parse_s3_path (path )
8686 resp = s3 .list_objects (Bucket = bucket_name , Prefix = key_prefix )
8787 csv_files = [f for f in resp ['Contents' ] if f ['Key' ].endswith ('.csv' )]
8888 for csv_file in csv_files :
8989 csv_file_key = csv_file ['Key' ]
90- csv_contents .append (read_csv (f's3://{ bucket_name } /{ csv_file_key } ' , aws_key , aws_secret ))
90+ csv_contents .append (
91+ read_csv (
92+ f's3://{ bucket_name } /{ csv_file_key } ' , aws_access_key_id , aws_secret_access_key
93+ )
94+ )
9195
9296 else :
9397 run_path = pathlib .Path (path )
0 commit comments