Skip to content

Commit 7f2050c

Browse files
authored
Increase S3 tests coverage (#1909)
1 parent 5bce61e commit 7f2050c

File tree

4 files changed: 60 additions (+60) and 18 deletions (-18)

awswrangler/s3/_write_text.py

Lines changed: 1 addition & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
import csv
44
import logging
55
import uuid
6-
from distutils.version import LooseVersion
76
from typing import Any, Dict, List, Optional, Tuple, Union
87

98
import boto3
@@ -420,11 +419,7 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
420419
"Pandas arguments in the function call and awswrangler will accept it."
421420
"e.g. wr.s3.to_csv(df, path, sep='|', na_rep='NULL', decimal=',', compression='gzip')"
422421
)
423-
if pandas_kwargs.get("compression") and str(pd.__version__) < LooseVersion("1.2.0"):
424-
raise exceptions.InvalidArgument(
425-
f"CSV compression on S3 is not supported for Pandas version {pd.__version__}. "
426-
"The minimum acceptable version to achive it is Pandas 1.2.0 that requires Python >=3.7.1."
427-
)
422+
428423
_validate_args(
429424
df=df,
430425
table=table,
@@ -885,11 +880,6 @@ def to_json( # pylint: disable=too-many-arguments,too-many-locals,too-many-stat
885880
"Pandas arguments in the function call and awswrangler will accept it."
886881
"e.g. wr.s3.to_json(df, path, lines=True, date_format='iso')"
887882
)
888-
if pandas_kwargs.get("compression") and str(pd.__version__) < LooseVersion("1.2.0"):
889-
raise exceptions.InvalidArgument(
890-
f"JSON compression on S3 is not supported for Pandas version {pd.__version__}. "
891-
"The minimum acceptable version to achive it is Pandas 1.2.0 that requires Python >=3.7.1."
892-
)
893883

894884
_validate_args(
895885
df=df,

tests/test_s3_excel.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import logging
2-
import sys
32

43
import pandas as pd
54
import pytest
@@ -15,11 +14,15 @@ def test_excel(path, ext, use_threads):
1514
df = pd.DataFrame({"c0": [1, 2, 3], "c1": ["foo", "boo", "bar"]})
1615
file_path = f"{path}0.{ext}"
1716
pandas_kwargs = {}
18-
if sys.version_info < (3, 7):
19-
pandas_kwargs["engine"] = "xlwt" if ext == "xls" else "openpyxl"
17+
18+
with pytest.raises(wr.exceptions.InvalidArgument):
19+
wr.s3.to_excel(df, file_path, use_threads=use_threads, index=False, pandas_kwargs=pandas_kwargs)
20+
2021
wr.s3.to_excel(df, file_path, use_threads=use_threads, index=False, **pandas_kwargs)
21-
if sys.version_info < (3, 7):
22-
pandas_kwargs["engine"] = "xlrd" if ext == "xls" else "openpyxl"
22+
23+
with pytest.raises(wr.exceptions.InvalidArgument):
24+
wr.s3.read_excel(file_path, use_threads=use_threads, pandas_kwargs=pandas_kwargs)
25+
2326
df2 = wr.s3.read_excel(file_path, use_threads=use_threads, **pandas_kwargs)
2427
assert df.equals(df2)
2528

@@ -28,8 +31,6 @@ def test_read_xlsx_versioned(path) -> None:
2831
path_file = f"{path}0.xlsx"
2932
dfs = [pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5]}), pd.DataFrame({"c0": [3, 4, 5], "c1": [6, 7, 8]})]
3033
pandas_kwargs = {}
31-
if sys.version_info < (3, 7):
32-
pandas_kwargs["engine"] = "openpyxl"
3334
for df in dfs:
3435
wr.s3.to_excel(df=df, path=path_file, index=False, **pandas_kwargs)
3536
version_id = wr.s3.describe_objects(path=path_file)[path_file]["VersionId"]

tests/test_s3_select.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ def test_full_table(path, use_threads):
2222
input_serialization="Parquet",
2323
input_serialization_params={},
2424
use_threads=use_threads,
25+
s3_additional_kwargs={"RequestProgress": {"Enabled": False}},
2526
)
2627
assert df.equals(df2)
2728

@@ -164,3 +165,23 @@ def test_encryption(path, kms_key_id, s3_additional_kwargs):
164165
use_threads=False,
165166
)
166167
assert df.equals(df2)
168+
169+
170+
def test_exceptions(path):
171+
args = {
172+
"sql": "select * from s3object",
173+
"path": f"{path}/test.pq",
174+
"input_serialization_params": {},
175+
}
176+
177+
with pytest.raises(wr.exceptions.InvalidArgumentValue):
178+
args.update({"input_serialization": "ORC"})
179+
wr.s3.select_query(**args)
180+
181+
with pytest.raises(wr.exceptions.InvalidCompression):
182+
args.update({"input_serialization": "Parquet", "compression": "zip"})
183+
wr.s3.select_query(**args)
184+
185+
with pytest.raises(wr.exceptions.InvalidArgumentCombination):
186+
args.update({"compression": "gzip"})
187+
wr.s3.select_query(**args)

tests/test_s3_text.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -378,3 +378,33 @@ def test_to_csv_schema_evolution(path, glue_database, glue_table) -> None:
378378
wr.s3.to_csv(
379379
df=df, path=path_file, dataset=True, database=glue_database, table=glue_table, schema_evolution=False
380380
)
381+
382+
383+
def test_exceptions(path):
384+
with pytest.raises(wr.exceptions.EmptyDataFrame):
385+
wr.s3.to_json(df=pd.DataFrame(), path=path)
386+
387+
with pytest.raises(wr.exceptions.EmptyDataFrame):
388+
wr.s3.to_csv(df=pd.DataFrame(), path=path)
389+
390+
df = pd.DataFrame({"c0": [1, 2], "c1": ["a", "b"]})
391+
with pytest.raises(wr.exceptions.InvalidArgument):
392+
wr.s3.to_csv(df=df, path=path, pandas_kwargs={})
393+
394+
with pytest.raises(wr.exceptions.InvalidArgument):
395+
wr.s3.to_json(df=df, path=path, pandas_kwargs={})
396+
397+
with pytest.raises(wr.exceptions.InvalidArgumentValue):
398+
wr.s3.to_csv(df=df, path=path, dataset=False)
399+
400+
with pytest.raises(wr.exceptions.InvalidArgumentCombination):
401+
wr.s3.to_json(df=df, path=f"{path}test.pq", dataset=False, bucketing_info=(["c0"], 2))
402+
403+
with pytest.raises(wr.exceptions.InvalidArgumentValue):
404+
wr.s3.to_csv(df=df, path=f"{path}test.pq", dataset=True, bucketing_info=(["c0"], -1))
405+
406+
with pytest.raises(wr.exceptions.InvalidArgumentCombination):
407+
wr.s3.to_json(df=df, path=f"{path}test.pq", dataset=True, database=None, table="test")
408+
409+
with pytest.raises(wr.exceptions.InvalidArgumentCombination):
410+
wr.s3.to_csv(df=df, dataset=True)

0 commit comments

Comments
 (0)