Increase S3 tests coverage (#1909)

jaidisido · web-flow · commit 7f2050c9e401 · 2022-12-29T16:35:55.000Z
diff --git a/awswrangler/s3/_write_text.py b/awswrangler/s3/_write_text.py
@@ -3,7 +3,6 @@
 import csv
 import logging
 import uuid
-from distutils.version import LooseVersion
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import boto3
@@ -420,11 +419,7 @@ def to_csv(  # pylint: disable=too-many-arguments,too-many-locals,too-many-state
             "Pandas arguments in the function call and awswrangler will accept it."
             "e.g. wr.s3.to_csv(df, path, sep='|', na_rep='NULL', decimal=',', compression='gzip')"
         )
-    if pandas_kwargs.get("compression") and str(pd.__version__) < LooseVersion("1.2.0"):
-        raise exceptions.InvalidArgument(
-            f"CSV compression on S3 is not supported for Pandas version {pd.__version__}. "
-            "The minimum acceptable version to achive it is Pandas 1.2.0 that requires Python >=3.7.1."
-        )
+
     _validate_args(
         df=df,
         table=table,
@@ -885,11 +880,6 @@ def to_json(  # pylint: disable=too-many-arguments,too-many-locals,too-many-stat
             "Pandas arguments in the function call and awswrangler will accept it."
             "e.g. wr.s3.to_json(df, path, lines=True, date_format='iso')"
         )
-    if pandas_kwargs.get("compression") and str(pd.__version__) < LooseVersion("1.2.0"):
-        raise exceptions.InvalidArgument(
-            f"JSON compression on S3 is not supported for Pandas version {pd.__version__}. "
-            "The minimum acceptable version to achive it is Pandas 1.2.0 that requires Python >=3.7.1."
-        )
 
     _validate_args(
         df=df,
diff --git a/tests/test_s3_excel.py b/tests/test_s3_excel.py
@@ -1,5 +1,4 @@
 import logging
-import sys
 
 import pandas as pd
 import pytest
@@ -15,11 +14,15 @@ def test_excel(path, ext, use_threads):
     df = pd.DataFrame({"c0": [1, 2, 3], "c1": ["foo", "boo", "bar"]})
     file_path = f"{path}0.{ext}"
     pandas_kwargs = {}
-    if sys.version_info < (3, 7):
-        pandas_kwargs["engine"] = "xlwt" if ext == "xls" else "openpyxl"
+
+    with pytest.raises(wr.exceptions.InvalidArgument):
+        wr.s3.to_excel(df, file_path, use_threads=use_threads, index=False, pandas_kwargs=pandas_kwargs)
+
     wr.s3.to_excel(df, file_path, use_threads=use_threads, index=False, **pandas_kwargs)
-    if sys.version_info < (3, 7):
-        pandas_kwargs["engine"] = "xlrd" if ext == "xls" else "openpyxl"
+
+    with pytest.raises(wr.exceptions.InvalidArgument):
+        wr.s3.read_excel(file_path, use_threads=use_threads, pandas_kwargs=pandas_kwargs)
+
     df2 = wr.s3.read_excel(file_path, use_threads=use_threads, **pandas_kwargs)
     assert df.equals(df2)
 
@@ -28,8 +31,6 @@ def test_read_xlsx_versioned(path) -> None:
     path_file = f"{path}0.xlsx"
     dfs = [pd.DataFrame({"c0": [0, 1, 2], "c1": [3, 4, 5]}), pd.DataFrame({"c0": [3, 4, 5], "c1": [6, 7, 8]})]
     pandas_kwargs = {}
-    if sys.version_info < (3, 7):
-        pandas_kwargs["engine"] = "openpyxl"
     for df in dfs:
         wr.s3.to_excel(df=df, path=path_file, index=False, **pandas_kwargs)
         version_id = wr.s3.describe_objects(path=path_file)[path_file]["VersionId"]
diff --git a/tests/test_s3_select.py b/tests/test_s3_select.py
@@ -22,6 +22,7 @@ def test_full_table(path, use_threads):
         input_serialization="Parquet",
         input_serialization_params={},
         use_threads=use_threads,
+        s3_additional_kwargs={"RequestProgress": {"Enabled": False}},
     )
     assert df.equals(df2)
 
@@ -164,3 +165,23 @@ def test_encryption(path, kms_key_id, s3_additional_kwargs):
         use_threads=False,
     )
     assert df.equals(df2)
+
+
+def test_exceptions(path):  # each case expects wr.s3.select_query to raise for an invalid argument set
+    args = {  # shared keyword arguments; the cases below mutate this dict in place
+        "sql": "select * from s3object",
+        "path": f"{path}/test.pq",
+        "input_serialization_params": {},
+    }
+
+    with pytest.raises(wr.exceptions.InvalidArgumentValue):  # "ORC" as input_serialization
+        args.update({"input_serialization": "ORC"})
+        wr.s3.select_query(**args)
+
+    with pytest.raises(wr.exceptions.InvalidCompression):  # "zip" as compression
+        args.update({"input_serialization": "Parquet", "compression": "zip"})
+        wr.s3.select_query(**args)
+
+    with pytest.raises(wr.exceptions.InvalidArgumentCombination):  # "gzip" with "Parquet" kept from above
+        args.update({"compression": "gzip"})
+        wr.s3.select_query(**args)
diff --git a/tests/test_s3_text.py b/tests/test_s3_text.py
@@ -378,3 +378,33 @@ def test_to_csv_schema_evolution(path, glue_database, glue_table) -> None:
         wr.s3.to_csv(
             df=df, path=path_file, dataset=True, database=glue_database, table=glue_table, schema_evolution=False
         )
+
+
+def test_exceptions(path):  # each case expects wr.s3.to_csv / wr.s3.to_json to raise for an invalid call
+    with pytest.raises(wr.exceptions.EmptyDataFrame):  # empty DataFrame passed to to_json
+        wr.s3.to_json(df=pd.DataFrame(), path=path)
+
+    with pytest.raises(wr.exceptions.EmptyDataFrame):  # empty DataFrame passed to to_csv
+        wr.s3.to_csv(df=pd.DataFrame(), path=path)
+
+    df = pd.DataFrame({"c0": [1, 2], "c1": ["a", "b"]})  # non-empty frame reused by the remaining cases
+    with pytest.raises(wr.exceptions.InvalidArgument):  # pandas_kwargs given as a named argument (to_csv)
+        wr.s3.to_csv(df=df, path=path, pandas_kwargs={})
+
+    with pytest.raises(wr.exceptions.InvalidArgument):  # pandas_kwargs given as a named argument (to_json)
+        wr.s3.to_json(df=df, path=path, pandas_kwargs={})
+
+    with pytest.raises(wr.exceptions.InvalidArgumentValue):  # presumably: path is a prefix here - TODO confirm
+        wr.s3.to_csv(df=df, path=path, dataset=False)
+
+    with pytest.raises(wr.exceptions.InvalidArgumentCombination):  # bucketing_info together with dataset=False
+        wr.s3.to_json(df=df, path=f"{path}test.pq", dataset=False, bucketing_info=(["c0"], 2))
+
+    with pytest.raises(wr.exceptions.InvalidArgumentValue):  # bucketing_info whose second element is -1
+        wr.s3.to_csv(df=df, path=f"{path}test.pq", dataset=True, bucketing_info=(["c0"], -1))
+
+    with pytest.raises(wr.exceptions.InvalidArgumentCombination):  # table given while database is None
+        wr.s3.to_json(df=df, path=f"{path}test.pq", dataset=True, database=None, table="test")
+
+    with pytest.raises(wr.exceptions.InvalidArgumentCombination):  # dataset=True with no path argument
+        wr.s3.to_csv(df=df, dataset=True)