Skip to content

Commit f2709a0

Browse files
committed
Improving tests for ctas_approach
1 parent eac8362 commit f2709a0

File tree

3 files changed

+20
-5
lines changed

3 files changed

+20
-5
lines changed

awswrangler/pandas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ def read_sql_athena(self,
528528
:param max_result_size: Max number of bytes on each request to S3 (VALID ONLY FOR ctas_approach=False)
529529
:return: Pandas Dataframe or Iterator of Pandas Dataframes if max_result_size was passed
530530
"""
531-
ctas_approach = ctas_approach if ctas_approach is not None else self._session.ctas_approach if self._session.ctas_approach is not None else False
531+
ctas_approach = ctas_approach if ctas_approach is not None else self._session.athena_ctas_approach if self._session.athena_ctas_approach is not None else False
532532
if ctas_approach is True and max_result_size is not None:
533533
raise InvalidParameters("ctas_approach can't use max_result_size!")
534534
if s3_output is None:

testing/test_awswrangler/test_pandas.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1442,6 +1442,7 @@ def test_read_table(session, bucket, database):
14421442
preserve_index=False,
14431443
procs_cpu_bound=1)
14441444
df2 = session.pandas.read_table(database=database, table="test")
1445+
session.s3.delete_objects(path=path)
14451446
assert len(list(df.columns)) == len(list(df2.columns))
14461447
assert len(df.index) == len(df2.index)
14471448

@@ -1465,7 +1466,7 @@ def test_read_table2(session, bucket, database):
14651466
3)]],
14661467
"partition": [0, 0, 1]
14671468
})
1468-
path = f"s3://{bucket}/test_read_table/"
1469+
path = f"s3://{bucket}/test_read_table2/"
14691470
session.pandas.to_parquet(dataframe=df,
14701471
database=database,
14711472
table="test",
@@ -1474,8 +1475,9 @@ def test_read_table2(session, bucket, database):
14741475
preserve_index=False,
14751476
procs_cpu_bound=4,
14761477
partition_cols=["partition"])
1477-
sleep(5)
1478+
sleep(15)
14781479
df2 = session.pandas.read_table(database=database, table="test")
1480+
session.s3.delete_objects(path=path)
14791481
assert len(list(df.columns)) == len(list(df2.columns))
14801482
assert len(df.index) == len(df2.index)
14811483

testing/test_awswrangler/test_redshift.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,9 @@ def test_to_redshift_spark_decimal(session, bucket, redshift_parameters):
510510
assert row[2] == Decimal((0, (1, 9, 0, 0, 0, 0), -5))
511511

512512

513-
def test_to_parquet(bucket, redshift_parameters):
513+
def test_to_parquet(session, bucket, redshift_parameters):
514+
n: int = 1_000_000
515+
df = pd.DataFrame({"id": list((range(n))), "name": list(["foo" if i % 2 == 0 else "boo" for i in range(n)])})
514516
con = Redshift.generate_connection(
515517
database="test",
516518
host=redshift_parameters.get("RedshiftAddress"),
@@ -519,12 +521,23 @@ def test_to_parquet(bucket, redshift_parameters):
519521
password=redshift_parameters.get("RedshiftPassword"),
520522
)
521523
path = f"s3://{bucket}/test_to_parquet/"
524+
session.pandas.to_redshift(
525+
dataframe=df,
526+
path=path,
527+
schema="public",
528+
table="test",
529+
connection=con,
530+
iam_role=redshift_parameters.get("RedshiftRole"),
531+
mode="overwrite",
532+
preserve_index=True,
533+
)
534+
path = f"s3://{bucket}/test_to_parquet2/"
522535
paths = Redshift.to_parquet(sql="SELECT * FROM public.test",
523536
path=path,
524537
iam_role=redshift_parameters.get("RedshiftRole"),
525538
connection=con,
526539
partition_cols=["name"])
527-
assert len(paths) == 20
540+
assert len(paths) == 4
528541

529542

530543
@pytest.mark.parametrize("sample_name", ["micro", "small", "nano"])

0 commit comments

Comments (0)