
Commit 2520f57

Increase coverage DQ
1 parent 92c8664 commit 2520f57

5 files changed: +24 -14 lines changed

awswrangler/data_quality/_create.py

Lines changed: 2 additions & 2 deletions
@@ -295,7 +295,7 @@ def evaluate_ruleset(
     catalog_id: Optional[str] = None,
     connection_name: Optional[str] = None,
     additional_options: Optional[Dict[str, str]] = None,
-    additional_run_options: Optional[Dict[str, str]] = None,
+    additional_run_options: Optional[Dict[str, Union[str, bool]]] = None,
     client_token: Optional[str] = None,
     boto3_session: Optional[boto3.Session] = None,
 ) -> pd.DataFrame:
@@ -324,7 +324,7 @@ def evaluate_ruleset(
         `pushDownPredicate`: to filter on partitions without having to list and read all the files in your dataset.
         `catalogPartitionPredicate`: to use server-side partition pruning using partition indexes in the
         Glue Data Catalog.
-    additional_run_options : Dict[str, str], optional
+    additional_run_options : Dict[str, Union[str, bool]], optional
         Additional run options. Supported keys:
         `CloudWatchMetricsEnabled`: whether to enable CloudWatch metrics.
         `ResultsS3Prefix`: prefix for Amazon S3 to store results.
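
With this change `additional_run_options` accepts booleans as well as strings. A minimal usage sketch, assuming the keys documented above and the call shape used in the updated tests below; the ruleset name, IAM role ARN, and S3 prefix are placeholders, not values from the repository:

import awswrangler as wr

# Placeholder identifiers for illustration only.
df_results = wr.data_quality.evaluate_ruleset(
    name="my_ruleset",
    iam_role_arn="arn:aws:iam::123456789012:role/GlueDataQualityRole",
    number_of_workers=2,
    additional_run_options={
        "CloudWatchMetricsEnabled": False,  # plain bool, no longer forced into a string
        "ResultsS3Prefix": "s3://my-bucket/dq-results/",
    },
)
print(df_results["Result"])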

awswrangler/data_quality/_utils.py

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ def _start_ruleset_evaluation_run(
     catalog_id: Optional[str] = None,
     connection_name: Optional[str] = None,
     additional_options: Optional[Dict[str, str]] = None,
-    additional_run_options: Optional[Dict[str, str]] = None,
+    additional_run_options: Optional[Dict[str, Union[str, bool]]] = None,
     client_token: Optional[str] = None,
     boto3_session: Optional[boto3.Session] = None,
 ) -> str:

tests/test_athena.py

Lines changed: 0 additions & 1 deletion
@@ -1045,7 +1045,6 @@ def test_bucketing_csv_saving(path, glue_database, glue_table, dtype):
     df3 = wr.athena.read_sql_query(query, database=glue_database, params=query_params, ctas_approach=False)
     scanned_bucketed = df3.query_metadata["Statistics"]["DataScannedInBytes"]

-    print(scanned_bucketed)
     assert df2.equals(df3)
     assert scanned_regular >= scanned_bucketed * saving_factor

tests/test_data_quality.py

Lines changed: 21 additions & 7 deletions
@@ -69,14 +69,15 @@ def test_ruleset_dqdl(df, path, glue_database, glue_table, glue_ruleset, glue_data_quality_role):
     assert df_results["Result"].eq("PASS").all()


-def test_recommendation_ruleset(df, path, glue_database, glue_table, glue_ruleset, glue_data_quality_role):
+@pytest.mark.parametrize("name", [False, True])
+def test_recommendation_ruleset(df, path, name, glue_database, glue_table, glue_ruleset, glue_data_quality_role):
     df_recommended_ruleset = wr.data_quality.create_recommendation_ruleset(
+        name=f"{glue_ruleset}_recommended" if name else None,
         database=glue_database,
         table=glue_table,
         iam_role_arn=glue_data_quality_role,
         number_of_workers=2,
     )
-    print(glue_data_quality_role)
     df_rules = df_recommended_ruleset.append(
         {"rule_type": "ColumnValues", "parameter": '"c2"', "expression": "in [0, 1, 2]"}, ignore_index=True
     )
@@ -90,11 +91,12 @@ def test_recommendation_ruleset(df, path, glue_database, glue_table, glue_ruleset, glue_data_quality_role):
         name=glue_ruleset,
         iam_role_arn=glue_data_quality_role,
         number_of_workers=2,
+        additional_run_options={"CloudWatchMetricsEnabled": False},
     )
     assert df_results["Result"].eq("PASS").all()


-def test_ruleset_fail(df, path, glue_database, glue_table, glue_ruleset, glue_data_quality_role):
+def test_ruleset_fail(df, path, glue_database, glue_table, glue_ruleset, glue_data_quality_role, account_id):
     wr.data_quality.create_ruleset(
         name=glue_ruleset,
         database=glue_database,
@@ -105,6 +107,7 @@ def test_ruleset_fail(df, path, glue_database, glue_table, glue_ruleset, glue_data_quality_role):
         name=glue_ruleset,
         iam_role_arn=glue_data_quality_role,
         number_of_workers=2,
+        catalog_id=account_id,
     )
     assert df_results["Result"][0] == "FAIL"

@@ -135,6 +138,13 @@ def test_create_ruleset_already_exists(
     glue_table: str,
     glue_ruleset: str,
 ) -> None:
+    with pytest.raises(wr.exceptions.InvalidArgumentCombination):
+        wr.data_quality.create_ruleset(
+            name=glue_ruleset,
+            database=glue_database,
+            table=glue_table,
+        )
+
     wr.data_quality.create_ruleset(
         name=glue_ruleset,
         database=glue_database,
@@ -182,7 +192,7 @@ def test_update_ruleset(df: pd.DataFrame, glue_database: str, glue_table: str, glue_ruleset: str) -> None:
     assert df_rules.equals(df_ruleset)


-def test_update_ruleset_does_not_exists(df: pd.DataFrame, glue_ruleset: str) -> None:
+def test_update_ruleset_exceptions(df: pd.DataFrame, glue_ruleset: str) -> None:
     df_rules = pd.DataFrame(
         {
             "rule_type": ["RowCount"],
@@ -198,6 +208,12 @@ def test_update_ruleset_does_not_exists(df: pd.DataFrame, glue_ruleset: str) -> None:
             df_rules=df_rules,
         )

+    with pytest.raises(wr.exceptions.InvalidArgumentValue):
+        wr.data_quality.update_ruleset(name=glue_ruleset, df_rules=df_rules, mode="append")
+
+    with pytest.raises(wr.exceptions.InvalidArgumentCombination):
+        wr.data_quality.update_ruleset(name=glue_ruleset)
+

 def test_upsert_ruleset(df: pd.DataFrame, glue_database: str, glue_table: str, glue_ruleset: str) -> None:
     df_rules = pd.DataFrame(
@@ -222,15 +238,13 @@ def test_upsert_ruleset(df: pd.DataFrame, glue_database: str, glue_table: str, glue_ruleset: str) -> None:
         }
     )

-    new_glue_ruleset_name = f"{glue_ruleset} 2.0"
     wr.data_quality.update_ruleset(
         name=glue_ruleset,
-        updated_name=new_glue_ruleset_name,
         mode="upsert",
         df_rules=df_upsert,
     )

-    df_ruleset = wr.data_quality.get_ruleset(name=new_glue_ruleset_name)
+    df_ruleset = wr.data_quality.get_ruleset(name=glue_ruleset)

     assert df_ruleset.shape == (5, 3)
     row_count = df_ruleset.loc[df_ruleset["rule_type"] == "RowCount"]
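
The new assertions in test_update_ruleset_exceptions and test_create_ruleset_already_exists encode the argument validation a caller should expect. A hedged sketch of that behavior, using placeholder names and only the exception types exercised in the tests above:

import pandas as pd
import awswrangler as wr

# Illustrative rules frame; column layout matches the tests, values are placeholders.
rules = pd.DataFrame({"rule_type": ["RowCount"], "parameter": [None], "expression": ["between 1 and 6"]})

# Per the tests, create_ruleset with only name/database/table (no rules) is an invalid combination.
try:
    wr.data_quality.create_ruleset(name="my_ruleset", database="my_db", table="my_table")
except wr.exceptions.InvalidArgumentCombination:
    pass

# Per the tests, mode="upsert" is accepted while mode="append" raises.
try:
    wr.data_quality.update_ruleset(name="my_ruleset", df_rules=rules, mode="append")
except wr.exceptions.InvalidArgumentValue:
    pass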

tests/test_opensearch.py

Lines changed: 0 additions & 3 deletions
@@ -146,21 +146,18 @@ def domain_endpoint_elasticsearch_7_10_fgac(cloudformation_outputs):

 def test_connection_opensearch_1_0(domain_endpoint_opensearch_1_0):
     client = wr.opensearch.connect(host=domain_endpoint_opensearch_1_0)
-    print(client.info())
     assert len(client.info()) > 0


 def test_connection_opensearch_1_0_https(domain_endpoint_opensearch_1_0):
     client = wr.opensearch.connect(host=f"https://{domain_endpoint_opensearch_1_0}")
-    print(client.info())
     assert len(client.info()) > 0


 def test_connection_elasticsearch_7_10_fgac(domain_endpoint_elasticsearch_7_10_fgac, opensearch_password):
     client = wr.opensearch.connect(
         host=domain_endpoint_elasticsearch_7_10_fgac, username="test", password=opensearch_password
     )
-    print(client.info())
     assert len(client.info()) > 0