Commit abf7c7b

Add first tests with moto #109

1 parent 8da6ae2

File tree

2 files changed: 98 additions, 0 deletions


requirements-dev.txt
Lines changed: 1 addition & 0 deletions

@@ -17,3 +17,4 @@ twine~=3.1.1
 wheel~=0.34.2
 sphinx~=3.0.1
 sphinx_bootstrap_theme~=0.7.1
+moto~=1.3.14
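
The single dependency added is moto, the AWS mocking library the new tests are built on. As a rough illustration (not part of the commit), moto patches boto3 so that AWS API calls are served by an in-memory backend instead of a real account; bucket and key names below are made up:

# Minimal sketch of what moto provides, for context only.
import boto3
import moto

with moto.mock_s3():
    client = boto3.client("s3", region_name="us-east-1")
    client.create_bucket(Bucket="example-bucket")   # no real AWS call is made
    client.put_object(Bucket="example-bucket", Key="hello.txt", Body=b"hi")
    assert client.list_objects_v2(Bucket="example-bucket")["KeyCount"] == 1
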
New test file
Lines changed: 97 additions & 0 deletions

@@ -0,0 +1,97 @@
import pytest
import boto3
import moto

import awswrangler as wr

from ._utils import get_df_csv, get_df_list, ensure_data_types

@pytest.fixture(scope="module")
def s3():
    with moto.mock_s3():
        boto3.resource("s3").create_bucket(Bucket="bucket")
        yield True

@pytest.fixture(scope="module")
def emr():
    with moto.mock_emr():
        yield True

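Both fixtures are module-scoped, so each moto mock is entered once and stays active for every test in the file. As an illustration (not part of the commit), pytest injects a fixture into any test that names it as a parameter, so a hypothetical test like the one below would run inside the same mocked S3 backend and see the pre-created bucket:

# Hypothetical example, not in the commit: naming the fixture as a
# parameter pulls it in, so boto3 here talks to moto's in-memory backend.
def test_bucket_exists(s3):
    names = [bucket.name for bucket in boto3.resource("s3").buckets.all()]
    assert "bucket" in names
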
def test_csv(s3):
    path = "s3://bucket/test.csv"
    wr.s3.to_csv(df=get_df_csv(), path=path, index=False)
    df = wr.s3.read_csv(path=path)
    assert len(df.index) == 3
    assert len(df.columns) == 10

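get_df_csv, get_df_list, and ensure_data_types come from the test suite's _utils module, which this commit does not show. A hypothetical stand-in consistent with test_csv's assertions (3 rows, 10 columns) could look like this; the real helper in the repository may differ:

# Hypothetical sketch of the unshown helper, for illustration only.
# It merely needs to return a 3-row, 10-column DataFrame.
import pandas as pd

def get_df_csv():
    return pd.DataFrame({f"col{i}": [1, 2, 3] for i in range(10)})
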
def test_parquet(s3):
    path = "s3://bucket/test.parquet"
    wr.s3.to_parquet(df=get_df_list(), path=path, index=False, dataset=True, partition_cols=["par0", "par1"])
    df = wr.s3.read_parquet(path=path, dataset=True)
    ensure_data_types(df, has_list=True)
    assert len(df.index) == 3
    assert len(df.columns) == 18

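With dataset=True and partition_cols, awswrangler writes a Hive-style partitioned layout under the path and reads it back as one dataset. As a rough sketch (an assumption about the layout, not something this test checks), listing the prefix after the write would show partition-value key prefixes:

# Sketch, not in the commit: the written objects land under keys shaped
# roughly like s3://bucket/test.parquet/par0=.../par1=.../<file>.parquet
print(wr.s3.list_objects("s3://bucket/test.parquet/"))
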
def test_emr(s3, emr):
    cluster_id = wr.emr.create_cluster(
        cluster_name="wrangler_cluster",
        logging_s3_path="s3://bucket/emr-logs/",
        emr_release="emr-5.29.0",
        subnet_id="foo",
        emr_ec2_role="EMR_EC2_DefaultRole",
        emr_role="EMR_DefaultRole",
        instance_type_master="m5.xlarge",
        instance_type_core="m5.xlarge",
        instance_type_task="m5.xlarge",
        instance_ebs_size_master=50,
        instance_ebs_size_core=50,
        instance_ebs_size_task=50,
        instance_num_on_demand_master=1,
        instance_num_on_demand_core=0,
        instance_num_on_demand_task=0,
        instance_num_spot_master=0,
        instance_num_spot_core=0,
        instance_num_spot_task=0,
        spot_bid_percentage_of_on_demand_master=100,
        spot_bid_percentage_of_on_demand_core=100,
        spot_bid_percentage_of_on_demand_task=100,
        spot_provisioning_timeout_master=5,
        spot_provisioning_timeout_core=5,
        spot_provisioning_timeout_task=5,
        spot_timeout_to_on_demand_master=False,
        spot_timeout_to_on_demand_core=False,
        spot_timeout_to_on_demand_task=False,
        python3=False,
        spark_glue_catalog=False,
        hive_glue_catalog=False,
        presto_glue_catalog=False,
        consistent_view=True,
        consistent_view_retry_count=6,
        consistent_view_retry_seconds=15,
        consistent_view_table_name="EMRConsistentView",
        bootstraps_paths=None,
        debugging=False,
        applications=["Hadoop", "Spark", "Ganglia", "Hive"],
        visible_to_all_users=True,
        key_pair_name=None,
        spark_log_level="ERROR",
        spark_jars_path=["s3://bucket/jars/"],
        spark_defaults={"spark.default.parallelism": "400"},
        maximize_resource_allocation=True,
        keep_cluster_alive_when_no_steps=False,
        termination_protected=False,
        spark_pyarrow=False,
        tags={"foo": "boo", "bar": "xoo"},
    )
    wr.emr.get_cluster_state(cluster_id=cluster_id)
    steps = []
    for cmd in ['echo "Hello"', "ls -la"]:
        steps.append(wr.emr.build_step(name=cmd, command=cmd))
    wr.emr.submit_steps(cluster_id=cluster_id, steps=steps)
    wr.emr.terminate_cluster(cluster_id=cluster_id)
    wr.s3.delete_objects("s3://bucket/emr-logs/")
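
A possible extension (an assumption, not something this commit does): the call to wr.emr.get_cluster_state above discards its return value, so inside test_emr the test could capture and assert on it instead, along these lines:

# Hypothetical lines that could be added inside test_emr above:
# get_cluster_state returns the cluster's lifecycle state as a string.
state = wr.emr.get_cluster_state(cluster_id=cluster_id)
assert isinstance(state, str)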
