
Commit 90e5f18

cleanup
1 parent 8947750 commit 90e5f18

File tree

10 files changed: +43 -157 lines changed


pandas/tests/io/conftest.py

Lines changed: 9 additions & 0 deletions
@@ -68,6 +68,15 @@ def moto_s3_resource(moto_server):
     return s3


+@pytest.fixture
+def s3so(moto_server):
+    return {
+        "client_kwargs": {
+            "endpoint_url": moto_server,
+        }
+    }
+
+
 @pytest.fixture
 def s3_bucket_public(moto_s3_resource):
     """

pandas/tests/io/excel/test_readers.py

Lines changed: 2 additions & 12 deletions
@@ -934,12 +934,7 @@ def test_read_from_http_url(self, httpserver, read_ext):

     @td.skip_if_not_us_locale
     @pytest.mark.single_cpu
-    def test_read_from_s3_url(self, read_ext, s3_bucket_public):
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url,
-            }
-        }
+    def test_read_from_s3_url(self, read_ext, s3_bucket_public, s3so):
         with open("test1" + read_ext, "rb") as f:
             s3_bucket_public.put_object(Key="test1" + read_ext, Body=f)

@@ -950,13 +945,8 @@ def test_read_from_s3_url(self, read_ext, s3_bucket_public):
         tm.assert_frame_equal(url_table, local_table)

     @pytest.mark.single_cpu
-    def test_read_from_s3_object(self, read_ext, s3_bucket_public):
+    def test_read_from_s3_object(self, read_ext, s3_bucket_public, s3so):
         # GH 38788
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url,
-            }
-        }
         with open("test1" + read_ext, "rb") as f:
             s3_bucket_public.put_object(Key="test1" + read_ext, Body=f)

pandas/tests/io/excel/test_style.py

Lines changed: 0 additions & 5 deletions
@@ -323,11 +323,6 @@ def test_styler_to_s3(s3_bucket_public, s3so):
     mock_bucket_name = s3_bucket_public.name
     target_file = f"{uuid.uuid4()}.xlsx"
     df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
-    s3so = {
-        "client_kwargs": {
-            "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url
-        }
-    }
     styler = df.style.set_sticky(axis="index")
     styler.to_excel(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
     timeout = 5

pandas/tests/io/json/test_compression.py

Lines changed: 1 addition & 6 deletions
@@ -43,7 +43,7 @@ def test_read_zipped_json(datapath):
 @td.skip_if_not_us_locale
 @pytest.mark.single_cpu
 @pytest.mark.network
-def test_with_s3_url(compression, s3_bucket_public):
+def test_with_s3_url(compression, s3_bucket_public, s3so):
     # Bucket created in tests/io/conftest.py
     df = pd.read_json(StringIO('{"a": [1, 2, 3], "b": [4, 5, 6]}'))

@@ -53,11 +53,6 @@ def test_with_s3_url(compression, s3_bucket_public, s3so):
     with open(path, "rb") as f:
         s3_bucket_public.put_object(Key=key, Body=f)

-    s3so = {
-        "client_kwargs": {
-            "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url
-        }
-    }
     roundtripped_df = pd.read_json(
         f"s3://{s3_bucket_public.name}/{key}",
         compression=compression,

pandas/tests/io/json/test_pandas.py

Lines changed: 2 additions & 14 deletions
@@ -1412,15 +1412,8 @@ def test_read_inline_jsonl(self):
     @pytest.mark.single_cpu
     @pytest.mark.network
     @td.skip_if_not_us_locale
-    def test_read_s3_jsonl(self, s3_bucket_public_with_data):
+    def test_read_s3_jsonl(self, s3_bucket_public_with_data, s3so):
         # GH17200
-
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-            }
-        }
-
         result = read_json(
             f"s3n://{s3_bucket_public_with_data.name}/items.jsonl",
             lines=True,
@@ -2018,16 +2011,11 @@ def test_json_multiindex(self):

     @pytest.mark.single_cpu
     @pytest.mark.network
-    def test_to_s3(self, s3_bucket_public):
+    def test_to_s3(self, s3_bucket_public, s3so):
         # GH 28375
         mock_bucket_name = s3_bucket_public.name
         target_file = f"{uuid.uuid4()}.json"
         df = DataFrame({"x": [1, 2, 3], "y": [2, 4, 6]})
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url
-            }
-        }
         df.to_json(f"s3://{mock_bucket_name}/{target_file}", storage_options=s3so)
         timeout = 5
         while True:
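Note: the hunk above ends just as test_to_s3 begins polling for the uploaded object. The loop body lies outside the diff, so the following is only a plausible reconstruction of such a wait (the membership check and sleep interval are assumptions):

    import time

    timeout = 5
    while True:
        # poll the mock bucket until the freshly written JSON shows up
        if target_file in (obj.key for obj in s3_bucket_public.objects.all()):
            break
        time.sleep(0.1)
        timeout -= 0.1
        assert timeout > 0, "Timed out waiting for the object to appear"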

pandas/tests/io/parser/test_network.py

Lines changed: 21 additions & 65 deletions
@@ -89,16 +89,18 @@ class TestS3:
     @pytest.mark.parametrize("nrows", [None, 10])
     @pytest.mark.parametrize("engine", ["c", "python"])
     def test_parse_public_s3_bucket(
-        self, s3_bucket_public_with_data, tips_df, suffix, compression, nrows, engine
+        self,
+        s3_bucket_public_with_data,
+        s3so,
+        tips_df,
+        suffix,
+        compression,
+        nrows,
+        engine,
     ):
         # more of an integration test due to the not-public contents portion
         # can probably mock this though.
         pytest.importorskip("s3fs")
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-            }
-        }
         df = read_csv(
             f"s3://{s3_bucket_public_with_data.name}/tips.csv{suffix}",
             nrows=nrows,
@@ -108,32 +110,19 @@ def test_parse_public_s3_bucket(
         )
         tm.assert_frame_equal(df, tips_df.iloc[:nrows])

-    def test_parse_private_s3_bucket(self, s3_bucket_private_with_data, tips_df):
+    def test_parse_private_s3_bucket(self, s3_bucket_private_with_data, s3so, tips_df):
         # Read public file from bucket with not-public contents
         pytest.importorskip("s3fs")
-
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": (
-                    s3_bucket_private_with_data.meta.client.meta.endpoint_url
-                )
-            }
-        }
         df = read_csv(
             f"s3://{s3_bucket_private_with_data.name}/tips.csv", storage_options=s3so
         )
         tm.assert_frame_equal(df, tips_df)

     @pytest.mark.parametrize("scheme", ["s3n", "s3a"])
     def test_parse_public_bucket_s3n_s3a(
-        self, s3_bucket_public_with_data, tips_df, scheme
+        self, s3_bucket_public_with_data, s3so, tips_df, scheme
     ):
         nrows = 10
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-            }
-        }
         df = read_csv(
             f"{scheme}://{s3_bucket_public_with_data.name}/tips.csv",
             nrows=nrows,
@@ -151,15 +140,10 @@ def test_parse_public_bucket_s3n_s3a(
     )
     @pytest.mark.parametrize("engine", ["c", "python"])
     def test_parse_public_s3_bucket_chunked(
-        self, s3_bucket_public_with_data, tips_df, suffix, compression, engine
+        self, s3_bucket_public_with_data, s3so, tips_df, suffix, compression, engine
     ):
         # Read with a chunksize
         chunksize = 5
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-            }
-        }
         with read_csv(
             f"s3://{s3_bucket_public_with_data.name}/tips.csv{suffix}",
             chunksize=chunksize,
@@ -178,12 +162,9 @@ def test_parse_public_s3_bucket_chunked(
         tm.assert_frame_equal(true_df, df)

     @pytest.mark.parametrize("suffix", ["", ".gz", ".bz2"])
-    def test_infer_s3_compression(self, s3_bucket_public_with_data, tips_df, suffix):
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-            }
-        }
+    def test_infer_s3_compression(
+        self, s3_bucket_public_with_data, s3so, tips_df, suffix
+    ):
         df = read_csv(
             f"s3://{s3_bucket_public_with_data.name}/tips.csv{suffix}",
             engine="python",
@@ -192,25 +173,13 @@ def test_infer_s3_compression(
         )
         tm.assert_frame_equal(df, tips_df)

-    def test_read_s3_fails(self, s3_bucket_public_with_data):
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-            }
-        }
+    def test_read_s3_fails(self, s3_bucket_public_with_data, s3so):
         msg = "The specified bucket does not exist"
         with pytest.raises(OSError, match=msg):
             read_csv("s3://nyqpug/asdf.csv", storage_options=s3so)

-    def test_read_s3_fails_private(self, s3_bucket_private_with_data):
+    def test_read_s3_fails_private(self, s3_bucket_private_with_data, s3so):
         s3_url = f"{s3_bucket_private_with_data.name}/file.csv"
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": (
-                    s3_bucket_private_with_data.meta.client.meta.endpoint_url
-                )
-            }
-        }
         msg = rf"{s3_url}"
         # Receive a permission error when trying to read a private bucket.
         # It's irrelevant here that this isn't actually a table.
@@ -237,14 +206,9 @@ def test_read_csv_handles_boto_s3_object(
         tm.assert_frame_equal(result, expected)

     @pytest.mark.single_cpu
-    def test_read_csv_chunked_download(self, s3_bucket_public, caplog):
+    def test_read_csv_chunked_download(self, s3_bucket_public, s3so, caplog):
         # 8 MB, S3FS uses 5MB chunks
         df = DataFrame(np.zeros((100000, 4)), columns=list("abcd"))
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url
-            }
-        }
         with BytesIO(df.to_csv().encode("utf-8")) as buf:
             s3_bucket_public.put_object(Key="large-file.csv", Body=buf)
         uri = f"{s3_bucket_public.name}/large-file.csv"
@@ -260,26 +224,18 @@ def test_read_csv_chunked_download(self, s3_bucket_public, caplog):
             # Less than 8 MB
             assert int(match.group("stop")) < 8000000

-    def test_read_s3_with_hash_in_key(self, s3_bucket_public_with_data, tips_df):
+    def test_read_s3_with_hash_in_key(self, s3_bucket_public_with_data, s3so, tips_df):
         # GH 25945
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-            }
-        }
         result = read_csv(
             f"s3://{s3_bucket_public_with_data.name}/tips#1.csv", storage_options=s3so
         )
         tm.assert_frame_equal(tips_df, result)

-    def test_read_feather_s3_file_path(self, s3_bucket_public_with_data, feather_file):
+    def test_read_feather_s3_file_path(
+        self, s3_bucket_public_with_data, s3so, feather_file
+    ):
         # GH 29055
         pytest.importorskip("pyarrow")
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-            }
-        }
         expected = read_feather(feather_file)
         res = read_feather(
             f"s3://{s3_bucket_public_with_data.name}/simple_dataset.feather",

pandas/tests/io/test_fsspec.py

Lines changed: 3 additions & 20 deletions
@@ -231,14 +231,8 @@ def test_fastparquet_options(fsspectest):

 @pytest.mark.single_cpu
 @pytest.mark.parametrize("compression_suffix", ["", ".gz", ".bz2"])
-def test_from_s3_csv(s3_bucket_public_with_data, tips_file, compression_suffix):
+def test_from_s3_csv(s3_bucket_public_with_data, s3so, tips_file, compression_suffix):
     pytest.importorskip("s3fs")
-
-    s3so = {
-        "client_kwargs": {
-            "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-        }
-    }
     df_from_s3 = read_csv(
         f"s3://{s3_bucket_public_with_data.name}/tips.csv{compression_suffix}",
         storage_options=s3so,
@@ -249,14 +243,8 @@ def test_from_s3_csv(s3_bucket_public_with_data, tips_file, compression_suffix):

 @pytest.mark.single_cpu
 @pytest.mark.parametrize("protocol", ["s3", "s3a", "s3n"])
-def test_s3_protocols(s3_bucket_public_with_data, tips_file, protocol):
+def test_s3_protocols(s3_bucket_public_with_data, s3so, tips_file, protocol):
     pytest.importorskip("s3fs")
-
-    s3so = {
-        "client_kwargs": {
-            "endpoint_url": s3_bucket_public_with_data.meta.client.meta.endpoint_url
-        }
-    }
     df_from_s3 = read_csv(
         f"{protocol}://{s3_bucket_public_with_data.name}/tips.csv",
         storage_options=s3so,
@@ -267,16 +255,11 @@ def test_s3_protocols(s3_bucket_public_with_data, tips_file, protocol):

 @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) fastparquet")
 @pytest.mark.single_cpu
-def test_s3_parquet(s3_bucket_public, df1):
+def test_s3_parquet(s3_bucket_public, s3so, df1):
     pytest.importorskip("fastparquet")
     pytest.importorskip("s3fs")

     fn = f"s3://{s3_bucket_public.name}/test.parquet"
-    s3so = {
-        "client_kwargs": {
-            "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url
-        }
-    }
     df1.to_parquet(
         fn, index=False, engine="fastparquet", compression=None, storage_options=s3so
     )

pandas/tests/io/test_parquet.py

Lines changed: 3 additions & 18 deletions
@@ -809,13 +809,8 @@ def test_categorical(self, pa):
         check_round_trip(df, pa)

     @pytest.mark.single_cpu
-    def test_s3_roundtrip_explicit_fs(self, df_compat, s3_bucket_public, pa):
+    def test_s3_roundtrip_explicit_fs(self, df_compat, s3_bucket_public, s3so, pa):
         s3fs = pytest.importorskip("s3fs")
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url,
-            }
-        }
         s3 = s3fs.S3FileSystem(**s3so)
         kw = {"filesystem": s3}
         check_round_trip(
@@ -827,13 +822,8 @@ def test_s3_roundtrip_explicit_fs(self, df_compat, s3_bucket_public, pa):
         )

     @pytest.mark.single_cpu
-    def test_s3_roundtrip(self, df_compat, s3_bucket_public, pa):
+    def test_s3_roundtrip(self, df_compat, s3_bucket_public, s3so, pa):
         # GH #19134
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url,
-            }
-        }
         s3so = {"storage_options": s3so}
         check_round_trip(
             df_compat,
@@ -1316,13 +1306,8 @@ def test_filter_row_groups(self, fp):
         assert len(result) == 1

     @pytest.mark.single_cpu
-    def test_s3_roundtrip(self, df_compat, s3_bucket_public, fp):
+    def test_s3_roundtrip(self, df_compat, s3_bucket_public, s3so, fp):
         # GH #19134
-        s3so = {
-            "client_kwargs": {
-                "endpoint_url": s3_bucket_public.meta.client.meta.endpoint_url,
-            }
-        }
         check_round_trip(
             df_compat,
             fp,
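Note: the parquet hunks show the fixture consumed two ways. A short sketch of both styles (bucket/key names illustrative):

    import s3fs

    # explicit filesystem: **s3so expands to client_kwargs=..., which s3fs
    # hands to botocore, so the filesystem targets the moto endpoint
    s3 = s3fs.S3FileSystem(**s3so)

    # implicit: pandas forwards storage_options to fsspec, which builds an
    # equivalent S3FileSystem under the hood
    df_compat.to_parquet("s3://some-bucket/test.parquet", storage_options=s3so)

In pyarrow's test_s3_roundtrip the pre-existing rebinding s3so = {"storage_options": s3so} survives the cleanup; judging from the call site, it wraps the fixture dict so check_round_trip can forward it as the storage_options keyword (an inference, since the rest of the call spans beyond the hunk).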
