@@ -1,7 +1,6 @@
import gzip
import io
import os
-from pathlib import Path
import subprocess
import sys
import tarfile
@@ -31,16 +30,16 @@
    ],
)
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
-def test_compression_size(obj, method, compression_only):
+def test_compression_size(obj, method, compression_only, temp_file):
    if compression_only == "tar":
        compression_only = {"method": "tar", "mode": "w:gz"}

-    with tm.ensure_clean() as path:
-        getattr(obj, method)(path, compression=compression_only)
-        compressed_size = os.path.getsize(path)
-        getattr(obj, method)(path, compression=None)
-        uncompressed_size = os.path.getsize(path)
-        assert uncompressed_size > compressed_size
+    path = temp_file
+    getattr(obj, method)(path, compression=compression_only)
+    compressed_size = os.path.getsize(path)
+    getattr(obj, method)(path, compression=None)
+    uncompressed_size = os.path.getsize(path)
+    assert uncompressed_size > compressed_size


@pytest.mark.parametrize(
@@ -54,22 +53,25 @@ def test_compression_size(obj, method, compression_only):
    ],
)
@pytest.mark.parametrize("method", ["to_csv", "to_json"])
-def test_compression_size_fh(obj, method, compression_only):
-    with tm.ensure_clean() as path:
-        with icom.get_handle(
-            path,
-            "w:gz" if compression_only == "tar" else "w",
-            compression=compression_only,
-        ) as handles:
-            getattr(obj, method)(handles.handle)
-            assert not handles.handle.closed
-        compressed_size = os.path.getsize(path)
-    with tm.ensure_clean() as path:
-        with icom.get_handle(path, "w", compression=None) as handles:
-            getattr(obj, method)(handles.handle)
-            assert not handles.handle.closed
-        uncompressed_size = os.path.getsize(path)
-        assert uncompressed_size > compressed_size
+def test_compression_size_fh(obj, method, compression_only, temp_file):
+    path = temp_file
+    with icom.get_handle(
+        path,
+        "w:gz" if compression_only == "tar" else "w",
+        compression=compression_only,
+    ) as handles:
+        getattr(obj, method)(handles.handle)
+        assert not handles.handle.closed
+    compressed_size = os.path.getsize(path)
+
+    # Create a new temporary file for uncompressed comparison
+    path2 = temp_file.parent / f"{temp_file.stem}_uncompressed{temp_file.suffix}"
+    path2.touch()
+    with icom.get_handle(path2, "w", compression=None) as handles:
+        getattr(obj, method)(handles.handle)
+        assert not handles.handle.closed
+    uncompressed_size = os.path.getsize(path2)
+    assert uncompressed_size > compressed_size


@pytest.mark.parametrize(
@@ -81,14 +83,19 @@ def test_compression_size_fh(obj, method, compression_only):
    ],
)
def test_dataframe_compression_defaults_to_infer(
-    write_method, write_kwargs, read_method, compression_only, compression_to_extension
+    write_method,
+    write_kwargs,
+    read_method,
+    compression_only,
+    compression_to_extension,
+    temp_file,
):
    # GH22004
    input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"])
    extension = compression_to_extension[compression_only]
-    with tm.ensure_clean("compressed" + extension) as path:
-        getattr(input, write_method)(path, **write_kwargs)
-        output = read_method(path, compression=compression_only)
+    path = temp_file.parent / f"compressed{extension}"
+    getattr(input, write_method)(path, **write_kwargs)
+    output = read_method(path, compression=compression_only)
    tm.assert_frame_equal(output, input)

@@ -107,37 +114,38 @@ def test_series_compression_defaults_to_infer(
    read_kwargs,
    compression_only,
    compression_to_extension,
+    temp_file,
):
    # GH22004
    input = pd.Series([0, 5, -2, 10], name="X")
    extension = compression_to_extension[compression_only]
-    with tm.ensure_clean("compressed" + extension) as path:
-        getattr(input, write_method)(path, **write_kwargs)
-        if "squeeze" in read_kwargs:
-            kwargs = read_kwargs.copy()
-            del kwargs["squeeze"]
-            output = read_method(path, compression=compression_only, **kwargs).squeeze(
-                "columns"
-            )
-        else:
-            output = read_method(path, compression=compression_only, **read_kwargs)
+    path = temp_file.parent / f"compressed{extension}"
+    getattr(input, write_method)(path, **write_kwargs)
+    if "squeeze" in read_kwargs:
+        kwargs = read_kwargs.copy()
+        del kwargs["squeeze"]
+        output = read_method(path, compression=compression_only, **kwargs).squeeze(
+            "columns"
+        )
+    else:
+        output = read_method(path, compression=compression_only, **read_kwargs)
    tm.assert_series_equal(output, input, check_names=False)


-def test_compression_warning(compression_only):
+def test_compression_warning(compression_only, temp_file):
    # Assert that passing a file object to to_csv while explicitly specifying a
    # compression protocol triggers a RuntimeWarning, as per GH21227.
    df = pd.DataFrame(
        100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        columns=["X", "Y", "Z"],
    )
-    with tm.ensure_clean() as path:
-        with icom.get_handle(path, "w", compression=compression_only) as handles:
-            with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
-                df.to_csv(handles.handle, compression=compression_only)
+    path = temp_file
+    with icom.get_handle(path, "w", compression=compression_only) as handles:
+        with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
+            df.to_csv(handles.handle, compression=compression_only)


-def test_compression_binary(compression_only):
+def test_compression_binary(compression_only, temp_file):
    """
    Binary file handles support compression.

@@ -150,13 +158,13 @@ def test_compression_binary(compression_only):
    )

    # with a file
-    with tm.ensure_clean() as path:
-        with open(path, mode="wb") as file:
-            df.to_csv(file, mode="wb", compression=compression_only)
-            file.seek(0)  # file shouldn't be closed
-        tm.assert_frame_equal(
-            df, pd.read_csv(path, index_col=0, compression=compression_only)
-        )
+    path = temp_file
+    with open(path, mode="wb") as file:
+        df.to_csv(file, mode="wb", compression=compression_only)
+        file.seek(0)  # file shouldn't be closed
+    tm.assert_frame_equal(
+        df, pd.read_csv(path, index_col=0, compression=compression_only)
+    )

    # with BytesIO
    file = io.BytesIO()
@@ -167,7 +175,7 @@ def test_compression_binary(compression_only):
    )


-def test_gzip_reproducibility_file_name():
+def test_gzip_reproducibility_file_name(temp_file):
    """
    Gzip should create reproducible archives with mtime.

@@ -183,13 +191,12 @@ def test_gzip_reproducibility_file_name():
    compression_options = {"method": "gzip", "mtime": 1}

    # test for filename
-    with tm.ensure_clean() as path:
-        path = Path(path)
-        df.to_csv(path, compression=compression_options)
-        time.sleep(0.1)
-        output = path.read_bytes()
-        df.to_csv(path, compression=compression_options)
-        assert output == path.read_bytes()
+    path = temp_file
+    df.to_csv(path, compression=compression_options)
+    time.sleep(0.1)
+    output = path.read_bytes()
+    df.to_csv(path, compression=compression_options)
+    assert output == path.read_bytes()


def test_gzip_reproducibility_file_object():
@@ -259,14 +266,14 @@ def test_with_missing_lzma_runtime():
    ],
)
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
-def test_gzip_compression_level(obj, method):
+def test_gzip_compression_level(obj, method, temp_file):
    # GH33196
-    with tm.ensure_clean() as path:
-        getattr(obj, method)(path, compression="gzip")
-        compressed_size_default = os.path.getsize(path)
-        getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
-        compressed_size_fast = os.path.getsize(path)
-        assert compressed_size_default < compressed_size_fast
+    path = temp_file
+    getattr(obj, method)(path, compression="gzip")
+    compressed_size_default = os.path.getsize(path)
+    getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
+    compressed_size_fast = os.path.getsize(path)
+    assert compressed_size_default < compressed_size_fast


@pytest.mark.parametrize(
@@ -280,15 +287,15 @@ def test_gzip_compression_level(obj, method):
    ],
)
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
-def test_xz_compression_level_read(obj, method):
-    with tm.ensure_clean() as path:
-        getattr(obj, method)(path, compression="xz")
-        compressed_size_default = os.path.getsize(path)
-        getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
-        compressed_size_fast = os.path.getsize(path)
-        assert compressed_size_default < compressed_size_fast
-        if method == "to_csv":
-            pd.read_csv(path, compression="xz")
+def test_xz_compression_level_read(obj, method, temp_file):
+    path = temp_file
+    getattr(obj, method)(path, compression="xz")
+    compressed_size_default = os.path.getsize(path)
+    getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
+    compressed_size_fast = os.path.getsize(path)
+    assert compressed_size_default < compressed_size_fast
+    if method == "to_csv":
+        pd.read_csv(path, compression="xz")


@pytest.mark.parametrize(
@@ -302,13 +309,13 @@ def test_xz_compression_level_read(obj, method):
    ],
)
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
-def test_bzip_compression_level(obj, method):
+def test_bzip_compression_level(obj, method, temp_file):
    """GH33196 bzip needs file size > 100k to show a size difference between
    compression levels, so here we just check if the call works when
    compression is passed as a dict.
    """
-    with tm.ensure_clean() as path:
-        getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})
+    path = temp_file
+    getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})


@pytest.mark.parametrize(
@@ -318,21 +325,21 @@ def test_bzip_compression_level(obj, method):
        (".tar", tarfile.TarFile),
    ],
)
-def test_empty_archive_zip(suffix, archive):
-    with tm.ensure_clean(filename=suffix) as path:
-        with archive(path, "w"):
-            pass
-        with pytest.raises(ValueError, match="Zero files found"):
-            pd.read_csv(path)
+def test_empty_archive_zip(suffix, archive, temp_file):
+    path = temp_file.parent / f"archive{suffix}"
+    with archive(path, "w"):
+        pass
+    with pytest.raises(ValueError, match="Zero files found"):
+        pd.read_csv(path)


-def test_ambiguous_archive_zip():
-    with tm.ensure_clean(filename=".zip") as path:
-        with zipfile.ZipFile(path, "w") as file:
-            file.writestr("a.csv", "foo,bar")
-            file.writestr("b.csv", "foo,bar")
-        with pytest.raises(ValueError, match="Multiple files found in ZIP file"):
-            pd.read_csv(path)
+def test_ambiguous_archive_zip(temp_file):
+    path = temp_file.parent / "archive.zip"
+    with zipfile.ZipFile(path, "w") as file:
+        file.writestr("a.csv", "foo,bar")
+        file.writestr("b.csv", "foo,bar")
+    with pytest.raises(ValueError, match="Multiple files found in ZIP file"):
+        pd.read_csv(path)


def test_ambiguous_archive_tar(tmp_path):
@@ -352,24 +359,24 @@ def test_ambiguous_archive_tar(tmp_path):
        pd.read_csv(tarpath)


-def test_tar_gz_to_different_filename():
-    with tm.ensure_clean(filename=".foo") as file:
-        pd.DataFrame(
-            [["1", "2"]],
-            columns=["foo", "bar"],
-        ).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)
-        with gzip.open(file) as uncompressed:
-            with tarfile.TarFile(fileobj=uncompressed) as archive:
-                members = archive.getmembers()
-                assert len(members) == 1
-                content = archive.extractfile(members[0]).read().decode("utf8")
-
-                if is_platform_windows():
-                    expected = "foo,bar\r\n1,2\r\n"
-                else:
-                    expected = "foo,bar\n1,2\n"
-
-                assert content == expected
+def test_tar_gz_to_different_filename(temp_file):
+    file = temp_file.parent / "archive.foo"
+    pd.DataFrame(
+        [["1", "2"]],
+        columns=["foo", "bar"],
+    ).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)
+    with gzip.open(file) as uncompressed:
+        with tarfile.TarFile(fileobj=uncompressed) as archive:
+            members = archive.getmembers()
+            assert len(members) == 1
+            content = archive.extractfile(members[0]).read().decode("utf8")
+
+            if is_platform_windows():
+                expected = "foo,bar\r\n1,2\r\n"
+            else:
+                expected = "foo,bar\n1,2\n"
+
+            assert content == expected


def test_tar_no_error_on_close():
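
Note: every rewritten test takes a `temp_file` pytest fixture that is not defined in this diff. From its usage it must return a `pathlib.Path` to an already-created file in a per-test temporary directory (the tests call `temp_file.parent`, `.stem`, `.suffix`, `.read_bytes()`, and `os.path.getsize()` on it). A minimal sketch of such a fixture, assuming it is built on pytest's built-in `tmp_path`, is shown below; the actual definition lives in the project's `conftest.py` and may differ.

```python
# Hypothetical sketch of the temp_file fixture these tests rely on; the real
# definition belongs in conftest.py and may use a different naming scheme.
import uuid

import pytest


@pytest.fixture
def temp_file(tmp_path):
    """Return a unique, pre-created pathlib.Path inside pytest's tmp_path.

    A Path object (not a string) is returned so the tests can derive sibling
    paths via .parent / .stem / .suffix and read the file back with
    .read_bytes(); touch() ensures the file exists before size checks.
    """
    path = tmp_path / str(uuid.uuid4())
    path.touch()
    return path
```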