1
1
import gzip
2
2
import io
3
3
import os
4
- from pathlib import Path
5
4
import subprocess
6
5
import sys
7
6
import tarfile
31
30
],
32
31
)
33
32
@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
34
def test_compression_size(obj, method, compression_only, temp_file):
    """
    Check that a compressed write is smaller than an uncompressed write.

    ``obj`` is written to the same ``temp_file`` path twice through the
    writer named by ``method``: first with ``compression_only`` and then with
    ``compression=None``. The on-disk size is sampled after each write.
    """
    if compression_only == "tar":
        # Swap the bare "tar" option for a gzip-compressed tar stream;
        # presumably a plain tar archive would not shrink the data - confirm.
        compression_only = {"method": "tar", "mode": "w:gz"}

    path = temp_file

    # The second write overwrites the first, so each size has to be read
    # before the next write happens.
    getattr(obj, method)(path, compression=compression_only)
    compressed_size = os.path.getsize(path)

    getattr(obj, method)(path, compression=None)
    uncompressed_size = os.path.getsize(path)

    assert uncompressed_size > compressed_size
44
43
45
44
46
45
@pytest .mark .parametrize (
@@ -54,22 +53,25 @@ def test_compression_size(obj, method, compression_only):
54
53
],
55
54
)
56
55
@pytest .mark .parametrize ("method" , ["to_csv" , "to_json" ])
57
def test_compression_size_fh(obj, method, compression_only, temp_file):
    """
    Check compressed vs. uncompressed size when writing through file handles.

    ``obj`` is written via ``method`` to a handle opened by
    ``icom.get_handle`` with ``compression_only``, and then to a second,
    uncompressed handle on a sibling path. In both cases the writer must
    leave the handle open; the file sizes are compared afterwards.
    """
    path = temp_file
    # "tar" is opened in gzip-compressed tar mode; every other method uses
    # plain write mode.
    mode = "w:gz" if compression_only == "tar" else "w"
    with icom.get_handle(path, mode, compression=compression_only) as handles:
        getattr(obj, method)(handles.handle)
        assert not handles.handle.closed
    # Measure only after the handle context has exited, so everything has
    # been written out to disk.
    compressed_size = os.path.getsize(path)

    # Use a separate sibling file for the uncompressed comparison. The name
    # must not contain stray spaces around the stem/suffix pieces.
    path2 = temp_file.parent / f"{temp_file.stem}_uncompressed{temp_file.suffix}"
    path2.touch()
    with icom.get_handle(path2, "w", compression=None) as handles:
        getattr(obj, method)(handles.handle)
        assert not handles.handle.closed
    uncompressed_size = os.path.getsize(path2)

    assert uncompressed_size > compressed_size
73
75
74
76
75
77
@pytest .mark .parametrize (
@@ -81,14 +83,14 @@ def test_compression_size_fh(obj, method, compression_only):
81
83
],
82
84
)
83
85
def test_dataframe_compression_defaults_to_infer(
    write_method,
    write_kwargs,
    read_method,
    compression_only,
    compression_to_extension,
    temp_file,
):
    """
    Round-trip a DataFrame through a writer called without ``compression``.

    The frame is written to ``compressed<extension>`` with only
    ``write_kwargs`` (no explicit compression argument) and read back with
    ``compression=compression_only``; the result must equal the original.
    """
    # GH22004
    expected = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"])
    extension = compression_to_extension[compression_only]
    # The file name has to end exactly in the extension (no padding spaces);
    # the writer is not told the compression and only has the path to go on.
    path = temp_file.parent / f"compressed{extension}"
    getattr(expected, write_method)(path, **write_kwargs)
    result = read_method(path, compression=compression_only)
    tm.assert_frame_equal(result, expected)
93
95
94
96
@@ -107,37 +109,38 @@ def test_series_compression_defaults_to_infer(
107
109
read_kwargs ,
108
110
compression_only ,
109
111
compression_to_extension ,
112
+ temp_file ,
110
113
):
111
114
# GH22004
112
115
input = pd .Series ([0 , 5 , - 2 , 10 ], name = "X" )
113
116
extension = compression_to_extension [compression_only ]
114
- with tm . ensure_clean ( "compressed" + extension ) as path :
115
- getattr (input , write_method )(path , ** write_kwargs )
116
- if "squeeze" in read_kwargs :
117
- kwargs = read_kwargs .copy ()
118
- del kwargs ["squeeze" ]
119
- output = read_method (path , compression = compression_only , ** kwargs ).squeeze (
120
- "columns"
121
- )
122
- else :
123
- output = read_method (path , compression = compression_only , ** read_kwargs )
117
+ path = temp_file . parent / f"compressed { extension } "
118
+ getattr (input , write_method )(path , ** write_kwargs )
119
+ if "squeeze" in read_kwargs :
120
+ kwargs = read_kwargs .copy ()
121
+ del kwargs ["squeeze" ]
122
+ output = read_method (path , compression = compression_only , ** kwargs ).squeeze (
123
+ "columns"
124
+ )
125
+ else :
126
+ output = read_method (path , compression = compression_only , ** read_kwargs )
124
127
tm .assert_series_equal (output , input , check_names = False )
125
128
126
129
127
def test_compression_warning(compression_only, temp_file):
    """
    Passing ``compression`` together with an open handle must warn.

    Asserts that handing a file object to ``to_csv`` while explicitly
    specifying a compression protocol triggers a ``RuntimeWarning`` whose
    message matches "has no effect", as per GH21227.
    """
    df = pd.DataFrame(
        100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
        columns=["X", "Y", "Z"],
    )
    path = temp_file
    with icom.get_handle(path, "w", compression=compression_only) as handles:
        with tm.assert_produces_warning(RuntimeWarning, match="has no effect"):
            df.to_csv(handles.handle, compression=compression_only)
138
141
139
142
140
- def test_compression_binary (compression_only ):
143
+ def test_compression_binary (compression_only , temp_file ):
141
144
"""
142
145
Binary file handles support compression.
143
146
@@ -150,13 +153,13 @@ def test_compression_binary(compression_only):
150
153
)
151
154
152
155
# with a file
153
- with tm . ensure_clean () as path :
154
- with open (path , mode = "wb" ) as file :
155
- df .to_csv (file , mode = "wb" , compression = compression_only )
156
- file .seek (0 ) # file shouldn't be closed
157
- tm .assert_frame_equal (
158
- df , pd .read_csv (path , index_col = 0 , compression = compression_only )
159
- )
156
+ path = temp_file
157
+ with open (path , mode = "wb" ) as file :
158
+ df .to_csv (file , mode = "wb" , compression = compression_only )
159
+ file .seek (0 ) # file shouldn't be closed
160
+ tm .assert_frame_equal (
161
+ df , pd .read_csv (path , index_col = 0 , compression = compression_only )
162
+ )
160
163
161
164
# with BytesIO
162
165
file = io .BytesIO ()
@@ -167,7 +170,7 @@ def test_compression_binary(compression_only):
167
170
)
168
171
169
172
170
- def test_gzip_reproducibility_file_name ():
173
+ def test_gzip_reproducibility_file_name (temp_file ):
171
174
"""
172
175
Gzip should create reproducible archives with mtime.
173
176
@@ -183,13 +186,12 @@ def test_gzip_reproducibility_file_name():
183
186
compression_options = {"method" : "gzip" , "mtime" : 1 }
184
187
185
188
# test for filename
186
- with tm .ensure_clean () as path :
187
- path = Path (path )
188
- df .to_csv (path , compression = compression_options )
189
- time .sleep (0.1 )
190
- output = path .read_bytes ()
191
- df .to_csv (path , compression = compression_options )
192
- assert output == path .read_bytes ()
189
+ path = temp_file
190
+ df .to_csv (path , compression = compression_options )
191
+ time .sleep (0.1 )
192
+ output = path .read_bytes ()
193
+ df .to_csv (path , compression = compression_options )
194
+ assert output == path .read_bytes ()
193
195
194
196
195
197
def test_gzip_reproducibility_file_object ():
@@ -259,14 +261,14 @@ def test_with_missing_lzma_runtime():
259
261
],
260
262
)
261
263
@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
262
def test_gzip_compression_level(obj, method, temp_file):
    """
    Check that ``compresslevel=1`` gzip output is larger than the default.

    ``obj`` is written twice to the same path via ``method``: once with
    ``compression="gzip"`` and once with a dict requesting
    ``compresslevel=1``. The default-level file must be the smaller one.
    """
    # GH33196
    path = temp_file

    # The second write overwrites the first, so sample the size in between.
    getattr(obj, method)(path, compression="gzip")
    compressed_size_default = os.path.getsize(path)

    getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
    compressed_size_fast = os.path.getsize(path)

    assert compressed_size_default < compressed_size_fast
270
272
271
273
272
274
@pytest .mark .parametrize (
@@ -280,15 +282,15 @@ def test_gzip_compression_level(obj, method):
280
282
],
281
283
)
282
284
@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
283
def test_xz_compression_level_read(obj, method, temp_file):
    """
    Check that xz ``preset=1`` output is larger than the default and readable.

    ``obj`` is written twice to the same path via ``method``: once with
    ``compression="xz"`` and once with a dict requesting ``preset=1``. The
    default-preset file must be smaller. For ``to_csv`` the preset-1 file is
    additionally read back with ``pd.read_csv`` to confirm it can be parsed.
    """
    path = temp_file

    # The second write overwrites the first, so sample the size in between.
    getattr(obj, method)(path, compression="xz")
    compressed_size_default = os.path.getsize(path)

    getattr(obj, method)(path, compression={"method": "xz", "preset": 1})
    compressed_size_fast = os.path.getsize(path)

    assert compressed_size_default < compressed_size_fast
    if method == "to_csv":
        pd.read_csv(path, compression="xz")
292
294
293
295
294
296
@pytest .mark .parametrize (
@@ -302,13 +304,13 @@ def test_xz_compression_level_read(obj, method):
302
304
],
303
305
)
304
306
@pytest .mark .parametrize ("method" , ["to_pickle" , "to_json" , "to_csv" ])
305
def test_bzip_compression_level(obj, method, temp_file):
    """GH33196 bzip needs file size > 100k to show a size difference between
    compression levels, so here we just check if the call works when
    compression is passed as a dict.
    """
    path = temp_file
    # No size assertion on purpose (see docstring): the test passes as long
    # as the write with a dict-style compression argument does not raise.
    getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})
312
314
313
315
314
316
@pytest .mark .parametrize (
@@ -318,21 +320,21 @@ def test_bzip_compression_level(obj, method):
318
320
(".tar" , tarfile .TarFile ),
319
321
],
320
322
)
321
def test_empty_archive_zip(suffix, archive, temp_file):
    """
    Reading an archive that contains no members must raise ``ValueError``.

    An empty archive is created by opening ``archive`` in write mode and
    closing it immediately; ``pd.read_csv`` on it is expected to fail with a
    message matching "Zero files found".
    """
    # The name must end exactly in ``suffix`` (no padding spaces);
    # ``read_csv`` is given nothing but the path.
    path = temp_file.parent / f"archive{suffix}"
    with archive(path, "w"):
        pass
    with pytest.raises(ValueError, match="Zero files found"):
        pd.read_csv(path)
327
329
328
330
329
def test_ambiguous_archive_zip(temp_file):
    """
    Reading a ZIP archive with more than one member must raise ``ValueError``.

    Two CSV members are written into one ZIP file; ``pd.read_csv`` on it is
    expected to fail with a message matching
    "Multiple files found in ZIP file".
    """
    path = temp_file.parent / "archive.zip"
    with zipfile.ZipFile(path, "w") as file:
        file.writestr("a.csv", "foo,bar")
        file.writestr("b.csv", "foo,bar")
    with pytest.raises(ValueError, match="Multiple files found in ZIP file"):
        pd.read_csv(path)
336
338
337
339
338
340
def test_ambiguous_archive_tar (tmp_path ):
@@ -352,24 +354,24 @@ def test_ambiguous_archive_tar(tmp_path):
352
354
pd .read_csv (tarpath )
353
355
354
356
355
def test_tar_gz_to_different_filename(temp_file):
    """
    Write a gzip-compressed tar archive to a path with an unrelated suffix.

    A one-row frame is written with ``to_csv`` using
    ``{"method": "tar", "mode": "w:gz"}`` to a ``.foo`` file. The file is then
    opened with ``gzip`` and ``tarfile`` directly to verify that it holds
    exactly one member whose content is the expected CSV text.
    """
    file = temp_file.parent / "archive.foo"
    pd.DataFrame(
        [["1", "2"]],
        columns=["foo", "bar"],
    ).to_csv(file, compression={"method": "tar", "mode": "w:gz"}, index=False)

    with gzip.open(file) as uncompressed:
        with tarfile.TarFile(fileobj=uncompressed) as archive:
            members = archive.getmembers()
            assert len(members) == 1
            content = archive.extractfile(members[0]).read().decode("utf8")

    # The expected line terminator depends on the platform.
    if is_platform_windows():
        expected = "foo,bar\r\n1,2\r\n"
    else:
        expected = "foo,bar\n1,2\n"

    assert content == expected
373
375
374
376
375
377
def test_tar_no_error_on_close ():
0 commit comments