1
1
import io
2
2
import tarfile
3
3
import zipfile
4
+ from unittest .mock import patch
4
5
5
6
import pandas as pd
6
7
import pytest
7
8
8
- from janitor .io import read_archive
9
+ from janitor .io import (
10
+ _infer_file_type ,
11
+ read_archive ,
12
+ )
9
13
10
14
15
+ # Fixtures for creating test archives
11
16
@pytest.fixture
def dummy_zip_file(tmp_path):
    """Build a ZIP archive with two small CSV members and return its path."""
    # Member name -> CSV text written into the archive.
    members = {
        "file1.csv": "col1,col2\n 1,2\n 3,4",
        "file2.csv": "col3,col4\n 5,6\n 7,8",
    }
    archive = tmp_path / "dummy .zip"
    with zipfile.ZipFile(archive, mode="w") as zf:
        for member_name, csv_text in members.items():
            zf.writestr(member_name, csv_text)
    return archive
19
24
20
25
21
26
@pytest .fixture
22
- def tar_test_file (tmp_path ):
23
- """Fixture pour créer un fichier TAR de test ."""
24
- tar_path = tmp_path / "test .tar.gz"
27
+ def dummy_tar_file (tmp_path ):
28
+ """Create a dummy TAR file containing two CSV files ."""
29
+ tar_path = tmp_path / "dummy .tar.gz"
25
30
with tarfile .open (tar_path , mode = "w:gz" ) as tf :
26
31
info1 = tarfile .TarInfo (name = "file1.csv" )
27
32
data1 = io .BytesIO (b"col1,col2\n 1,2\n 3,4" )
@@ -35,23 +40,27 @@ def tar_test_file(tmp_path):
35
40
return tar_path
36
41
37
42
38
# Tests for reading archives via `read_archive`
def test_read_zip_archive(dummy_zip_file):
    """Extract ``file1.csv`` from the ZIP fixture and check the DataFrame."""
    archive_path = str(dummy_zip_file)
    frame = read_archive(
        archive_path, extract_to_df=True, selected_files=["file1.csv"]
    )
    assert isinstance(frame, pd.DataFrame)
    assert frame.columns.tolist() == ["col1", "col2"]
    assert frame.shape == (2, 2)
45
52
46
53
47
def test_list_files_in_zip(dummy_zip_file):
    """Without DataFrame extraction, both CSV members are listed."""
    listing = read_archive(str(dummy_zip_file), extract_to_df=False)
    assert isinstance(listing, list)
    for member in ("file1.csv", "file2.csv"):
        assert member in listing
52
60
53
61
54
- def test_no_compatible_files (tmp_path ):
62
+ def test_no_compatible_files_in_zip (tmp_path ):
63
+ """Test handling a ZIP archive with no compatible files."""
55
64
zip_path = tmp_path / "empty.zip"
56
65
with zipfile .ZipFile (zip_path , mode = "w" ) as zf :
57
66
zf .writestr ("file1.txt" , "Just some text" )
@@ -61,17 +70,82 @@ def test_no_compatible_files(tmp_path):
61
70
read_archive (str (zip_path ))
62
71
63
72
64
def test_read_tar_archive(dummy_tar_file):
    """Extract ``file1.csv`` from the TAR fixture and check the DataFrame."""
    archive_path = str(dummy_tar_file)
    frame = read_archive(
        archive_path, extract_to_df=True, selected_files=["file1.csv"]
    )
    assert isinstance(frame, pd.DataFrame)
    assert frame.columns.tolist() == ["col1", "col2"]
    assert frame.shape == (2, 2)
71
81
72
82
73
def test_list_files_in_tar(dummy_tar_file):
    """Without DataFrame extraction, both CSV members are listed."""
    listing = read_archive(str(dummy_tar_file), extract_to_df=False)
    assert isinstance(listing, list)
    for member in ("file1.csv", "file2.csv"):
        assert member in listing
89
+
90
+
91
def test_no_compatible_files_in_tar(tmp_path):
    """A TAR archive holding only a ``.txt`` member raises ``ValueError``."""
    payload = b"Just some text"
    member = tarfile.TarInfo(name="file1.txt")
    # The tar header must carry the payload length before the member is added.
    member.size = len(payload)
    archive = tmp_path / "invalid.tar.gz"
    with tarfile.open(archive, mode="w:gz") as tf:
        tf.addfile(member, io.BytesIO(payload))
    expected_message = "No compatible files found in the archive"
    with pytest.raises(ValueError, match=expected_message):
        read_archive(str(archive))
103
+
104
+
105
+ # Tests for unsupported file types
106
def test_read_archive_unsupported_file():
    """Check that an unrecognised extension is rejected by ``read_archive``.

    ``"test.unsupported"`` is passed without a ``file_type``; the call must
    raise ``ValueError`` carrying the "cannot infer" message.
    """
    # ``match`` is a regular expression applied with ``re.search``, so the
    # literal full stops are escaped to keep "." from matching any character.
    with pytest.raises(
        ValueError,
        match=(
            r"Cannot infer file type from the file extension\. "
            r"Please specify the 'file_type' parameter\."
        ),
    ):
        read_archive("test.unsupported")
114
+
115
+
116
def test_read_archive_no_extension():
    """Check that a path without any extension is rejected by ``read_archive``.

    ``"testfile"`` is passed without a ``file_type``; the call must raise
    ``ValueError`` carrying the "cannot infer" message.
    """
    # ``match`` is a regular expression applied with ``re.search``, so the
    # literal full stops are escaped to keep "." from matching any character.
    with pytest.raises(
        ValueError,
        match=(
            r"Cannot infer file type from the file extension\. "
            r"Please specify the 'file_type' parameter\."
        ),
    ):
        read_archive("testfile")
124
+
125
+
126
+ # Tests for interactive file selection
127
def test_interactive_file_selection_valid(dummy_zip_file):
    """Stub ``input`` with ``"1,2"`` and expect both CSV members back."""
    # NOTE(review): presumably read_archive prompts through input() when no
    # selected_files are given; confirm against janitor.io, since nothing
    # here verifies that the stub was actually called.
    with patch("builtins.input", return_value="1,2"):
        selection = read_archive(str(dummy_zip_file), extract_to_df=False)
    for member in ("file1.csv", "file2.csv"):
        assert member in selection
134
+
135
+
136
+ # Tests for file type inference
137
def test_infer_file_type_valid():
    """Each supported extension maps to its archive type string."""
    expected_by_name = {
        "test.zip": "zip",
        "test.tar": "tar",
        "test.tar.gz": "tar.gz",
    }
    for file_name, expected_type in expected_by_name.items():
        assert _infer_file_type(file_name) == expected_type
142
+
143
+
144
def test_infer_file_type_invalid():
    """Check that ``_infer_file_type`` rejects a name with no extension.

    ``"testfile"`` must raise ``ValueError`` carrying the "cannot infer"
    message.
    """
    # ``match`` is a regular expression applied with ``re.search``, so the
    # literal full stops are escaped to keep "." from matching any character.
    with pytest.raises(
        ValueError,
        match=(
            r"Cannot infer file type from the file extension\. "
            r"Please specify the 'file_type' parameter\."
        ),
    ):
        _infer_file_type("testfile")
0 commit comments