1
+ import numpy as np
2
+ import pytest
3
+
4
+ import pandas as pd
5
+ from pandas import (
6
+ DataFrame ,
7
+ check ,
8
+ )
9
+ import pandas ._testing as tm
10
+
11
+
12
class TestCheck:
    """Tests for the per-column summary frame returned by ``check``.

    The expected frames used here have the columns ``unique``, ``non_null``,
    ``missing`` and ``missing_pct``; for non-empty input they are indexed by
    the input frame's column labels.
    """

    def test_basic_functionality(self):
        """Summarise a three-column frame with one missing value in two columns."""
        df = DataFrame(
            {
                "A": [1, 2, None, 4],
                "B": ["x", "y", "x", None],
                "C": [1.0, 2.0, 3.0, 4.0],
            }
        )

        result = check(df)

        expected = DataFrame(
            {
                "unique": [3, 2, 4],
                "non_null": [3, 3, 4],
                "missing": [1, 1, 0],
                "missing_pct": [25.0, 25.0, 0.0],
            },
            index=["A", "B", "C"],
        )
        tm.assert_frame_equal(result, expected)

    def test_empty_dataframe(self):
        """An empty frame yields an empty summary with typed columns."""
        df = DataFrame()

        result = check(df)

        # Counts are expected as int64 and the percentage as float64, even
        # though there are no rows to infer those dtypes from.
        expected = DataFrame(
            {
                "unique": [],
                "non_null": [],
                "missing": [],
                "missing_pct": [],
            }
        ).astype(
            {
                "unique": "int64",
                "non_null": "int64",
                "missing": "int64",
                "missing_pct": "float64",
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_all_null_column(self):
        """A column holding only ``None`` reports 0 unique and 100% missing."""
        df = DataFrame(
            {
                "A": [1, 2, 3],
                "B": [None, None, None],
                "C": ["x", "y", "z"],
            }
        )

        result = check(df)

        expected = DataFrame(
            {
                "unique": [3, 0, 3],
                "non_null": [3, 0, 3],
                "missing": [0, 3, 0],
                "missing_pct": [0.0, 100.0, 0.0],
            },
            index=["A", "B", "C"],
        )
        tm.assert_frame_equal(result, expected)

    def test_no_missing_values(self):
        """A fully populated frame reports zero missing everywhere."""
        df = DataFrame(
            {
                "A": [1, 2, 3, 4],
                "B": ["w", "x", "y", "z"],
                "C": [1.1, 2.2, 3.3, 4.4],
            }
        )

        result = check(df)

        expected = DataFrame(
            {
                "unique": [4, 4, 4],
                "non_null": [4, 4, 4],
                "missing": [0, 0, 0],
                "missing_pct": [0.0, 0.0, 0.0],
            },
            index=["A", "B", "C"],
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "kwargs, expected_pct",
        [
            # No keyword at all, so the default rounding (2 digits) is what
            # gets exercised rather than an explicit round_digits=2.
            ({}, [66.67, 0.0]),
            ({"round_digits": 0}, [67.0, 0.0]),
            ({"round_digits": 4}, [66.6667, 0.0]),
        ],
        ids=["default", "zero_digits", "four_digits"],
    )
    def test_round_digits_parameter(self, kwargs, expected_pct):
        """``round_digits`` controls the rounding of ``missing_pct``.

        Parameters
        ----------
        kwargs : dict
            Keyword arguments forwarded to ``check``; empty for the default.
        expected_pct : list of float
            Expected ``missing_pct`` values for columns ``A`` and ``B``.
        """
        df = DataFrame(
            {
                "A": [1, None, None],  # 2/3 = 66.666... % missing
                "B": [1, 2, 3],
            }
        )

        result = check(df, **kwargs)

        expected = DataFrame(
            {
                "unique": [1, 3],
                "non_null": [1, 3],
                "missing": [2, 0],
                "missing_pct": expected_pct,
            },
            index=["A", "B"],
        )
        tm.assert_frame_equal(result, expected)

    def test_various_dtypes(self):
        """Missing values are counted the same way across several dtypes."""
        df = DataFrame(
            {
                "int_col": [1, 2, None],
                "float_col": [1.1, None, 3.3],
                "str_col": ["a", "b", None],
                "bool_col": [True, False, None],
                "datetime_col": pd.to_datetime(["2020-01-01", "2020-01-02", None]),
            }
        )

        result = check(df)

        expected = DataFrame(
            {
                "unique": [2, 2, 2, 2, 2],
                "non_null": [2, 2, 2, 2, 2],
                "missing": [1, 1, 1, 1, 1],
                "missing_pct": [33.33, 33.33, 33.33, 33.33, 33.33],
            },
            index=["int_col", "float_col", "str_col", "bool_col", "datetime_col"],
        )
        tm.assert_frame_equal(result, expected)

    def test_duplicate_values(self):
        """``unique`` counts distinct values, not rows."""
        df = DataFrame(
            {
                "A": [1, 1, 2, 2, 2],
                "B": ["x", "x", "x", "y", "y"],
                "C": [1, 1, 1, 1, 1],  # All same value
            }
        )

        result = check(df)

        expected = DataFrame(
            {
                "unique": [2, 2, 1],
                "non_null": [5, 5, 5],
                "missing": [0, 0, 0],
                "missing_pct": [0.0, 0.0, 0.0],
            },
            index=["A", "B", "C"],
        )
        tm.assert_frame_equal(result, expected)

    def test_single_row_dataframe(self):
        """A one-row frame reports either 0% or 100% missing per column."""
        df = DataFrame(
            {
                "A": [1],
                "B": [None],
                "C": ["test"],
            }
        )

        result = check(df)

        expected = DataFrame(
            {
                "unique": [1, 0, 1],
                "non_null": [1, 0, 1],
                "missing": [0, 1, 0],
                "missing_pct": [0.0, 100.0, 0.0],
            },
            index=["A", "B", "C"],
        )
        tm.assert_frame_equal(result, expected)

    def test_single_column_dataframe(self):
        """A one-column frame yields a one-row summary."""
        df = DataFrame({"A": [1, 2, None, 4]})

        result = check(df)

        expected = DataFrame(
            {
                "unique": [3],
                "non_null": [3],
                "missing": [1],
                "missing_pct": [25.0],
            },
            index=["A"],
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "obj",
        ["not a dataframe", [1, 2, 3]],
        ids=["str", "list"],
    )
    def test_non_dataframe_raises_error(self, obj):
        """Non-DataFrame input raises ``AttributeError``.

        Parameters
        ----------
        obj : object
            A value that is not a DataFrame.
        """
        # NOTE(review): no ``match=`` is given because the error message is
        # not visible from this file; add one once the wording is confirmed.
        with pytest.raises(AttributeError):
            check(obj)

    def test_return_type(self):
        """The result is a ``DataFrame``."""
        df = DataFrame({"A": [1, 2, 3]})
        result = check(df)
        assert isinstance(result, DataFrame)

    def test_column_order_preserved(self):
        """Summary rows follow the input column order, not sorted order."""
        df = DataFrame(
            {
                "Z": [1, 2, 3],
                "A": [4, 5, 6],
                "M": [7, 8, 9],
            }
        )

        result = check(df)

        expected_index = pd.Index(["Z", "A", "M"])
        tm.assert_index_equal(result.index, expected_index)
0 commit comments