Skip to content

Commit fbf5c66

Browse files
committed
test: add li violations validator, validator test (stats violator missing)
1 parent 325d28d commit fbf5c66

File tree

2 files changed

+634
-6
lines changed

2 files changed

+634
-6
lines changed
Lines changed: 385 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,385 @@
1+
import geopandas as gpd
2+
import pandas as pd
3+
from shapely.geometry import Point
4+
5+
from src.config.config import USE_CRS
6+
from src.validation.li_violations import LIViolationsOutputValidator
7+
8+
9+
def _create_li_violations_test_data(base_test_data):
10+
"""Create test data with only the columns expected by the LI violations validator."""
11+
return pd.DataFrame(
12+
{
13+
"opa_id": base_test_data["opa_id"],
14+
"all_violations_past_year": [0, 1, 2], # Valid integer values
15+
"open_violations_past_year": [0, 0, 1], # Valid integer values
16+
"geometry": base_test_data["geometry"],
17+
}
18+
)
19+
20+
21+
def test_li_violations_validator_schema_valid_data(base_test_data):
    """Check that a well-formed frame validates with no errors reported."""
    frame = gpd.GeoDataFrame(
        _create_li_violations_test_data(base_test_data),
        geometry="geometry",
        crs=USE_CRS,
    )

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # Valid input: the success flag is set and the error list is empty.
    assert outcome.success
    assert len(outcome.errors) == 0
32+
33+
34+
def test_li_violations_validator_schema_edge_cases(base_test_data):
    """Check that frames lacking violation columns, or holding nulls in them, fail."""

    def _expect_rejection(columns):
        # Wrap the raw columns in a GeoDataFrame, validate, and require a failure.
        frame = gpd.GeoDataFrame(
            pd.DataFrame(columns), geometry="geometry", crs=USE_CRS
        )
        outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)
        assert not outcome.success
        assert len(outcome.errors) > 0

    # Scenario 1: both violation-count columns are absent.
    _expect_rejection(
        {
            "opa_id": base_test_data["opa_id"],
            "geometry": base_test_data["geometry"],
        }
    )

    # Scenario 2: each violation-count column contains one null entry.
    _expect_rejection(
        {
            "opa_id": base_test_data["opa_id"],
            "all_violations_past_year": [0, None, 2],
            "open_violations_past_year": [0, 0, None],
            "geometry": base_test_data["geometry"],
        }
    )
72+
73+
74+
def test_li_violations_validator_row_level_validation(base_test_data):
    """Check that row-level validation passes on the standard valid frame.

    Statistical checks are disabled (``check_stats=False``), which the original
    docstring describes as working with any dataset size.
    """
    valid_rows = _create_li_violations_test_data(base_test_data)
    frame = gpd.GeoDataFrame(valid_rows, geometry="geometry", crs=USE_CRS)

    checker = LIViolationsOutputValidator()
    outcome = checker.validate(frame, check_stats=False)

    # Row-level checks are expected to pass without any recorded errors.
    assert outcome.success
    assert len(outcome.errors) == 0
85+
86+
87+
def test_li_violations_validator_missing_required_columns(base_test_data):
    """Check that dropping any one of the three data columns makes validation fail."""

    def _expect_rejection(columns):
        # Validate a frame built from ``columns`` and require a reported failure.
        frame = gpd.GeoDataFrame(
            pd.DataFrame(columns), geometry="geometry", crs=USE_CRS
        )
        outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)
        assert not outcome.success
        assert len(outcome.errors) > 0

    # No opa_id column.
    _expect_rejection(
        {
            "all_violations_past_year": [0, 1, 2],
            "open_violations_past_year": [0, 0, 1],
            "geometry": base_test_data["geometry"],
        }
    )

    # No all_violations_past_year column.
    _expect_rejection(
        {
            "opa_id": base_test_data["opa_id"],
            "open_violations_past_year": [0, 0, 1],
            "geometry": base_test_data["geometry"],
        }
    )

    # No open_violations_past_year column.
    _expect_rejection(
        {
            "opa_id": base_test_data["opa_id"],
            "all_violations_past_year": [0, 1, 2],
            "geometry": base_test_data["geometry"],
        }
    )
145+
146+
147+
def test_li_violations_validator_non_integer_all_violations(base_test_data):
    """Check that a string entry in ``all_violations_past_year`` is reported."""
    all_counts = [0, "maybe", 2]  # middle entry is a string, not an integer
    raw = pd.DataFrame(
        {
            "opa_id": base_test_data["opa_id"],
            "all_violations_past_year": all_counts,
            "open_violations_past_year": [0, 0, 1],
            "geometry": base_test_data["geometry"],
        }
    )
    frame = gpd.GeoDataFrame(raw, geometry="geometry", crs=USE_CRS)

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # The non-integer value is expected to produce at least one error.
    assert not outcome.success
    assert len(outcome.errors) > 0
170+
171+
172+
def test_li_violations_validator_non_integer_open_violations(base_test_data):
    """Check that a fractional entry in ``open_violations_past_year`` is reported."""
    open_counts = [0, 0.5, 1]  # 0.5 is not an integer count
    raw = pd.DataFrame(
        {
            "opa_id": base_test_data["opa_id"],
            "all_violations_past_year": [0, 1, 2],
            "open_violations_past_year": open_counts,
            "geometry": base_test_data["geometry"],
        }
    )
    frame = gpd.GeoDataFrame(raw, geometry="geometry", crs=USE_CRS)

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # The non-integer value is expected to produce at least one error.
    assert not outcome.success
    assert len(outcome.errors) > 0
195+
196+
197+
def test_li_violations_validator_null_all_violations_values(base_test_data):
    """Check that a null in ``all_violations_past_year`` makes validation fail."""
    all_counts = [0, None, 2]  # middle entry is null
    raw = pd.DataFrame(
        {
            "opa_id": base_test_data["opa_id"],
            "all_violations_past_year": all_counts,
            "open_violations_past_year": [0, 0, 1],
            "geometry": base_test_data["geometry"],
        }
    )
    frame = gpd.GeoDataFrame(raw, geometry="geometry", crs=USE_CRS)

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # The null entry is expected to produce at least one error.
    assert not outcome.success
    assert len(outcome.errors) > 0
220+
221+
222+
def test_li_violations_validator_null_open_violations_values(base_test_data):
    """Check that a null in ``open_violations_past_year`` makes validation fail."""
    open_counts = [0, None, 1]  # middle entry is null
    raw = pd.DataFrame(
        {
            "opa_id": base_test_data["opa_id"],
            "all_violations_past_year": [0, 1, 2],
            "open_violations_past_year": open_counts,
            "geometry": base_test_data["geometry"],
        }
    )
    frame = gpd.GeoDataFrame(raw, geometry="geometry", crs=USE_CRS)

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # The null entry is expected to produce at least one error.
    assert not outcome.success
    assert len(outcome.errors) > 0
245+
246+
247+
def test_li_violations_validator_negative_values(base_test_data):
    """Check that a negative count in either violation column makes validation fail."""

    def _expect_rejection(all_counts, open_counts):
        # Build the frame with the supplied count columns and require a failure.
        raw = pd.DataFrame(
            {
                "opa_id": base_test_data["opa_id"],
                "all_violations_past_year": all_counts,
                "open_violations_past_year": open_counts,
                "geometry": base_test_data["geometry"],
            }
        )
        frame = gpd.GeoDataFrame(raw, geometry="geometry", crs=USE_CRS)
        outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)
        assert not outcome.success
        assert len(outcome.errors) > 0

    # Negative entry in all_violations_past_year.
    _expect_rejection([0, -1, 2], [0, 0, 1])

    # Negative entry in open_violations_past_year.
    _expect_rejection([0, 1, 2], [0, -1, 1])
294+
295+
296+
def test_li_violations_validator_empty_dataframe(empty_dataframe):
    """Check that the ``empty_dataframe`` fixture is rejected by the validator.

    The original comment attributes the failure to missing required columns;
    the fixture's contents are defined outside this file.
    """
    frame = gpd.GeoDataFrame(empty_dataframe, geometry="geometry", crs=USE_CRS)

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # An empty frame must not validate and must report at least one error.
    assert not outcome.success
    assert len(outcome.errors) > 0
306+
307+
308+
def test_li_violations_validator_duplicate_opa_ids(base_test_data):
    """Check that a repeated ``opa_id`` value makes validation fail.

    ``base_test_data`` is requested but not read; every column is built inline.
    """
    # Philadelphia coordinates in EPSG:2272; the duplicated row reuses them.
    repeated_xy = (2695530.9812315595, 234150.64579590267)
    raw = pd.DataFrame(
        {
            "opa_id": ["351243200", "351243200", "212525650"],  # first two collide
            "all_violations_past_year": [0, 1, 2],
            "open_violations_past_year": [0, 0, 1],
            "geometry": [
                Point(*repeated_xy),
                Point(*repeated_xy),
                Point(2718195.202635317, 275457.41980949586),
            ],
        }
    )
    frame = gpd.GeoDataFrame(raw, geometry="geometry", crs=USE_CRS)

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # The duplicated identifier is expected to produce at least one error.
    assert not outcome.success
    assert len(outcome.errors) > 0
336+
337+
338+
def test_li_violations_validator_non_string_opa_id(base_test_data):
    """Check that an integer entry in ``opa_id`` makes validation fail."""
    mixed_ids = ["351243200", 123456789, "212525650"]  # middle entry is an int
    raw = pd.DataFrame(
        {
            "opa_id": mixed_ids,
            "all_violations_past_year": [0, 1, 2],
            "open_violations_past_year": [0, 0, 1],
            "geometry": base_test_data["geometry"],
        }
    )
    frame = gpd.GeoDataFrame(raw, geometry="geometry", crs=USE_CRS)

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # The non-string identifier is expected to produce at least one error.
    assert not outcome.success
    assert len(outcome.errors) > 0
361+
362+
363+
def test_li_violations_validator_null_opa_id(base_test_data):
    """Check that a null entry in ``opa_id`` makes validation fail."""
    ids_with_gap = ["351243200", None, "212525650"]  # middle entry is null
    raw = pd.DataFrame(
        {
            "opa_id": ids_with_gap,
            "all_violations_past_year": [0, 1, 2],
            "open_violations_past_year": [0, 0, 1],
            "geometry": base_test_data["geometry"],
        }
    )
    frame = gpd.GeoDataFrame(raw, geometry="geometry", crs=USE_CRS)

    outcome = LIViolationsOutputValidator().validate(frame, check_stats=False)

    # The null identifier is expected to produce at least one error.
    assert not outcome.success
    assert len(outcome.errors) > 0

0 commit comments

Comments
 (0)