Skip to content

Commit f1e1373

Browse files
committed
Add col-col validation tests for some validation methods
1 parent 8c9741a commit f1e1373

File tree

1 file changed

+194
-0
lines changed

1 file changed

+194
-0
lines changed

tests/test_validate.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@
8282
"tbl_dates_times_text_sqlite",
8383
]
8484

85+
# Names of the fixtures whose date/datetime columns carry real temporal dtypes;
# one fixture per dataframe backend (pandas, polars).
TBL_TRUE_DATES_TIMES_LIST = [
    f"tbl_true_dates_times_{backend}" for backend in ("pd", "pl")
]
89+
8590

8691
@pytest.fixture
8792
def tbl_pd():
@@ -104,6 +109,18 @@ def tbl_dates_times_text_pd():
104109
)
105110

106111

112+
@pytest.fixture
def tbl_true_dates_times_pd():
    """Return a two-row pandas DataFrame of parsed date and datetime columns.

    Every column is passed through `pd.to_datetime()`. In each row the `*_2`
    value is later than the matching `*_1` value.
    """
    raw_columns = {
        "date_1": ["2021-01-01", "2021-02-01"],
        "date_2": ["2021-02-01", "2021-03-01"],
        "dttm_1": ["2021-01-01 02:30:00", "2021-02-01 02:30:00"],
        "dttm_2": ["2021-02-01 03:30:00", "2021-03-01 03:30:00"],
    }

    # Parse each column's strings; dict ordering keeps the column order stable.
    parsed_columns = {name: pd.to_datetime(values) for name, values in raw_columns.items()}

    return pd.DataFrame(parsed_columns)
122+
123+
107124
@pytest.fixture
108125
def tbl_pl():
109126
return pl.DataFrame({"x": [1, 2, 3, 4], "y": [4, 5, 6, 7], "z": [8, 8, 8, 8]})
@@ -125,6 +142,27 @@ def tbl_dates_times_text_pl():
125142
)
126143

127144

145+
@pytest.fixture
def tbl_true_dates_times_pl():
    """Return a two-row polars DataFrame of parsed date and datetime columns.

    The table is first built from strings; the `date_*` columns are then
    converted with `.str.to_date()` and the `dttm_*` columns with
    `.str.to_datetime()`. In each row the `*_2` value is later than the
    matching `*_1` value.
    """
    text_tbl = pl.DataFrame(
        {
            "date_1": ["2021-01-01", "2021-02-01"],
            "date_2": ["2021-02-01", "2021-03-01"],
            "dttm_1": ["2021-01-01 02:30:00", "2021-02-01 02:30:00"],
            "dttm_2": ["2021-02-01 03:30:00", "2021-03-01 03:30:00"],
        }
    )

    # Each expression overwrites the string column of the same name in place.
    date_exprs = [pl.col(name).str.to_date() for name in ("date_1", "date_2")]
    datetime_exprs = [pl.col(name).str.to_datetime() for name in ("dttm_1", "dttm_2")]

    return text_tbl.with_columns(date_exprs + datetime_exprs)
164+
165+
128166
@pytest.fixture
129167
def tbl_parquet():
130168
file_path = pathlib.Path.cwd() / "tests" / "tbl_files" / "tbl_xyz.parquet"
@@ -4065,6 +4103,162 @@ def test_col_schema_match_columns_only():
40654103
)
40664104

40674105

4106+
@pytest.mark.parametrize("tbl_fixture", TBL_TRUE_DATES_TIMES_LIST)
def test_date_validation_across_cols(request, tbl_fixture):
    """Check column-vs-column comparison steps on the date columns.

    The fixture tables have two rows:

        date_1: 2021-01-01, 2021-02-01
        date_2: 2021-02-01, 2021-03-01

    so `date_2` is strictly later than `date_1` in both rows.
    """
    tbl = request.getfixturevalue(tbl_fixture)

    # (validation method, column under test, column compared against, rows expected to pass)
    cases = [
        ("col_vals_gt", "date_2", "date_1", 2),
        ("col_vals_ge", "date_2", "date_1", 2),
        ("col_vals_eq", "date_2", "date_1", 0),
        ("col_vals_ne", "date_2", "date_1", 2),
        ("col_vals_lt", "date_1", "date_2", 2),
        ("col_vals_le", "date_1", "date_2", 2),
    ]

    for method_name, target, other, expected in cases:
        # A fresh Validate object per case so the step under test is always step 1.
        add_step = getattr(Validate(data=tbl), method_name)
        validation = add_step(columns=target, value=col(other)).interrogate()

        assert validation.n_passed(i=1, scalar=True) == expected
4182+
4183+
4184+
@pytest.mark.parametrize("tbl_fixture", TBL_TRUE_DATES_TIMES_LIST)
def test_datetime_validation_across_cols(request, tbl_fixture):
    """Check column-vs-column comparison steps on the datetime columns.

    The fixture tables have two rows:

        dttm_1: 2021-01-01 02:30:00, 2021-02-01 02:30:00
        dttm_2: 2021-02-01 03:30:00, 2021-03-01 03:30:00

    so `dttm_2` is strictly later than `dttm_1` in both rows.
    """
    tbl = request.getfixturevalue(tbl_fixture)

    # (validation method, column under test, column compared against, rows expected to pass)
    cases = [
        ("col_vals_gt", "dttm_2", "dttm_1", 2),
        ("col_vals_ge", "dttm_2", "dttm_1", 2),
        ("col_vals_eq", "dttm_2", "dttm_1", 0),
        ("col_vals_ne", "dttm_2", "dttm_1", 2),
        ("col_vals_lt", "dttm_1", "dttm_2", 2),
        ("col_vals_le", "dttm_1", "dttm_2", 2),
    ]

    for method_name, target, other, expected in cases:
        # A fresh Validate object per case so the step under test is always step 1.
        add_step = getattr(Validate(data=tbl), method_name)
        validation = add_step(columns=target, value=col(other)).interrogate()

        assert validation.n_passed(i=1, scalar=True) == expected
4260+
4261+
40684262
@pytest.mark.parametrize(
40694263
"tbl_fixture", ["tbl_pd_variable_names", "tbl_pl_variable_names", "tbl_memtable_variable_names"]
40704264
)

0 commit comments

Comments
 (0)