Skip to content

Commit 2384fde

Browse files
authored
Merge pull request #110 from posit-dev/fix-validations-to-work-with-datetime
fix: allow validations to work with dates/datetimes
2 parents ea6e7c4 + f1e1373 commit 2384fde

File tree

3 files changed: 223 additions and 8 deletions

pointblank/_constants.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,12 @@
77
]
88

99
COMPATIBLE_DTYPES = {
10-
"gt": ["numeric"],
11-
"lt": ["numeric"],
12-
"eq": ["numeric"],
13-
"ne": ["numeric"],
14-
"ge": ["numeric"],
15-
"le": ["numeric"],
10+
"gt": ["numeric", "datetime"],
11+
"lt": ["numeric", "datetime"],
12+
"eq": ["numeric", "datetime"],
13+
"ne": ["numeric", "datetime"],
14+
"ge": ["numeric", "datetime"],
15+
"le": ["numeric", "datetime"],
1616
"between": ["numeric"],
1717
"outside": ["numeric"],
1818
"in_set": ["numeric", "str"],

pointblank/validate.py

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7987,14 +7987,19 @@ def _prep_column_text(column: list[str]) -> str:
79877987

79887988

79897989
def _prep_values_text(
7990-
values: str | int | float | list[str | int | float],
7990+
values: str
7991+
| int
7992+
| float
7993+
| datetime.datetime
7994+
| datetime.date
7995+
| list[str | int | float | datetime.datetime | datetime.date],
79917996
lang: str,
79927997
limit: int = 3,
79937998
) -> str:
79947999
if isinstance(values, ColumnLiteral):
79958000
return f"`{values}`"
79968001

7997-
if isinstance(values, (str, int, float)):
8002+
if isinstance(values, (str, int, float, datetime.datetime, datetime.date)):
79988003
values = [values]
79998004

80008005
length_values = len(values)
@@ -8005,6 +8010,14 @@ def _prep_values_text(
80058010
if length_values > limit:
80068011
num_omitted = length_values - limit
80078012

8013+
# Format datetime objects as strings if present
8014+
formatted_values = []
8015+
for value in values[:limit]:
8016+
if isinstance(value, (datetime.datetime, datetime.date)):
8017+
formatted_values.append(f"`{value.isoformat()}`")
8018+
else:
8019+
formatted_values.append(f"`{value}`")
8020+
80088021
values_str = ", ".join([f"`{value}`" for value in values[:limit]])
80098022

80108023
additional_text = EXPECT_FAIL_TEXT["values_text"][lang]
@@ -8014,6 +8027,14 @@ def _prep_values_text(
80148027
values_str = f"{values_str}, {additional_str}"
80158028

80168029
else:
8030+
# Format datetime objects as strings if present
8031+
formatted_values = []
8032+
for value in values:
8033+
if isinstance(value, (datetime.datetime, datetime.date)):
8034+
formatted_values.append(f"`{value.isoformat()}`")
8035+
else:
8036+
formatted_values.append(f"`{value}`")
8037+
80178038
values_str = ", ".join([f"`{value}`" for value in values])
80188039

80198040
return values_str

tests/test_validate.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@
8282
"tbl_dates_times_text_sqlite",
8383
]
8484

85+
TBL_TRUE_DATES_TIMES_LIST = [
86+
"tbl_true_dates_times_pd",
87+
"tbl_true_dates_times_pl",
88+
]
89+
8590

8691
@pytest.fixture
8792
def tbl_pd():
@@ -104,6 +109,18 @@ def tbl_dates_times_text_pd():
104109
)
105110

106111

112+
@pytest.fixture
113+
def tbl_true_dates_times_pd():
114+
return pd.DataFrame(
115+
{
116+
"date_1": pd.to_datetime(["2021-01-01", "2021-02-01"]),
117+
"date_2": pd.to_datetime(["2021-02-01", "2021-03-01"]),
118+
"dttm_1": pd.to_datetime(["2021-01-01 02:30:00", "2021-02-01 02:30:00"]),
119+
"dttm_2": pd.to_datetime(["2021-02-01 03:30:00", "2021-03-01 03:30:00"]),
120+
}
121+
)
122+
123+
107124
@pytest.fixture
108125
def tbl_pl():
109126
return pl.DataFrame({"x": [1, 2, 3, 4], "y": [4, 5, 6, 7], "z": [8, 8, 8, 8]})
@@ -125,6 +142,27 @@ def tbl_dates_times_text_pl():
125142
)
126143

127144

145+
@pytest.fixture
146+
def tbl_true_dates_times_pl():
147+
pl_df = pl.DataFrame(
148+
{
149+
"date_1": ["2021-01-01", "2021-02-01"],
150+
"date_2": ["2021-02-01", "2021-03-01"],
151+
"dttm_1": ["2021-01-01 02:30:00", "2021-02-01 02:30:00"],
152+
"dttm_2": ["2021-02-01 03:30:00", "2021-03-01 03:30:00"],
153+
}
154+
)
155+
156+
return pl_df.with_columns(
157+
[
158+
pl.col("date_1").str.to_date(),
159+
pl.col("date_2").str.to_date(),
160+
pl.col("dttm_1").str.to_datetime(),
161+
pl.col("dttm_2").str.to_datetime(),
162+
]
163+
)
164+
165+
128166
@pytest.fixture
129167
def tbl_parquet():
130168
file_path = pathlib.Path.cwd() / "tests" / "tbl_files" / "tbl_xyz.parquet"
@@ -4065,6 +4103,162 @@ def test_col_schema_match_columns_only():
40654103
)
40664104

40674105

4106+
@pytest.mark.parametrize("tbl_fixture", TBL_TRUE_DATES_TIMES_LIST)
4107+
def test_date_validation_across_cols(request, tbl_fixture):
4108+
# {
4109+
# "date_1": pd.to_datetime(["2021-01-01", "2021-02-01"]),
4110+
# "date_2": pd.to_datetime(["2021-02-01", "2021-03-01"]),
4111+
# "dttm_1": pd.to_datetime(["2021-01-01 02:30:00", "2021-02-01 02:30:00"]),
4112+
# "dttm_2": pd.to_datetime(["2021-02-01 03:30:00", "2021-03-01 03:30:00"]),
4113+
# }
4114+
4115+
tbl = request.getfixturevalue(tbl_fixture)
4116+
4117+
assert (
4118+
Validate(data=tbl)
4119+
.col_vals_gt(
4120+
columns="date_2",
4121+
value=col("date_1"),
4122+
)
4123+
.interrogate()
4124+
.n_passed(i=1, scalar=True)
4125+
== 2
4126+
)
4127+
4128+
assert (
4129+
Validate(data=tbl)
4130+
.col_vals_ge(
4131+
columns="date_2",
4132+
value=col("date_1"),
4133+
)
4134+
.interrogate()
4135+
.n_passed(i=1, scalar=True)
4136+
== 2
4137+
)
4138+
4139+
assert (
4140+
Validate(data=tbl)
4141+
.col_vals_eq(
4142+
columns="date_2",
4143+
value=col("date_1"),
4144+
)
4145+
.interrogate()
4146+
.n_passed(i=1, scalar=True)
4147+
== 0
4148+
)
4149+
4150+
assert (
4151+
Validate(data=tbl)
4152+
.col_vals_ne(
4153+
columns="date_2",
4154+
value=col("date_1"),
4155+
)
4156+
.interrogate()
4157+
.n_passed(i=1, scalar=True)
4158+
== 2
4159+
)
4160+
4161+
assert (
4162+
Validate(data=tbl)
4163+
.col_vals_lt(
4164+
columns="date_1",
4165+
value=col("date_2"),
4166+
)
4167+
.interrogate()
4168+
.n_passed(i=1, scalar=True)
4169+
== 2
4170+
)
4171+
4172+
assert (
4173+
Validate(data=tbl)
4174+
.col_vals_le(
4175+
columns="date_1",
4176+
value=col("date_2"),
4177+
)
4178+
.interrogate()
4179+
.n_passed(i=1, scalar=True)
4180+
== 2
4181+
)
4182+
4183+
4184+
@pytest.mark.parametrize("tbl_fixture", TBL_TRUE_DATES_TIMES_LIST)
4185+
def test_datetime_validation_across_cols(request, tbl_fixture):
4186+
# {
4187+
# "date_1": pd.to_datetime(["2021-01-01", "2021-02-01"]),
4188+
# "date_2": pd.to_datetime(["2021-02-01", "2021-03-01"]),
4189+
# "dttm_1": pd.to_datetime(["2021-01-01 02:30:00", "2021-02-01 02:30:00"]),
4190+
# "dttm_2": pd.to_datetime(["2021-02-01 03:30:00", "2021-03-01 03:30:00"]),
4191+
# }
4192+
4193+
tbl = request.getfixturevalue(tbl_fixture)
4194+
4195+
assert (
4196+
Validate(data=tbl)
4197+
.col_vals_gt(
4198+
columns="dttm_2",
4199+
value=col("dttm_1"),
4200+
)
4201+
.interrogate()
4202+
.n_passed(i=1, scalar=True)
4203+
== 2
4204+
)
4205+
4206+
assert (
4207+
Validate(data=tbl)
4208+
.col_vals_ge(
4209+
columns="dttm_2",
4210+
value=col("dttm_1"),
4211+
)
4212+
.interrogate()
4213+
.n_passed(i=1, scalar=True)
4214+
== 2
4215+
)
4216+
4217+
assert (
4218+
Validate(data=tbl)
4219+
.col_vals_eq(
4220+
columns="dttm_2",
4221+
value=col("dttm_1"),
4222+
)
4223+
.interrogate()
4224+
.n_passed(i=1, scalar=True)
4225+
== 0
4226+
)
4227+
4228+
assert (
4229+
Validate(data=tbl)
4230+
.col_vals_ne(
4231+
columns="dttm_2",
4232+
value=col("dttm_1"),
4233+
)
4234+
.interrogate()
4235+
.n_passed(i=1, scalar=True)
4236+
== 2
4237+
)
4238+
4239+
assert (
4240+
Validate(data=tbl)
4241+
.col_vals_lt(
4242+
columns="dttm_1",
4243+
value=col("dttm_2"),
4244+
)
4245+
.interrogate()
4246+
.n_passed(i=1, scalar=True)
4247+
== 2
4248+
)
4249+
4250+
assert (
4251+
Validate(data=tbl)
4252+
.col_vals_le(
4253+
columns="dttm_1",
4254+
value=col("dttm_2"),
4255+
)
4256+
.interrogate()
4257+
.n_passed(i=1, scalar=True)
4258+
== 2
4259+
)
4260+
4261+
40684262
@pytest.mark.parametrize(
40694263
"tbl_fixture", ["tbl_pd_variable_names", "tbl_pl_variable_names", "tbl_memtable_variable_names"]
40704264
)

0 commit comments

Comments
 (0)