Skip to content

Commit 5c8d942

Browse files
authored
Added tests, before/after for time and date, and better regex for GPS (#15)
1 parent fdcec09 commit 5c8d942

File tree

3 files changed

+151
-14
lines changed

3 files changed

+151
-14
lines changed

README.md

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ In all cases, you will need to at least include a list of validators and associa
108108
* *ignore_space* (Default False): whether to trim the values for spaces before checking validity
109109
* *ignore_case* (Default False): whether to ignore the case
110110

111-
The last 3 parameters will affect all the validators (when relevant), but can be overridden at the validator level (e.g., you can set 'empty_ok' to True for all, but set it to False for a specific validator).
111+
The last 3 parameters will affect all the validators (when relevant), but can be overridden at the validator level (e.g., you can set 'empty_ok' to True for all, but set it to False for a specific validator).
112112

113113

114114
## Python format
@@ -165,13 +165,17 @@ All validators (except NoValidator) have the 'empty_ok' option, which will consi
165165
* *valid_values*: Dict with the *linked_column* values as keys, and list of valid values as values
166166
* Ex: {"Test": ['1', '2'], "Test2": ['3', '4']}
167167
* EmailValidator(empty_ok=False)
168-
* DateValidator(day_first=True, empty_ok=False)
169-
* Validate that a value is a date.
168+
* DateValidator(day_first=True, empty_ok=False, before=None, after=None)
169+
* Validate that a value is a date.
170170
* *day_first* (Default True): Whether to consider the day as the first part of the date for ambiguous values.
171-
* TimeValidator(empty_ok=False)
171+
* *before*: Latest date allowed
172+
* *after*: Earliest date allowed
173+
* TimeValidator(empty_ok=False, before=None, after=None)
172174
* Validate that a value is a time of the day
175+
* *before*: Latest value allowed
176+
* *after*: Earliest value allowed
173177
* UniqueValidator(unique_with=[], empty_ok=False)
174-
* Validate that a column has only unique values.
178+
* Validate that a column has only unique values.
175179
* *unique_with*: List of column names if you need a tuple of column values to be unique.
176180
* Ex: *I want the tuple (value of column A, value of column B) to be unique*
177181
* OntologyValidator(ontology, root_term="", empty_ok=False)
@@ -195,5 +199,5 @@ All validators (except NoValidator) have the 'empty_ok' option, which will consi
195199
* RegexValidator(regex, excel_formula="", empty_ok=False)
196200
* Validate that a term matches a specific regex
197201
* **No in-file validation generated** *unless using excel_formula*
198-
* *excel_formula*: Custom rules for in-file validation. [Examples here](http://www.contextures.com/xlDataVal07.html).
202+
* *excel_formula*: Custom rules for in-file validation. [Examples here](http://www.contextures.com/xlDataVal07.html).
199203
* "{CNAME}" will be replaced by the appropriate column name

checkcel/validators.py

Lines changed: 105 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -314,10 +314,25 @@ def describe(self, column_name):
314314
class DateValidator(Validator):
315315
""" Validates that a field is a Date """
316316

317-
def __init__(self, day_first=True, **kwargs):
317+
def __init__(self, day_first=True, before=None, after=None, **kwargs):
318318
super(DateValidator, self).__init__(**kwargs)
319319
self.day_first = day_first
320320

321+
if before:
322+
try:
323+
parser.parse(before).date()
324+
except parser.ParserError as e:
325+
raise BadValidatorException(e)
326+
327+
if after:
328+
try:
329+
parser.parse(after).date()
330+
except parser.ParserError as e:
331+
raise BadValidatorException(e)
332+
333+
self.before = before
334+
self.after = after
335+
321336
def validate(self, field, row_number, row={}):
322337
if self.ignore_space:
323338
field = field.strip()
@@ -326,7 +341,17 @@ def validate(self, field, row_number, row={}):
326341
if field or not self.empty_ok:
327342
# Pandas auto convert fields into dates (ignoring the parse_dates=False)
328343
field = str(field)
329-
parser.parse(field, dayfirst=self.day_first).date()
344+
date = parser.parse(field, dayfirst=self.day_first).date()
345+
346+
if self.before and not date < parser.parse(self.before, dayfirst=self.day_first).date():
347+
self.invalid_dict["invalid_set"].add(field)
348+
self.invalid_dict["invalid_rows"].add(row_number)
349+
raise ValidationException("Value {} is not before {}".format(field, self.before))
350+
351+
if self.after and not date > parser.parse(self.after, dayfirst=self.day_first).date():
352+
self.invalid_dict["invalid_set"].add(field)
353+
self.invalid_dict["invalid_rows"].add(row_number)
354+
raise ValidationException("Value {} is not after {}".format(field, self.after))
330355

331356
except parser.ParserError as e:
332357
self.invalid_dict["invalid_set"].add(field)
@@ -339,28 +364,76 @@ def bad(self):
339364

340365
def generate(self, column, additional_column=None, additional_worksheet=None):
341366
# GreaterThanOrEqual for validity with ODS.
342-
dv = DataValidation(type="date", formula1='01/01/1900', operator='greaterThanOrEqual')
367+
params = {"type": "date"}
368+
if (self.before is not None and self.after is not None):
369+
params["formula1"] = parser.parse(self.after).strftime("%Y/%m/%d")
370+
params["formula2"] = parser.parse(self.before).strftime("%Y/%m/%d")
371+
params["operator"] = "between"
372+
elif self.before is not None:
373+
params["formula1"] = parser.parse(self.before).strftime("%Y/%m/%d")
374+
params["operator"] = "lessThanOrEqual"
375+
elif self.after is not None:
376+
params["formula1"] = parser.parse(self.after).strftime("%Y/%m/%d")
377+
params["operator"] = "greaterThanOrEqual"
378+
379+
dv = DataValidation(**params)
343380
dv.add("{}2:{}1048576".format(column, column))
344381
return dv
345382

346383
def describe(self, column_name):
347-
return "{} : Date {}".format(column_name, "(required)" if not self.empty_ok else "")
384+
text = "{} : Date".format(column_name)
385+
if (self.after is not None and self.before is not None):
386+
text += " ({} - {})".format(self.after, self.before)
387+
elif self.after is not None:
388+
text += " >= {}".format(self.after)
389+
elif self.before is not None:
390+
text += " <= {}".format(self.before)
391+
392+
if not self.empty_ok:
393+
text += " (required)"
394+
395+
return text
348396

349397

350398
class TimeValidator(Validator):
351399
""" Validates that a field is a Time """
352400

353-
def __init__(self, **kwargs):
401+
def __init__(self, before=None, after=None, **kwargs):
354402
super(TimeValidator, self).__init__(**kwargs)
355403

404+
if before:
405+
try:
406+
parser.parse(before).time()
407+
except parser.ParserError as e:
408+
raise BadValidatorException(e)
409+
410+
if after:
411+
try:
412+
parser.parse(after).time()
413+
except parser.ParserError as e:
414+
raise BadValidatorException(e)
415+
416+
self.before = before
417+
self.after = after
418+
356419
def validate(self, field, row_number, row={}):
357420
if self.ignore_space:
358421
field = field.strip()
359422
try:
360423
if field or not self.empty_ok:
361424
# Pandas auto convert fields into dates (ignoring the parse_dates=False)
362425
field = str(field)
363-
parser.parse(field).time()
426+
time = parser.parse(field).time()
427+
428+
if self.before and not time < parser.parse(self.before).time():
429+
self.invalid_dict["invalid_set"].add(field)
430+
self.invalid_dict["invalid_rows"].add(row_number)
431+
raise ValidationException("Value {} is not before {}".format(field, self.before))
432+
433+
if self.after and not time > parser.parse(self.after).time():
434+
self.invalid_dict["invalid_set"].add(field)
435+
self.invalid_dict["invalid_rows"].add(row_number)
436+
raise ValidationException("Value {} is not after {}".format(field, self.after))
364437

365438
except parser.ParserError as e:
366439
self.invalid_dict["invalid_set"].add(field)
@@ -373,12 +446,36 @@ def bad(self):
373446

374447
def generate(self, column, additional_column=None, additional_worksheet=None):
375448
# GreaterThanOrEqual for validity with ODS.
376-
dv = DataValidation(type="time")
449+
450+
params = {"type": "time"}
451+
if (self.before is not None and self.after is not None):
452+
params["formula1"] = parser.parse(self.after).strftime("%H:%M:%S")
453+
params["formula2"] = parser.parse(self.before).strftime("%H:%M:%S")
454+
params["operator"] = "between"
455+
elif self.before is not None:
456+
params["formula1"] = parser.parse(self.before).strftime("%H:%M:%S")
457+
params["operator"] = "lessThanOrEqual"
458+
elif self.after is not None:
459+
params["formula1"] = parser.parse(self.after).strftime("%H:%M:%S")
460+
params["operator"] = "greaterThanOrEqual"
461+
462+
dv = DataValidation(**params)
377463
dv.add("{}2:{}1048576".format(column, column))
378464
return dv
379465

380466
def describe(self, column_name):
381-
return "{} : Time {}".format(column_name, "(required)" if not self.empty_ok else "")
467+
text = "{} : Time".format(column_name)
468+
if (self.after is not None and self.before is not None):
469+
text += " ({} - {})".format(self.after, self.before)
470+
elif self.after is not None:
471+
text += " >= {}".format(self.after)
472+
elif self.before is not None:
473+
text += " <= {}".format(self.before)
474+
475+
if not self.empty_ok:
476+
text += " (required)"
477+
478+
return text
382479

383480

384481
class EmailValidator(Validator):

tests/test_validate.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,24 @@ def test_invalid(self):
209209
assert val is False
210210
assert len(validation.failures['my_column']) == 2
211211

212+
def test_invalid_before(self):
213+
data = {'my_column': ['01/01/2000', '10/10/2010']}
214+
validators = {'my_column': DateValidator(before="05/05/2005")}
215+
df = pd.DataFrame.from_dict(data)
216+
validation = Checkcel(data=df, empty_ok=False, validators=validators)
217+
val = validation.validate()
218+
assert val is False
219+
assert len(validation.failures['my_column']) == 1
220+
221+
def test_invalid_after(self):
222+
data = {'my_column': ['01/01/2000', '10/10/2010']}
223+
validators = {'my_column': DateValidator(after="05/05/2005")}
224+
df = pd.DataFrame.from_dict(data)
225+
validation = Checkcel(data=df, empty_ok=False, validators=validators)
226+
val = validation.validate()
227+
assert val is False
228+
assert len(validation.failures['my_column']) == 1
229+
212230
def test_invalid_empty(self):
213231
data = {'my_column': ['01/01/1970', '']}
214232
validators = {'my_column': DateValidator()}
@@ -244,6 +262,24 @@ def test_invalid(self):
244262
assert val is False
245263
assert len(validation.failures['my_column']) == 2
246264

265+
def test_invalid_before(self):
266+
data = {'my_column': ['14h23', '16h30']}
267+
validators = {'my_column': TimeValidator(before="15h00")}
268+
df = pd.DataFrame.from_dict(data)
269+
validation = Checkcel(data=df, empty_ok=False, validators=validators)
270+
val = validation.validate()
271+
assert val is False
272+
assert len(validation.failures['my_column']) == 1
273+
274+
def test_invalid_after(self):
275+
data = {'my_column': ['14h23', '16h30']}
276+
validators = {'my_column': TimeValidator(after="15h00")}
277+
df = pd.DataFrame.from_dict(data)
278+
validation = Checkcel(data=df, empty_ok=False, validators=validators)
279+
val = validation.validate()
280+
assert val is False
281+
assert len(validation.failures['my_column']) == 1
282+
247283
def test_invalid_empty(self):
248284
data = {'my_column': ['13h10', '']}
249285
validators = {'my_column': TimeValidator()}

0 commit comments

Comments
 (0)