|
3 | 3 | Generic odML validation framework.
|
4 | 4 | """
|
5 | 5 |
|
| 6 | +import re |
6 | 7 | from . import dtypes
|
7 | 8 |
|
8 | 9 | LABEL_ERROR = 'error'
|
@@ -386,3 +387,56 @@ def property_values_check(prop):
|
386 | 387 |
|
387 | 388 |
|
388 | 389 | Validation.register_handler('property', property_values_check)
|
| 390 | + |
| 391 | + |
# Patterns used to guess a more specific dtype for a value stored as "string".
# Each entry is (dtype name, compiled pattern, match whole value?). Order
# matters: the first entry that fits the stripped value wins. "text" is the
# only entry searched anywhere in the value, since a line break may occur at
# any position.
_STRING_DTYPE_PATTERNS = (
    ('int', re.compile(r'^[-+]?\d+$'), True),
    ('date', re.compile(r'^\d{2,4}-\d{1,2}-\d{1,2}$'), True),
    ('datetime',
     re.compile(r'^\d{2,4}-\d{1,2}-\d{1,2} \d{2}:\d{2}(:\d{2})?$'), True),
    ('time', re.compile(r'^\d{2}:\d{2}(:\d{2})?$'), True),
    ('float', re.compile(r'^[-+]?\d+\.\d+$'), True),
    ('tuple', re.compile(r'^\(.*\)$'), True),
    # The alternation must be grouped; otherwise "^" and "$" bind only to the
    # first and last alternative and e.g. "tomato" is taken for a boolean.
    ('boolean', re.compile(r'^(?:TRUE|FALSE|True|False|t|f)$'), True),
    ('text', re.compile(r'[\r\n]'), False),
)


def _guess_string_dtype(val):
    """
    Returns the name of the dtype a single string value looks like.

    :param val: string value to classify; surrounding whitespace is ignored.
    :returns: the name of the first fitting dtype, "<n>-tuple" for a
              parenthesised value containing n-1 semicolons, or "string"
              if no pattern fits.
    """
    stripped = val.strip()

    for name, pattern, whole_value in _STRING_DTYPE_PATTERNS:
        found = pattern.match(stripped) if whole_value else pattern.search(stripped)
        if not found:
            continue

        separators = val.count(';')
        if name == "tuple" and separators > 0:
            # Report the tuple length so values of different length do not
            # count as the same dtype.
            return str(separators + 1) + "-" + name
        return name

    return "string"


def property_values_string_check(prop):
    """
    PROTOTYPE

    Tests whether values with dtype "string" are maybe of different dtype.

    A warning is yielded only if every value of the property looks like the
    same non-string dtype; mixed or plain string values yield nothing.

    :param prop: property the validation is applied on.
    """

    if prop.dtype != "string" or not prop.values:
        return

    guessed = set(_guess_string_dtype(val) for val in prop.values)

    # More than one candidate means the values do not agree on a dtype,
    # so "string" remains the only safe choice.
    if len(guessed) != 1:
        return

    res_dtype = guessed.pop()

    if res_dtype != "string":
        msg = 'Dtype of property "%s" currently is "string", but might fit dtype "%s"!' % (prop.name, res_dtype)
        yield ValidationError(prop, msg, LABEL_WARNING)
| 440 | + |
| 441 | + |
| 442 | +Validation.register_handler('property', property_values_string_check) |
0 commit comments