Skip to content

Commit bc27f9b

Browse files
Mv validate methods & soft deprecate Validator
1 parent f20aab1 commit bc27f9b

File tree

5 files changed

+190
-234
lines changed

5 files changed

+190
-234
lines changed

frictionless/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,6 @@
4040
from .table import Lookup as Lookup
4141
from .table import Row as Row
4242
from .transformer import Transformer as Transformer
43+
44+
# Deprecated
4345
from .validator import Validator as Validator

frictionless/package/package.py

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
11
from __future__ import annotations
22

3+
from multiprocessing import Pool
34
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Union
45

56
import attrs
67
from typing_extensions import Self
78

89
from .. import errors, fields, helpers, settings
10+
from ..checklist import Checklist
911
from ..exception import FrictionlessException
1012
from ..metadata import Metadata
1113
from ..platform import platform
14+
from ..report import Report
1215
from ..resource import Resource
1316
from ..system import system
1417
from ..transformer import Transformer
15-
from ..validator import Validator
1618
from .factory import Factory
1719

1820
if TYPE_CHECKING:
1921
from .. import types
2022
from ..catalog import Dataset
21-
from ..checklist import Checklist
2223
from ..detector import Detector
2324
from ..dialect import Control, Dialect
2425
from ..indexer import IOnProgress, IOnRow
@@ -483,14 +484,54 @@ def validate(
483484
Report: validation report
484485
485486
"""
486-
validator = Validator()
487-
return validator.validate_package(
488-
self,
489-
checklist=checklist,
490-
name=name,
491-
parallel=parallel,
492-
limit_rows=limit_rows,
493-
limit_errors=limit_errors,
487+
# Create state
488+
timer = helpers.Timer()
489+
reports: List[Report] = []
490+
resources = self.resources if name is None else [self.get_resource(name)]
491+
with_foreign_keys = any(
492+
res.schema and res.schema.foreign_keys for res in resources
493+
)
494+
495+
# Prepare checklist
496+
checklist = checklist or Checklist()
497+
498+
# Validate metadata
499+
try:
500+
self.to_descriptor(validate=True)
501+
except FrictionlessException as exception:
502+
return Report.from_validation(time=timer.time, errors=exception.to_errors())
503+
504+
# Validate sequential
505+
if not parallel or with_foreign_keys:
506+
for resource in resources:
507+
report = resource.validate(
508+
checklist=checklist,
509+
limit_errors=limit_errors,
510+
limit_rows=limit_rows,
511+
)
512+
reports.append(report)
513+
514+
# Validate parallel
515+
else:
516+
with Pool() as pool:
517+
options_pool: List[Dict[str, Any]] = []
518+
for resource in resources:
519+
options: Any = {}
520+
options["resource"] = {}
521+
options["resource"]["descriptor"] = resource.to_descriptor()
522+
options["resource"]["basepath"] = resource.basepath
523+
options["validate"] = {}
524+
options["validate"]["limit_rows"] = limit_rows
525+
options["validate"]["limit_errors"] = limit_errors
526+
options_pool.append(options)
527+
report_descriptors = pool.map(_validate_parallel, options_pool)
528+
for report_descriptor in report_descriptors:
529+
reports.append(Report.from_descriptor(report_descriptor))
530+
531+
# Return report
532+
return Report.from_validation_reports(
533+
time=timer.time,
534+
reports=reports,
494535
)
495536

496537
# Convert
@@ -707,3 +748,11 @@ def metadata_export(self): # type: ignore
707748
# descriptor = {"$frictionless": "package/v2", **descriptor}
708749

709750
return descriptor
751+
752+
753+
def _validate_parallel(options: types.IDescriptor) -> types.IDescriptor:
    """Validate a single resource from plain options and return its report descriptor.

    This is the module-level worker handed to ``Pool.map`` by the parallel
    branch of package validation. It receives and returns plain descriptors
    rather than ``Resource``/``Report`` objects -- presumably so that arguments
    and results can be pickled across the process boundary (TODO confirm).

    Parameters:
        options: mapping with two keys:
            ``"resource"`` -- keyword arguments for ``Resource.from_descriptor``
            ``"validate"`` -- keyword arguments for ``Resource.validate``

    Returns:
        The validation report converted with ``Report.to_descriptor()``.
    """
    # Read both option groups up front so a malformed ``options`` mapping
    # fails before any resource is constructed.
    from_descriptor_kwargs, validate_kwargs = options["resource"], options["validate"]
    worker_resource = Resource.from_descriptor(**from_descriptor_kwargs)
    return worker_resource.validate(**validate_kwargs).to_descriptor()

frictionless/resource/resource.py

Lines changed: 109 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,22 @@
88
from typing_extensions import Self
99

1010
from .. import errors, fields, helpers, settings
11+
from ..checklist import Checklist
1112
from ..detector import Detector
1213
from ..dialect import Control, Dialect
1314
from ..exception import FrictionlessException
1415
from ..metadata import Metadata
1516
from ..platform import platform
17+
from ..report import Report
1618
from ..schema import Schema
1719
from ..system import system
18-
from ..validator import Validator
1920
from .factory import Factory
2021
from .stats import ResourceStats
2122

2223
if TYPE_CHECKING:
2324
from .. import types
24-
from ..checklist import Checklist
25+
from ..error import Error
2526
from ..package import Package
26-
from ..report import Report
2727
from ..system import Loader
2828

2929

@@ -619,8 +619,112 @@ def validate(
619619
Report: validation report
620620
621621
"""
622-
validator = Validator()
623-
return validator.validate_resource(self, checklist=checklist)
622+
# Create state
623+
partial = False
624+
timer = helpers.Timer()
625+
labels: List[str] = []
626+
errors: List[Error] = []
627+
warnings: List[str] = []
628+
629+
# Prepare checklist
630+
checklist = checklist or Checklist()
631+
checks = checklist.connect(self)
632+
633+
# Validate metadata
634+
try:
635+
self.to_descriptor(validate=True)
636+
except FrictionlessException as exception:
637+
return Report.from_validation_task(
638+
self, time=timer.time, errors=exception.to_errors()
639+
)
640+
641+
# TODO: remove in next version
642+
# Ignore not-supported hashings
643+
if self.hash:
644+
algorithm, _ = helpers.parse_resource_hash_v1(self.hash)
645+
if algorithm not in ["md5", "sha256"]:
646+
warning = "hash is ignored; supported algorithms: md5/sha256"
647+
warnings.append(warning)
648+
649+
# Prepare resource
650+
if self.closed:
651+
try:
652+
self.open()
653+
except FrictionlessException as exception:
654+
self.close()
655+
return Report.from_validation_task(
656+
self, time=timer.time, errors=exception.to_errors()
657+
)
658+
659+
# Validate data
660+
with self:
661+
# Validate start
662+
for index, check in enumerate(checks):
663+
for error in check.validate_start():
664+
if error.type == "check-error":
665+
del checks[index]
666+
if checklist.match(error):
667+
errors.append(error)
668+
669+
# Validate file
670+
if not isinstance(self, platform.frictionless_resources.TableResource):
671+
if self.hash is not None or self.bytes is not None:
672+
helpers.pass_through(self.byte_stream)
673+
674+
# Validate table
675+
else:
676+
row_count = 0
677+
labels = self.labels
678+
while True:
679+
row_count += 1
680+
681+
# Emit row
682+
try:
683+
row = next(self.row_stream) # type: ignore
684+
except FrictionlessException as exception:
685+
errors.append(exception.error)
686+
continue
687+
except StopIteration:
688+
break
689+
690+
# Validate row
691+
for check in checks:
692+
for error in check.validate_row(row):
693+
if checklist.match(error):
694+
errors.append(error)
695+
696+
# Callback row
697+
if on_row:
698+
on_row(row)
699+
700+
# Limit rows
701+
if limit_rows:
702+
if row_count >= limit_rows:
703+
warning = f"reached row limit: {limit_rows}"
704+
warnings.append(warning)
705+
partial = True
706+
break
707+
708+
# Limit errors
709+
if limit_errors:
710+
if len(errors) >= limit_errors:
711+
errors = errors[:limit_errors]
712+
warning = f"reached error limit: {limit_errors}"
713+
warnings.append(warning)
714+
partial = True
715+
break
716+
717+
# Validate end
718+
if not partial:
719+
for check in checks:
720+
for error in check.validate_end():
721+
if checklist.match(error):
722+
errors.append(error)
723+
724+
# Return report
725+
return Report.from_validation_task(
726+
self, time=timer.time, labels=labels, errors=errors, warnings=warnings
727+
)
624728

625729
# Export
626730

frictionless/resources/table.py

Lines changed: 1 addition & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from frictionless.schema.field import Field
99

10-
from .. import errors, helpers, settings
10+
from .. import errors, helpers
1111
from ..analyzer import Analyzer
1212
from ..dialect import Dialect
1313
from ..exception import FrictionlessException
@@ -17,11 +17,9 @@
1717
from ..system import system
1818
from ..table import Header, Lookup, Row, Table
1919
from ..transformer import Transformer
20-
from ..validator import Validator
2120

2221
if TYPE_CHECKING:
2322
from .. import types
24-
from ..checklist import Checklist
2523
from ..indexer import IOnProgress, IOnRow
2624
from ..pipeline import Pipeline
2725
from ..system import Loader, Parser
@@ -626,27 +624,6 @@ def transform(self, pipeline: Pipeline):
626624
transformer = Transformer()
627625
return transformer.transform_table_resource(self, pipeline)
628626

629-
# Validate
630-
631-
def validate(
632-
self,
633-
checklist: Optional[Checklist] = None,
634-
*,
635-
name: Optional[str] = None,
636-
on_row: Optional[types.ICallbackFunction] = None,
637-
parallel: bool = False,
638-
limit_rows: Optional[int] = None,
639-
limit_errors: int = settings.DEFAULT_LIMIT_ERRORS,
640-
):
641-
validator = Validator()
642-
return validator.validate_resource(
643-
self,
644-
checklist=checklist,
645-
on_row=on_row,
646-
limit_rows=limit_rows,
647-
limit_errors=limit_errors,
648-
)
649-
650627
# Export
651628

652629
def to_view(self, type: str = "look", **options: Any):

0 commit comments

Comments
 (0)