Skip to content

Commit e2f2a33

Browse files
Dispatching tests according to method change
1 parent 0498e9b commit e2f2a33

19 files changed

+625
-624
lines changed

frictionless/validator/__spec__/package/test_general.py renamed to frictionless/package/__spec__/test_validate.py

Lines changed: 316 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import json
22
import pathlib
3+
from copy import deepcopy
34

45
import pytest
56

@@ -11,6 +12,7 @@
1112
Resource,
1213
Schema,
1314
fields,
15+
platform,
1416
)
1517

1618
# General
@@ -302,7 +304,10 @@ def test_validate_package_using_detector_schema_sync_issue_847():
302304
Resource(
303305
data=[["f1"], ["v1"], ["v2"], ["v3"]],
304306
schema=Schema(
305-
fields=[fields.StringField(name="f1"), fields.StringField(name="f2")],
307+
fields=[
308+
fields.StringField(name="f1"),
309+
fields.StringField(name="f2"),
310+
],
306311
),
307312
),
308313
]
@@ -362,3 +367,313 @@ def test_package_licenses_required_path_or_name_issue_1290():
362367
descriptor = {"resources": [], "licenses": [{"title": "title"}]}
363368
report = Package.validate_descriptor(descriptor)
364369
assert report.errors[0].note.count('license requires "path" or "name"')
370+
371+
372+
def test_package_validate_with_skip_errors():
    """Check that ``Checklist(skip_errors=...)`` filters the reported error types.

    With nothing skipped, validating ``data/invalid/datapackage.json`` is
    expected to report two blank-row errors and one primary-key error (the
    first case below); every further case skips a subset of those types.
    """
    test_cases = [
        {"ignore": [], "expect_errors": ["blank-row", "primary-key", "blank-row"]},
        {"ignore": ["primary-key"], "expect_errors": ["blank-row", "blank-row"]},
        {"ignore": ["blank-row"], "expect_errors": ["primary-key"]},
        {"ignore": ["blank-row", "primary-key"], "expect_errors": []},
    ]

    for tc in test_cases:
        # Read the JSON descriptor as UTF-8 instead of relying on the
        # platform's default text encoding.
        with open("data/invalid/datapackage.json", encoding="utf-8") as file:
            package = Package(json.load(file), basepath="data/invalid")
        checklist = Checklist(skip_errors=tc["ignore"])

        report = package.validate(checklist)

        assert report.flatten(["type"]) == [[t] for t in tc["expect_errors"]]
390+
391+
392+
# Stats
393+
394+
# Package descriptor with a single file-based resource that declares both a
# "hash" and a "bytes" value; tests copy it and tweak or drop those two keys.
DESCRIPTOR_SH = {
    "resources": [
        {
            "name": "resource1",
            "path": "data/table.csv",
            # declared stats for the file referenced by "path"
            "hash": "sha256:a1fd6c5ff3494f697874deeb07f69f8667e903dd94a7bc062dd57550cea26da8",
            "bytes": 30,
        },
    ],
}
404+
405+
406+
@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows")
def test_package_validate_stats():
    """Validate an unmodified copy of DESCRIPTOR_SH and expect a valid report."""
    descriptor = deepcopy(DESCRIPTOR_SH)
    result = Package(descriptor).validate()
    assert result.valid
412+
413+
414+
def test_package_validate_stats_invalid():
    """Corrupt both the declared hash and byte count; expect both errors."""
    descriptor = deepcopy(DESCRIPTOR_SH)
    resource = descriptor["resources"][0]
    resource["hash"] += "a"
    resource["bytes"] += 1
    result = Package(descriptor).validate()
    expected = [
        [None, None, "hash-count"],
        [None, None, "byte-count"],
    ]
    assert result.flatten(["rowNumber", "fieldNumber", "type"]) == expected
424+
425+
426+
@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows")
def test_package_validate_stats_size():
    """With the declared hash removed, only "bytes" remains; expect validity."""
    descriptor = deepcopy(DESCRIPTOR_SH)
    del descriptor["resources"][0]["hash"]
    result = Package(descriptor).validate()
    assert result.valid
433+
434+
435+
def test_package_validate_stats_size_invalid():
    """Drop the hash and bump the declared byte count; expect one byte-count error."""
    descriptor = deepcopy(DESCRIPTOR_SH)
    resource = descriptor["resources"][0]
    resource["bytes"] += 1
    del resource["hash"]
    result = Package(descriptor).validate()
    expected = [[None, None, "byte-count"]]
    assert result.flatten(["rowNumber", "fieldNumber", "type"]) == expected
444+
445+
446+
@pytest.mark.skipif(platform.type == "windows", reason="Fix on Windows")
def test_package_validate_stats_hash():
    """With the declared byte count removed, only "hash" remains; expect validity."""
    descriptor = deepcopy(DESCRIPTOR_SH)
    del descriptor["resources"][0]["bytes"]
    result = Package(descriptor).validate()
    assert result.valid
453+
454+
455+
def test_package_validate_check_file_package_stats_hash_invalid():
    """Drop the byte count and corrupt the declared hash; expect one hash-count error."""
    descriptor = deepcopy(DESCRIPTOR_SH)
    resource = descriptor["resources"][0]
    del resource["bytes"]
    resource["hash"] += "a"
    result = Package(descriptor).validate()
    expected = [[None, None, "hash-count"]]
    assert result.flatten(["rowNumber", "fieldNumber", "type"]) == expected
464+
465+
466+
# Schema
467+
468+
# Package descriptor with two inline resources used by the foreign-key tests:
# "cities" declares two foreign keys and "people" is the table one of them
# points to. In both "data" lists the first row is the header.
DESCRIPTOR_FK = {
    "resources": [
        {
            "name": "cities",
            "data": [
                ["id", "name", "next_id"],
                [1, "london", 2],
                [2, "paris", 3],
                [3, "rome", 4],
                [4, "rio", None],
            ],
            "schema": {
                "fields": [
                    {"name": "id", "type": "integer"},
                    {"name": "name", "type": "string"},
                    {"name": "next_id", "type": "integer"},
                ],
                "foreignKeys": [
                    # empty resource name: next_id references "id" of this
                    # same resource
                    {
                        "fields": "next_id",
                        "reference": {"resource": "", "fields": "id"},
                    },
                    # id references "label" in the "people" resource
                    {
                        "fields": "id",
                        "reference": {"resource": "people", "fields": "label"},
                    },
                ],
            },
        },
        {
            "name": "people",
            "data": [["label", "population"], [1, 8], [2, 2], [3, 3], [4, 6]],
        },
    ],
}
503+
504+
# Extra resource whose composite foreign key ("from", "to") references the
# ("id", "next_id") pair of the "cities" resource; tests append it to a copy
# of DESCRIPTOR_FK. The first "data" row is the header.
MULTI_FK_RESSOURCE = {
    "name": "travel_time",
    "data": [
        ["from", "to", "hours"],
        [1, 2, 1.5],
        [2, 3, 8],
        [3, 4, 18],
    ],
    "schema": {
        "fields": [
            {"name": "from", "type": "integer"},
            {"name": "to", "type": "integer"},
            {"name": "hours", "type": "number"},
        ],
        "foreignKeys": [
            {
                "fields": ["from", "to"],
                "reference": {"resource": "cities", "fields": ["id", "next_id"]},
            },
        ],
    },
}
521+
522+
523+
def test_package_validate_schema_foreign_key_error():
    """Validate an unmodified copy of DESCRIPTOR_FK and expect a valid report.

    NOTE(review): despite the "_error" suffix in the name, this test asserts
    that no error is reported.
    """
    result = Package(deepcopy(DESCRIPTOR_FK)).validate()
    assert result.valid
528+
529+
530+
def test_package_validate_schema_foreign_key_not_defined():
    """Remove "foreignKeys" from the cities schema and expect a valid report."""
    descriptor = deepcopy(DESCRIPTOR_FK)
    descriptor["resources"][0]["schema"].pop("foreignKeys")
    result = Package(descriptor).validate()
    assert result.valid
536+
537+
538+
def test_package_validate_schema_foreign_key_self_referenced_resource_violation():
    """Drop the last cities row (id 4) so the row whose next_id is 4 is reported."""
    descriptor = deepcopy(DESCRIPTOR_FK)
    cities_rows = descriptor["resources"][0]["data"]
    cities_rows.pop(4)
    result = Package(descriptor).validate()
    expected = [
        [4, None, "foreign-key", ["3", "rome", "4"]],
    ]
    assert result.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == expected
546+
547+
548+
def test_package_validate_schema_foreign_key_internal_resource_violation():
    """Drop the last people row (label 4) so the cities row with id 4 is reported."""
    descriptor = deepcopy(DESCRIPTOR_FK)
    people_rows = descriptor["resources"][1]["data"]
    people_rows.pop(4)
    result = Package(descriptor).validate()
    expected = [
        [5, None, "foreign-key", ["4", "rio", ""]],
    ]
    assert result.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == expected
556+
557+
558+
def test_package_validate_schema_foreign_key_internal_resource_violation_non_existent():
    """Replace the people data with one unrelated row; every cities row is reported."""
    descriptor = deepcopy(DESCRIPTOR_FK)
    people = descriptor["resources"][1]
    people["data"] = [["label", "population"], [10, 10]]
    result = Package(descriptor).validate()
    expected = [
        [2, None, "foreign-key", ["1", "london", "2"]],
        [3, None, "foreign-key", ["2", "paris", "3"]],
        [4, None, "foreign-key", ["3", "rome", "4"]],
        [5, None, "foreign-key", ["4", "rio", ""]],
    ]
    assert result.flatten(["rowNumber", "fieldNumber", "type", "cells"]) == expected
569+
570+
571+
def test_package_validate_schema_multiple_foreign_key():
    """Add the composite-foreign-key resource to the package; expect a valid report."""
    descriptor = deepcopy(DESCRIPTOR_FK)
    # Append a copy, as is done for DESCRIPTOR_FK, so the module-level
    # constant is never shared with (or altered through) a package.
    descriptor["resources"].append(deepcopy(MULTI_FK_RESSOURCE))
    package = Package(descriptor)
    report = package.validate()
    assert report.valid
577+
578+
579+
def test_package_validate_schema_multiple_foreign_key_resource_violation_non_existent():
    """Remove the London row from cities; the travel_time row (1, 2) is reported."""
    descriptor = deepcopy(DESCRIPTOR_FK)
    # remove London
    del descriptor["resources"][0]["data"][1]
    # Append a copy, as is done for DESCRIPTOR_FK, so the module-level
    # constant is never shared with (or altered through) a package.
    descriptor["resources"].append(deepcopy(MULTI_FK_RESSOURCE))
    package = Package(descriptor)
    report = package.validate()
    assert report.flatten(["rowNumber", "fieldNumber", "type", "cells", "note"]) == [
        [
            2,
            None,
            "foreign-key",
            ["1", "2", "1.5"],
            'for "from, to": values "1, 2" not found in the lookup table "cities" as "id, next_id"',
        ],
    ]
595+
596+
597+
def test_package_validate_schema_multiple_foreign_key_violations():
    """Change two city ids and check the reported foreign-key error details.

    The expected list covers the cities self-reference, the cities -> people
    reference and the composite travel_time -> cities reference.
    """
    descriptor = deepcopy(DESCRIPTOR_FK)
    # Add some wrong fks
    descriptor["resources"][0]["data"][3][0] = 5
    descriptor["resources"][0]["data"][4][0] = 6
    # Append a copy, as is done for DESCRIPTOR_FK, so the module-level
    # constant is never shared with (or altered through) a package.
    descriptor["resources"].append(deepcopy(MULTI_FK_RESSOURCE))
    package = Package(descriptor)
    report = package.validate()
    assert report.flatten(
        [
            "rowNumber",
            "fieldNames",
            "fieldCells",
            "referenceName",
            "referenceFieldNames",
        ]
    ) == [
        [3, ["next_id"], ["3"], "", ["id"]],
        [4, ["next_id"], ["4"], "", ["id"]],
        [4, ["id"], ["5"], "people", ["label"]],
        [5, ["id"], ["6"], "people", ["label"]],
        [4, ["from", "to"], ["3", "4"], "cities", ["id", "next_id"]],
    ]
620+
621+
622+
# Bugs
623+
624+
625+
def test_package_validate_using_detector_schema_sync_issue_847():
    """Regression test for issue 847.

    The schema declares two fields (f1, f2) while the inline data only has an
    f1 column; with ``Detector(schema_sync=True)`` set on every resource the
    report is expected to be valid.
    """
    schema = Schema(fields=[fields.AnyField(name="f1"), fields.AnyField(name="f2")])
    table = Resource(data=[["f1"], ["v1"], ["v2"], ["v3"]], schema=schema)
    package = Package(resources=[table])
    for item in package.resources:
        item.detector = Detector(schema_sync=True)
    assert package.validate().valid
643+
644+
645+
# Parallel
646+
647+
# Note: the parallel validation tests must not use foreign keys, because
648+
# foreign keys make validation fall back automatically to single-core execution
649+
650+
651+
@pytest.mark.ci
def test_package_validate_parallel_from_dict():
    """Validate a package built from a loaded JSON dict with ``parallel=True``."""
    # Read the JSON descriptor as UTF-8 instead of relying on the platform's
    # default text encoding.
    with open("data/datapackage.json", encoding="utf-8") as file:
        package = Package(json.load(file), basepath="data")
    report = package.validate(parallel=True)
    assert report.valid
657+
658+
659+
@pytest.mark.ci
def test_package_validate_parallel_from_dict_invalid():
    """Validate an invalid package built from a dict with ``parallel=True``.

    The expected rows are [taskNumber, rowNumber, fieldNumber, type].
    """
    # Read the JSON descriptor as UTF-8 instead of relying on the platform's
    # default text encoding.
    with open("data/invalid/datapackage_no_foreign_key.json", encoding="utf-8") as file:
        package = Package(json.load(file), basepath="data/invalid")
    report = package.validate(parallel=True)
    assert report.flatten(["taskNumber", "rowNumber", "fieldNumber", "type"]) == [
        [1, 3, None, "blank-row"],
        [1, 3, None, "primary-key"],
        [2, 4, None, "blank-row"],
    ]
669+
670+
671+
@pytest.mark.ci
def test_package_validate_with_parallel():
    """Validate an invalid package loaded from a path with ``parallel=True``.

    The expected rows are [taskNumber, rowNumber, fieldNumber, type].
    """
    expected = [
        [1, 3, None, "blank-row"],
        [1, 3, None, "primary-key"],
        [2, 4, None, "blank-row"],
    ]
    result = Package("data/invalid/datapackage_no_foreign_key.json").validate(
        parallel=True
    )
    assert result.flatten(["taskNumber", "rowNumber", "fieldNumber", "type"]) == expected

0 commit comments

Comments
 (0)