Skip to content

Commit 75be599

Browse files
amelie-rondot and roll authored
Incorrect errors when validating data with a missing required column (#1620)
* TDD: add test case with missing required header and schema_sync=True detector option in validation of a table resource: test fails * TDD: fix test case by removing fields info related to missing labels in creation of row_stream: test passes * Add test case for validation TableResource with schema_sync=True detector option with two missing required headers: test passes * Refactor: factorize test cases and rename test function * Refactor: update field_info directly in row_stream local function just before processing Row creation * Sort imports * Linting: lint and format files * Refactor: Move added block dealing with missing required labels in __open_run_stream() TableResource method * Add test cases --------- Co-authored-by: roll <[email protected]>
1 parent 6100dc9 commit 75be599

File tree

2 files changed

+79
-2
lines changed

2 files changed

+79
-2
lines changed

frictionless/resources/table.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,6 @@ def row_stream():
310310
for row_number, cells in enumerated_content_stream:
311311
self.stats.rows += 1
312312

313-
# Create row
314313
row = Row(
315314
cells,
316315
field_info=field_info,
@@ -387,7 +386,16 @@ def row_stream():
387386
# Yield row
388387
yield row
389388

390-
# Crreate row stream
389+
# NB: missing required labels are not included in the
390+
# field_info parameter used for row creation
391+
if self.detector.schema_sync:
392+
for field in self.schema.fields:
393+
if field.name not in self.labels and field.name in field_info["names"]:
394+
field_index = field_info["names"].index(field.name)
395+
del field_info["names"][field_index]
396+
del field_info["objects"][field_index]
397+
del field_info["mapping"][field.name]
398+
# Create row stream
391399
self.__row_stream = row_stream()
392400

393401
# Read

tests/validator/resource/test_schema.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
from copy import deepcopy
2+
13
import pytest
24

5+
import frictionless
36
from frictionless import Checklist, Detector, FrictionlessException, Schema, fields
47
from frictionless.resources import TableResource
58

@@ -304,3 +307,69 @@ def test_resource_validate_less_actual_fields_with_required_constraint_issue_950
304307
[3, 3, "constraint-error"],
305308
[3, 3, "missing-cell"],
306309
]
310+
311+
312+
def test_resource_with_missing_required_header_with_schema_sync_is_true_issue_1611():
313+
schema_descriptor_1 = {
314+
"$schema": "https://frictionlessdata.io/schemas/table-schema.json",
315+
"fields": [
316+
{
317+
"name": "A",
318+
"title": "Field A",
319+
"type": "string",
320+
"constraints": {"required": True},
321+
},
322+
{"name": "B", "title": "Field B", "type": "string"},
323+
{"name": "C", "title": "Field C", "type": "string"},
324+
],
325+
}
326+
327+
schema_descriptor_2 = deepcopy(schema_descriptor_1)
328+
# Add required constraint on "C" field
329+
schema_descriptor_2["fields"][2]["constraints"] = {"required": True}
330+
331+
test_cases = [
332+
{
333+
"schema": schema_descriptor_1,
334+
"source": [["B", "C"], ["b", "c"]],
335+
"expected_flattened_report": [
336+
[None, 3, "A", "missing-label"],
337+
],
338+
},
339+
{
340+
"schema": schema_descriptor_2,
341+
"source": [["B"], ["b"]],
342+
"expected_flattened_report": [
343+
[None, 2, "A", "missing-label"],
344+
[None, 3, "C", "missing-label"],
345+
],
346+
},
347+
{
348+
"schema": schema_descriptor_2,
349+
"source": [
350+
["A", "B"],
351+
["a", "b"],
352+
["a1"],
353+
["a2", "b2"],
354+
[],
355+
["a3", "b3", "c3"],
356+
],
357+
"expected_flattened_report": [
358+
[None, 3, "C", "missing-label"],
359+
[3, 2, "B", "missing-cell"],
360+
[5, None, None, "blank-row"],
361+
[6, 3, "", "extra-cell"],
362+
],
363+
},
364+
]
365+
for tc in test_cases:
366+
schema = Schema.from_descriptor(tc["schema"])
367+
resource = TableResource(
368+
tc["source"], schema=schema, detector=Detector(schema_sync=True)
369+
)
370+
report = frictionless.validate(resource)
371+
print(report.flatten(["rowNumber", "fieldNumber", "fieldName", "type"]))
372+
assert (
373+
report.flatten(["rowNumber", "fieldNumber", "fieldName", "type"])
374+
== tc["expected_flattened_report"]
375+
)

0 commit comments

Comments
 (0)