Skip to content

Commit 0ba2809

Browse files
authored
Validate duplicated header (dfurtado#42)
* Duplicate header validation * Added header validation tests * Updated README
1 parent 91e3a96 commit 0ba2809

File tree

4 files changed

+65
-7
lines changed

4 files changed

+65
-7
lines changed

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,16 @@ dataclass_csv.DataclassReader(
8585
)
8686
```
8787

88+
All keyword arguments supported by `DictReader` are also supported by the `DataclassReader`, with the addition of:
89+
90+
`validate_header` - The `DataclassReader` will raise a `ValueError` if the CSV file contains columns with the same name. This
91+
validation is performed to avoid data being overwritten. To skip this validation, set `validate_header=False` when creating an
92+
instance of the `DataclassReader`, see an example below:
93+
94+
```python
95+
reader = DataclassReader(f, User, validate_header=False)
96+
```
97+
8898
If you run this code you should see an output like this:
8999

90100
```python

dataclass_csv/dataclass_reader.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,26 @@
88
from .field_mapper import FieldMapper
99
from .exceptions import CsvValueError
1010

11+
from collections import Counter
12+
13+
14+
def _verify_duplicate_header_items(header):
15+
if header is not None and len(header) == 0:
16+
return
17+
18+
header_counter = Counter(header)
19+
duplicated = [k for k, v in header_counter.items() if v > 1]
20+
21+
if len(duplicated) > 0:
22+
raise ValueError(
23+
(
24+
"It seems like the CSV file contain duplicated header "
25+
f"values: {duplicated}. This may cause inconsistent data. "
26+
"Use the kwarg validate_header=False when initializing the "
27+
"DataclassReader to skip the header validation."
28+
)
29+
)
30+
1131

1232
class DataclassReader:
1333
def __init__(
@@ -32,10 +52,15 @@ def __init__(
3252
self._optional_fields = self._get_optional_fields()
3353
self._field_mapping: Dict[str, Dict[str, Any]] = {}
3454

55+
validate_header = kwds.pop("validate_header", True)
56+
3557
self._reader = csv.DictReader(
3658
f, fieldnames, restkey, restval, dialect, *args, **kwds
3759
)
3860

61+
if validate_header:
62+
_verify_duplicate_header_items(self._reader.fieldnames)
63+
3964
def _get_optional_fields(self):
4065
return [
4166
field.name

tests/conftest.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,20 @@
55

66
@pytest.fixture()
77
def create_csv(tmpdir_factory):
8-
def func(data, filename='user.csv', factory=tmpdir_factory):
8+
def func(data, fieldnames=None, filename="user.csv", factory=tmpdir_factory):
99

1010
assert data
1111

12-
file = tmpdir_factory.mktemp('data').join(filename)
12+
file = tmpdir_factory.mktemp("data").join(filename)
1313

1414
row = data[0] if isinstance(data, list) else data
1515

16-
with file.open('w') as f:
17-
writer = DictWriter(f, fieldnames=row.keys())
16+
header = fieldnames if fieldnames is not None else row.keys()
17+
18+
with file.open("w") as f:
19+
writer = DictWriter(f, fieldnames=header)
1820
writer.writeheader()
19-
addrow = (
20-
writer.writerows if isinstance(data, list) else writer.writerow
21-
)
21+
addrow = writer.writerows if isinstance(data, list) else writer.writerow
2222
addrow(data)
2323

2424
return file

tests/test_dataclass_reader.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,26 @@ def test_raise_error_when_field_not_found(create_csv):
225225
):
226226
reader = DataclassReader(f, UserWithEmail)
227227
list(reader)
228+
229+
230+
def test_raise_error_when_duplicate_header_items(create_csv):
    """A CSV whose header repeats a column name must raise ``ValueError``."""
    row = {"name": "User1", "email": "[email protected]"}
    columns = ["name", "email", "name"]
    csv_path = create_csv(row, fieldnames=columns)

    # Both constructing the reader and consuming it stay inside the
    # ``pytest.raises`` context, exactly as before.
    with csv_path.open() as handle, pytest.raises(ValueError):
        reader = DataclassReader(handle, UserWithEmail)
        list(reader)
240+
241+
242+
def test_skip_header_validation(create_csv):
    """With ``validate_header=False`` a duplicated header must not raise."""
    row = {"name": "User1", "email": "[email protected]"}
    columns = ["name", "email", "name"]
    csv_path = create_csv(row, fieldnames=columns)

    with csv_path.open() as handle:
        # Consuming the reader is the assertion: no exception is expected.
        list(DataclassReader(handle, UserWithEmail, validate_header=False))

0 commit comments

Comments
 (0)