Skip to content

Commit ad374b2

Browse files
committed
v0.0.67 introduce update_schema processor
1 parent c525ef7 commit ad374b2

File tree

6 files changed

+62
-1
lines changed

6 files changed

+62
-1
lines changed

PROCESSORS.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,24 @@ You can use `update_resource` to rename a resource like so:
619619
update_resource('current-name', name='new-name')
620620
```
621621

622+
#### update_schema.py
623+
Update schema properties for one or more resources in the package
624+
625+
```python
626+
def update_schema(resources, **metadata):
627+
pass
628+
```
629+
630+
- `resources`
631+
- A name of a resource to operate on
632+
- A regular expression matching resource names
633+
- A list of resource names
634+
- `None` indicates operation should be done on all resources
635+
- The index of the resource in the package
636+
- `metadata` - Any allowed schema property (according to the [spec](https://frictionlessdata.io/specs/table-schema/#descriptor)) can be provided here.
637+
638+
You can use `update_schema` to add a `missingValues` property, change the primary key etc.
639+
622640
#### set_primary_key.py
623641
Updates the primary key for one or more resources in the package
624642

dataflows/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.0.66
1+
0.0.67

dataflows/base/schema_validator.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,8 @@ def schema_validator(resource, iterator,
5252
continue
5353

5454
yield row
55+
56+
57+
schema_validator.drop = drop
58+
schema_validator.ignore = ignore
59+
schema_validator.raise_exception = raise_exception

dataflows/processors/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@
2222
from .unstream import unstream
2323
from .update_package import update_package, add_metadata
2424
from .update_resource import update_resource
25+
from .update_schema import update_schema
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from dataflows import PackageWrapper
2+
from dataflows.helpers.resource_matcher import ResourceMatcher
3+
4+
5+
def update_schema(resources, **props):
6+
7+
def func(package: PackageWrapper):
8+
matcher = ResourceMatcher(resources, package.pkg)
9+
for resource in package.pkg.descriptor['resources']:
10+
if matcher.match(resource['name']):
11+
resource.setdefault('schema', {}).update(props)
12+
yield package.pkg
13+
14+
res_iter = iter(package)
15+
for r in res_iter:
16+
if matcher.match(r.res.name):
17+
yield r.it
18+
else:
19+
yield r
20+
21+
return func

tests/test_lib.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,22 @@ def test_update_resource():
763763
assert dp.descriptor['resources'][4]['source'] == 'thewild'
764764

765765

766+
def test_update_schema():
767+
from dataflows import Flow, printer, update_schema, validate
768+
769+
f = Flow(
770+
[['a', '-'], ['a', 0]],
771+
update_schema(-1, missingValues=['-']),
772+
validate(),
773+
printer()
774+
)
775+
results, dp, stats = f.results()
776+
print(dp.descriptor)
777+
assert results[0] == [
778+
dict(col0='a', col1=None),
779+
dict(col0='a', col1=0),
780+
]
781+
766782
def test_set_type_resources():
767783
from dataflows import Flow, set_type, validate
768784

0 commit comments

Comments
 (0)