Skip to content

Commit de38ae2

Browse files
First attempt: separate descriptor from I/O logic
1 parent e2a0177 commit de38ae2

23 files changed

+722
-614
lines changed

frictionless/detector/detector.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from .. import helpers, settings
1111
from ..dialect import Dialect
1212
from ..exception import FrictionlessException
13-
from ..fields import AnyField
1413
from ..metadata import Metadata
1514
from ..platform import platform
1615
from ..schema import Field, Schema
@@ -400,7 +399,7 @@ def detect_schema(
400399
# For not inferred fields we use the "any" type field as a default
401400
for index, name in enumerate(names):
402401
if fields[index] is None:
403-
fields[index] = AnyField(name=name, schema=schema) # type: ignore
402+
fields[index] = Field(name=name, schema=schema) # type: ignore
404403
schema.fields = fields # type: ignore
405404

406405
# Sync schema

frictionless/fields/__init__.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1-
from .any import AnyField as AnyField
2-
from .array import ArrayField as ArrayField
3-
from .boolean import BooleanField as BooleanField
4-
from .date import DateField as DateField
5-
from .datetime import DatetimeField as DatetimeField
6-
from .duration import DurationField as DurationField
7-
from .geojson import GeojsonField as GeojsonField
8-
from .geopoint import GeopointField as GeopointField
9-
from .integer import IntegerField as IntegerField
10-
from .number import NumberField as NumberField
11-
from .object import ObjectField as ObjectField
12-
from .string import StringField as StringField
13-
from .time import TimeField as TimeField
14-
from .year import YearField as YearField
15-
from .yearmonth import YearmonthField as YearmonthField
1+
from .any import AnyReadWriter as AnyReadWriter
2+
from .array import ArrayReadWriter as ArrayReadWriter
3+
from .boolean import BooleanReadWriter as BooleanReadWriter
4+
from .date import DateReadWriter as DateReadWriter
5+
from .datetime import DatetimeReadWriter as DatetimeReadWriter
6+
from .duration import DurationReadWriter as DurationReadWriter
7+
from .geojson import GeoJSONReadWriter as GeoJSONReadWriter
8+
from .geopoint import GeoPointReadWriter as GeoPointReadWriter
9+
from .integer import IntegerReadWriter as IntegerReadWriter
10+
from .list import ListReadWriter as ListReadWriter
11+
from .number import NumberReadWriter as NumberReadWriter
12+
from .object import ObjectReadWriter as ObjectReadWriter
13+
from .string import StringReadWriter as StringReadWriter
14+
from .time import TimeReadWriter as TimeReadWriter
15+
from .year import YearReadWriter as YearReadWriter
16+
from .yearmonth import YearmonthReadWriter as YearmonthReadWriter

frictionless/fields/any.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
from __future__ import annotations
22

3-
import attrs
3+
from dataclasses import dataclass
44

5-
from ..schema import Field
5+
from .helpers import default_create_value_reader, default_create_value_writer
66

77

8-
@attrs.define(kw_only=True, repr=False)
9-
class AnyField(Field):
10-
type = "any"
11-
builtin = True
12-
supported_constraints = [
13-
"required",
14-
"enum",
15-
]
8+
@dataclass
9+
class AnyReadWriter:
10+
def create_value_reader(self):
11+
return default_create_value_reader()
12+
13+
def create_value_writer(self):
14+
return default_create_value_writer()

frictionless/fields/array.py

Lines changed: 65 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,65 @@
1-
from __future__ import annotations
2-
3-
import json
4-
from typing import Any, Dict, Optional
5-
6-
import attrs
7-
8-
from ..schema import Field
9-
10-
11-
@attrs.define(kw_only=True, repr=False)
12-
class ArrayField(Field):
13-
type = "array"
14-
builtin = True
15-
supported_constraints = [
16-
"required",
17-
"minLength",
18-
"maxLength",
19-
"enum",
20-
]
21-
22-
array_item: Optional[Dict[str, Any]] = attrs.field(factory=dict)
23-
"""
24-
A dictionary that specifies the type and other constraints for the
25-
data that will be read in this data type field.
26-
"""
27-
28-
# Read
29-
30-
def create_cell_reader(self):
31-
default_reader = super().create_cell_reader()
32-
33-
# Create field
34-
field_reader = None
35-
if self.array_item:
36-
descriptor = self.array_item.copy()
37-
descriptor.pop("arrayItem", None)
38-
descriptor.setdefault("name", self.name)
39-
descriptor.setdefault("type", "any")
40-
field = Field.from_descriptor(descriptor)
41-
field_reader = field.create_cell_reader()
42-
43-
# Create reader
44-
def cell_reader(cell: Any):
45-
cell, notes = default_reader(cell)
46-
if cell is not None and not notes and field_reader:
47-
for index, item in enumerate(cell):
48-
item_cell, item_notes = field_reader(item)
49-
if item_notes:
50-
notes = notes or {}
51-
for name, note in item_notes.items():
52-
notes[name] = f"array item {note}"
53-
cell[index] = item_cell
54-
return cell, notes
55-
56-
return cell_reader
57-
58-
def create_value_reader(self):
59-
# Create reader
60-
def value_reader(cell: Any): # type: ignore
61-
if not isinstance(cell, list):
62-
if isinstance(cell, str):
63-
try:
64-
cell = json.loads(cell)
65-
except Exception:
66-
return None
67-
if not isinstance(cell, list):
68-
return None
69-
elif isinstance(cell, tuple):
70-
cell = list(cell) # type: ignore
71-
else:
72-
return None
73-
return cell # type: ignore
74-
75-
return value_reader
76-
77-
# Write
78-
79-
def create_value_writer(self):
80-
# Create writer
81-
def value_writer(cell: Any):
82-
return json.dumps(cell)
83-
84-
return value_writer
85-
86-
# Metadata
87-
88-
metadata_profile_patch = {
89-
"properties": {
90-
"arrayItem": {"type": "object"},
91-
}
92-
}
1+
# from __future__ import annotations
2+
3+
# import json
4+
# from dataclasses import dataclass
5+
# from typing import Any
6+
7+
# from ..schema.field_descriptor import ArrayFieldDescriptor, FieldDescriptor
8+
from . import AnyReadWriter
9+
10+
ArrayReadWriter = AnyReadWriter
11+
# @dataclass
12+
# class ArrayReadWriter:
13+
# _descriptor: ArrayFieldDescriptor
14+
15+
# def create_cell_reader(self):
16+
# # Create field
17+
# field_reader = None
18+
19+
# if self._descriptor.arrayItem:
20+
# item_descriptor = self._descriptor.arrayItem
21+
# field = Fielditem_descriptor.from_descriptor(item_descriptor)
22+
# field_reader = field.create_cell_reader()
23+
24+
# # Create reader
25+
# def cell_reader(cell: Any):
26+
# cell, notes = default_reader(cell)
27+
# if cell is not None and not notes and field_reader:
28+
# for index, item in enumerate(cell):
29+
# item_cell, item_notes = field_reader(item)
30+
# if item_notes:
31+
# notes = notes or {}
32+
# for name, note in item_notes.items():
33+
# notes[name] = f"array item {note}"
34+
# cell[index] = item_cell
35+
# return cell, notes
36+
37+
# return cell_reader
38+
39+
# def create_value_reader(self):
40+
# # Create reader
41+
# def value_reader(cell: Any): # type: ignore
42+
# if not isinstance(cell, list):
43+
# if isinstance(cell, str):
44+
# try:
45+
# cell = json.loads(cell)
46+
# except Exception:
47+
# return None
48+
# if not isinstance(cell, list):
49+
# return None
50+
# elif isinstance(cell, tuple):
51+
# cell = list(cell) # type: ignore
52+
# else:
53+
# return None
54+
# return cell # type: ignore
55+
56+
# return value_reader
57+
58+
# # Write
59+
60+
# def create_value_writer(self):
61+
# # Create writer
62+
# def value_writer(cell: Any):
63+
# return json.dumps(cell)
64+
65+
# return value_writer

frictionless/fields/boolean.py

Lines changed: 13 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,29 @@
11
from __future__ import annotations
22

3+
from dataclasses import dataclass
34
from typing import Any, Dict, List
45

5-
import attrs
6-
76
from .. import settings
8-
from ..schema import Field
9-
7+
from .field_descriptor import BooleanFieldDescriptor
108

11-
@attrs.define(kw_only=True, repr=False)
12-
class BooleanField(Field):
13-
type = "boolean"
14-
builtin = True
15-
supported_constraints = [
16-
"required",
17-
"enum",
18-
]
199

20-
true_values: List[str] = attrs.field(factory=settings.DEFAULT_TRUE_VALUES.copy)
21-
"""
22-
It defines the values to be read as true values while reading data. The default
23-
true values are ["true", "True", "TRUE", "1"].
24-
"""
10+
@dataclass
11+
class BooleanReadWriter:
12+
_descriptor: BooleanFieldDescriptor
2513

26-
false_values: List[str] = attrs.field(factory=settings.DEFAULT_FALSE_VALUES.copy)
27-
"""
28-
It defines the values to be read as false values while reading data. The default
29-
false values are ["false", "False", "FALSE", "0"].
30-
"""
14+
def true_values(self) -> List[str]:
15+
return self._descriptor.trueValues or settings.DEFAULT_TRUE_VALUES
3116

32-
# Read
17+
def false_values(self) -> List[str]:
18+
return self._descriptor.falseValues or settings.DEFAULT_FALSE_VALUES
3319

3420
def create_value_reader(self):
3521
# Create mapping
3622
mapping: Dict[str, bool] = {}
37-
for value in self.true_values:
23+
24+
for value in self.true_values():
3825
mapping[value] = True
39-
for value in self.false_values:
26+
for value in self.false_values():
4027
mapping[value] = False
4128

4229
# Create reader
@@ -48,20 +35,8 @@ def value_reader(cell: Any):
4835

4936
return value_reader
5037

51-
# Write
52-
5338
def create_value_writer(self):
54-
# Create writer
5539
def value_writer(cell: Any):
56-
return self.true_values[0] if cell else self.false_values[0]
40+
return self.true_values()[0] if cell else self.false_values()[0]
5741

5842
return value_writer
59-
60-
# Metadata
61-
62-
metadata_profile_patch = {
63-
"properties": {
64-
"trueValues": {"type": "array", "items": {"type": "string"}},
65-
"falseValues": {"type": "array", "items": {"type": "string"}},
66-
}
67-
}

frictionless/fields/date.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,20 @@
11
from __future__ import annotations
22

3+
from dataclasses import dataclass
34
from datetime import date, datetime
45
from typing import Any
56

6-
import attrs
7-
87
from .. import settings
98
from ..platform import platform
10-
from ..schema import Field
9+
from .field_descriptor import DateFieldDescriptor
1110

1211

13-
@attrs.define(kw_only=True, repr=False)
14-
class DateField(Field):
15-
type = "date"
16-
builtin = True
17-
supported_constraints = [
18-
"required",
19-
"minimum",
20-
"maximum",
21-
"enum",
22-
]
12+
@dataclass
13+
class DateReadWriter:
14+
_descriptor: DateFieldDescriptor
2315

24-
# Read
16+
def format(self):
17+
return self._descriptor.format or settings.DEFAULT_FIELD_FORMAT
2518

2619
# TODO: use different value_readers based on format (see string)
2720
def create_value_reader(self):
@@ -42,12 +35,12 @@ def value_reader(cell: Any):
4235
if not isinstance(cell, str):
4336
return None
4437
try:
45-
if self.format == "default":
38+
if self.format() == "default":
4639
cell = datetime.strptime(cell, settings.DEFAULT_DATE_PATTERN).date()
47-
elif self.format == "any":
40+
elif self.format() == "any":
4841
cell = platform.dateutil_parser.parse(cell).date()
4942
else:
50-
cell = datetime.strptime(cell, self.format).date()
43+
cell = datetime.strptime(cell, self.format()).date()
5144
except Exception:
5245
return None
5346
return cell
@@ -58,7 +51,7 @@ def value_reader(cell: Any):
5851

5952
def create_value_writer(self):
6053
# Create format
61-
format = self.format
54+
format = self.format()
6255
if format == settings.DEFAULT_FIELD_FORMAT:
6356
format = settings.DEFAULT_DATE_PATTERN
6457

0 commit comments

Comments
 (0)