Skip to content

Commit 566c333

Browse files
doc: metadata documentation
1 parent c48cc79 commit 566c333

File tree

1 file changed

+182
-9
lines changed

1 file changed

+182
-9
lines changed

frictionless/metadata/metadata.py

Lines changed: 182 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,29 @@
3737
class Metadata:
3838
"""Metadata representation
3939
40+
This class provides functionality for serialization / deserialization of
41+
python child classes to descriptors.
42+
43+
A **descriptor** is a JSON serializable `dict`.
44+
A **profile** is a JSON Schema dict that sets expectations on the format
45+
of the descriptor.
46+
4047
For proper functioning a child class must be decorated by
4148
"@attrs.define(kw_only=True, repr=False)" and ensure that
42-
"Metadata.__attrs_post_init__" is called
49+
"Metadata.__attrs_post_init__" is called :
50+
51+
- `kw_only=True` is required because this class will need explicit
52+
keywords to be able to track which properties have been set at
53+
initialization (see implementation of `__new__`, which uses the keyword
54+
arguments `kwargs`)
55+
- `repr=False` prevents `attrs` from overwriting the inherited `__repr__`
56+
function defined in this class.
4357
4458
"""
4559

4660
custom: dict[str, Any] = {}
4761
"""
48-
List of custom parameters. Any extra properties will be added
62+
List of custom parameters. Any extra property will be added
4963
to the custom property.
5064
"""
5165

@@ -61,6 +75,14 @@ def __attrs_post_init__(self):
6175
self.metadata_initiated = True
6276

6377
def __setattr__(self, name: str, value: Any):
78+
"""Side effects when setting a property
79+
80+
Properties starting with `_` or `metadata_` have no side effects.
81+
82+
For all other properties, the "metadata_assigned" and
83+
"metadata_defaults" are updated, depending on whether the value has been
84+
set explicitly or implicitly as the default, respectively.
85+
"""
6486
if not name.startswith(("_", "metadata_")):
6587
if self.metadata_initiated:
6688
if value is not None:
@@ -74,6 +96,7 @@ def __setattr__(self, name: str, value: Any):
7496
super().__setattr__(name, value)
7597

7698
def __repr__(self) -> str:
99+
"""Returns the pretty-printed descriptor of the object"""
77100
return pprint.pformat(self.to_descriptor(), sort_dicts=False)
78101

79102
@property
@@ -105,7 +128,15 @@ def handle_data(self, data: str):
105128
# Defined
106129

107130
def list_defined(self) -> List[str]:
131+
"""Returns a list of all properties that have been defined.
132+
133+
TODO : the difference with metadata_assigned is that it lists values
134+
that are set in the class that are different from `metadata_defaults`.
135+
How is that possible, I thought metadata_defaults can only be set to
136+
the defaults ?
137+
"""
108138
defined = list(self.metadata_assigned)
139+
109140
for name, default in self.metadata_defaults.items():
110141
value = getattr(self, name, None)
111142
if isinstance(value, type):
@@ -118,15 +149,27 @@ def add_defined(self, name: str) -> None:
118149
self.metadata_assigned.add(name)
119150

120151
def has_defined(self, name: str) -> bool:
152+
"""Whether a property has been defined explicitly"""
121153
return name in self.list_defined()
122154

123155
def get_defined(self, name: str, *, default: Any = None) -> Any:
156+
"""Retrieve the value of a property if it has been explicitly
157+
assigned, or return a default value otherwise"""
124158
if self.has_defined(name):
125159
return getattr(self, name)
126-
if default is not None:
127-
return default
160+
161+
return default
128162

129163
def set_not_defined(self, name: str, value: Any, *, distinct: bool = False) -> None:
164+
"""If no property with "name" has already been assigned, then assign
165+
"value" to this property, but without the side effects of setting an
166+
attribute (see
167+
`__setattr__`, in particular, "has_defined(name)" will still return
168+
False after definition).
169+
170+
Setting `distinct=True` will prevent overwriting an already set value
171+
(including default values or values set with this method already)
172+
"""
130173
if not self.has_defined(name) and value is not None:
131174
if distinct and getattr(self, name, None) == value:
132175
return
@@ -141,6 +184,11 @@ def validate_descriptor(
141184
*,
142185
basepath: Optional[str] = None,
143186
) -> Report:
187+
"""Validate a descriptor
188+
189+
To do so, it tries to convert a descriptor into a class instance, and
190+
reports the errors it has encountered (if any)
191+
"""
144192
errors = []
145193
timer = helpers.Timer()
146194
try:
@@ -165,36 +213,63 @@ def from_descriptor(
165213
allow_invalid: bool = False,
166214
**options: Any,
167215
) -> Self:
216+
"""Constructs an instance from a descriptor.
217+
218+
This method will identify the most specialized Class and instantiate
219+
it given information provided in the descriptor.
220+
221+
"descriptor" can be provided as a path to a descriptor file. The path
222+
can be relative to a base path provided as an option with the name
223+
"basepath".
224+
225+
If `allow_invalid = True`, the class creation will try to continue
226+
despite the descriptor having errors.
227+
"""
168228
descriptor_path = None
229+
169230
if isinstance(descriptor, str):
170231
descriptor_path = descriptor
171232
basepath = options.pop("basepath", None)
172233
descriptor = helpers.join_basepath(descriptor, basepath)
173234
if "basepath" in inspect.signature(cls.__init__).parameters:
174235
options["basepath"] = helpers.parse_basepath(descriptor)
236+
175237
descriptor = cls.metadata_retrieve(descriptor)
176-
# TODO: remove in next version
238+
239+
# TODO: remove in v6
177240
# Transform with a base class in case the type is not available
178241
cls.metadata_transform(descriptor)
179-
type = descriptor.get("type")
242+
243+
expected_type = descriptor.get("type")
244+
245+
# python class "type" property, if present, has precedence over descriptor type
180246
class_type = vars(cls).get("type")
181247
if isinstance(class_type, str):
182-
type = class_type
183-
Class = cls.metadata_select_class(type)
248+
expected_type = class_type
249+
250+
# Get the most specialized class associated with the expected_type
251+
# (defaults to the current class if `expected_type` is `None`)
252+
Class = cls.metadata_select_class(expected_type)
184253
Error = Class.metadata_Error or platform.frictionless_errors.MetadataError
254+
185255
Class.metadata_transform(descriptor)
186256
errors = list(Class.metadata_validate(descriptor))
257+
187258
if not allow_invalid:
188259
if errors:
189260
error = Error(note="descriptor is not valid")
190261
raise FrictionlessException(error, reasons=errors)
262+
191263
metadata = Class.metadata_import(descriptor, **helpers.remove_non_values(options))
192264
if descriptor_path:
193265
metadata.metadata_descriptor_path = descriptor_path
194266
metadata.metadata_descriptor_initial = metadata.to_descriptor()
195267
return metadata # type: ignore
196268

197269
def to_descriptor(self, *, validate: bool = False) -> types.IDescriptor:
270+
"""Return a descriptor associated to the class instance.
271+
If `validate = True`, the descriptor will additionally be validated.
272+
"""
198273
descriptor = self.metadata_export()
199274
if validate:
200275
Error = self.metadata_Error or platform.frictionless_errors.MetadataError
@@ -277,16 +352,56 @@ def to_markdown(self, path: Optional[str] = None, table: bool = False) -> str:
277352
metadata_type: ClassVar[str]
278353
metadata_Error: ClassVar[Optional[Type[Error]]] = None
279354
metadata_profile: ClassVar[Dict[str, Any]] = {}
355+
"""A JSON Schema like dictionary that defines the expected format of the descriptor"""
356+
280357
metadata_profile_patch: ClassVar[Dict[str, Any]] = {}
358+
"""Change to the expected format of the descriptor
359+
360+
This will usually be used by child classes to amend and build upon the
361+
descriptor of their parent.
362+
"""
363+
281364
metadata_profile_merged: ClassVar[Dict[str, Any]] = {}
365+
"""Provides a consolidated definition of the descriptor, taking into
366+
account a `metadata_profile` and all `metadata_profile_patch`es that
367+
apply.
368+
"""
369+
282370
metadata_initiated: bool = False
371+
283372
metadata_assigned: Set[str] = set()
373+
"""Set of all names of properties to which a value (different from None)
374+
has been _explicitly_ assigned (including with explicit arguments at
375+
object initialization)"""
376+
284377
metadata_defaults: Dict[str, Any] = {}
378+
"""Names and values of properties that have not been
379+
explicitly set, and that have been set to a default value instead"""
380+
285381
metadata_descriptor_path: Optional[str] = None
382+
"""Descriptor file path
383+
If applicable, i.e. if a class has been instantiated with
384+
a descriptor read from a file
385+
"""
386+
286387
metadata_descriptor_initial: Optional[types.IDescriptor] = None
388+
"""Descriptor used for class instantiation
389+
If applicable, i.e. if a class has been instantiated with
390+
a descriptor
391+
"""
287392

288393
@classmethod
289394
def metadata_select_class(cls, type: Optional[str]) -> Type[Metadata]:
395+
"""Allows to specify a more specialized class for the "type" given as
396+
input
397+
398+
When a class can be dispatched into several different more
399+
specialized classes, this function makes the link between the type and
400+
the class.
401+
402+
Otherwise, "type" is expected to be None, and the current class is
403+
returned.
404+
"""
290405
if type:
291406
note = f'unsupported type for "{cls.metadata_type}": {type}'
292407
Error = cls.metadata_Error or platform.frictionless_errors.MetadataError
@@ -299,6 +414,11 @@ def metadata_select_property_class(cls, name: str) -> Optional[Type[Metadata]]:
299414

300415
@classmethod
301416
def metadata_ensure_profile(cls):
417+
"""Consolidates `metadata_profile` and `metadata_profile_patch`es
418+
419+
All patches are applied, in order from parent to child, in case of
420+
multiple successive inheritance.
421+
"""
302422
if not cls.__dict__.get("metadata_profile_merged", None):
303423
cls.metadata_profile_merged = cls.metadata_profile
304424
for subcls in reversed(cls.mro()):
@@ -310,14 +430,32 @@ def metadata_ensure_profile(cls):
310430

311431
@classmethod
312432
def metadata_retrieve(
313-
cls, descriptor: Union[types.IDescriptor, str], *, size: Optional[int] = None
433+
cls,
434+
descriptor: Union[types.IDescriptor, str, Path],
435+
*,
436+
size: Optional[int] = None,
314437
) -> types.IDescriptor:
438+
"""Copy or fetch the "descriptor" as a dictionary.
439+
440+
If "descriptor" is a string or Path, then it is interpreted as a
441+
(possibly remote) path to a descriptor file.
442+
443+
The content of the file is expected to be in JSON format, except if
444+
the filename has an explicit `.yaml` extension.
445+
446+
"""
315447
try:
316448
if isinstance(descriptor, Mapping):
317449
return deepcopy(descriptor)
450+
451+
# Types are tested explicitly,
452+
# for providing feedback to users that do not comply with
453+
# the function signature and provide a wrong type
318454
if isinstance(descriptor, (str, Path)): # type: ignore
455+
# descriptor is read from (possibly remote) file
319456
if isinstance(descriptor, Path):
320457
descriptor = str(descriptor)
458+
321459
if helpers.is_remote_path(descriptor):
322460
session = platform.frictionless.system.http_session
323461
response = session.get(descriptor, stream=True)
@@ -328,20 +466,36 @@ def metadata_retrieve(
328466
else:
329467
with open(descriptor, encoding="utf-8") as file:
330468
content = file.read(size)
469+
331470
if descriptor.endswith(".yaml"):
332471
metadata = platform.yaml.safe_load(io.StringIO(content))
333472
else:
334473
metadata = json.loads(content)
474+
335475
assert isinstance(metadata, dict)
336476
return metadata # type: ignore
477+
337478
raise TypeError("descriptor type is not supported")
479+
338480
except Exception as exception:
339481
Error = cls.metadata_Error or platform.frictionless_errors.MetadataError
340482
note = f'cannot retrieve metadata "{descriptor}" because "{exception}"'
341483
raise FrictionlessException(Error(note=note)) from exception
342484

343485
@classmethod
344486
def metadata_transform(cls, descriptor: types.IDescriptor):
487+
"""Transform the descriptor in place before deserializing into a python class
488+
instance.
489+
490+
The transformation applies recursively to any property handled with
491+
`metadata_select_property_class(name)`.
492+
493+
The actual transformation steps are defined by child classes, which must call
494+
`super().metadata_transform` to ensure recursive transformation.
495+
496+
This can be used for instance for retrocompatibility, converting
497+
former descriptors into new ones.
498+
"""
345499
profile = cls.metadata_ensure_profile()
346500
for name in profile.get("properties", {}):
347501
value = descriptor.get(name)
@@ -364,12 +518,22 @@ def metadata_validate(
364518
profile: Optional[Union[types.IDescriptor, str]] = None,
365519
error_class: Optional[Type[Error]] = None,
366520
) -> Generator[Error, None, None]:
521+
"""Validates a descriptor according to a profile
522+
523+
A **profile** is a JSON Schema dict that sets expectations on the format
524+
of the descriptor.
525+
526+
The profile to validate can be set explicitly ("profile" parameter),
527+
otherwise it defaults to the class profile.
528+
"""
367529
Error = error_class
368530
if not Error:
369531
Error = cls.metadata_Error or platform.frictionless_errors.MetadataError
532+
370533
profile = profile or cls.metadata_ensure_profile()
371534
if isinstance(profile, str):
372535
profile = cls.metadata_retrieve(profile)
536+
373537
validator_class = platform.jsonschema.validators.validator_for(profile) # type: ignore
374538
validator = validator_class(profile) # type: ignore
375539
for error in validator.iter_errors(descriptor): # type: ignore
@@ -379,6 +543,7 @@ def metadata_validate(
379543
if metadata_path:
380544
note = f"{note} at property '{metadata_path}'"
381545
yield Error(note=note)
546+
382547
for name in profile.get("properties", {}):
383548
value = descriptor.get(name)
384549
Class = cls.metadata_select_property_class(name)
@@ -400,6 +565,10 @@ def metadata_import(
400565
with_basepath: bool = False,
401566
**options: Any,
402567
) -> Self:
568+
"""Deserialization of a descriptor to a class instance
569+
570+
The deserialization and serialization must be lossless
571+
"""
403572
merged_options = {}
404573
profile = cls.metadata_ensure_profile()
405574
basepath = options.pop("basepath", None)
@@ -431,6 +600,10 @@ def metadata_import(
431600
return metadata
432601

433602
def metadata_export(self, *, exclude: List[str] = []) -> types.IDescriptor:
603+
"""Serialize class instance to descriptor
604+
605+
The deserialization and serialization must be lossless
606+
"""
434607
descriptor = {}
435608
profile = self.metadata_ensure_profile()
436609
for name in profile.get("properties", {}):

0 commit comments

Comments
 (0)