3737class Metadata :
3838 """Metadata representation
3939
40+ This class provides functionality for serialization / deserialization of
41+ python child classes to descriptors.
42+
43+ A **descriptor** is a JSON serializable `dict`.
44+ A **profile** is a JSON Schema dict that sets expectations on the format
45+ of the descriptor.
46+
4047 For proper functioning a child class must be decorated by
4148 "@attrs.define(kw_only=True, repr=False)" and ensure that
42- "Metadata.__attrs_post_init__" is called
49+ "Metadata.__attrs_post_init__" is called :
50+
51+ - `kw_only=True` is required because this class will need explicit
52+ keywords to be able to track which properties have been set at
53+ initialization (see implementation of `__new__`, which uses the keyword
54+ arguments `kwargs`)
55+ - `repr=False` prevents `attrs` from overwriting the inherited `__repr__`
56+ function defined in this class.
4357
4458 """
4559
4660 custom : dict [str , Any ] = {}
4761 """
48- List of custom parameters. Any extra properties will be added
62+ List of custom parameters. Any extra property will be added
4963 to the custom property.
64+
65+ A "custom" property is an additional property to the ones expected by the
66+ class's "profile" (see the "metadata_profile_*" properties)
5067 """
5168
5269 def __new__ (cls , * args : Any , ** kwargs : Any ):
@@ -61,6 +78,14 @@ def __attrs_post_init__(self):
6178 self .metadata_initiated = True
6279
6380 def __setattr__ (self , name : str , value : Any ):
81+ """Side effects when setting a property
82+
83+ Properties starting with `_` or `metadata_` have no side effects.
84+
85+ For all other properties, the "metadata_assigned" and
86+ "metadata_defaults" are updated, depending on whether the value has been
87+ set explicitly or implicitly as the default, respectively.
88+ """
6489 if not name .startswith (("_" , "metadata_" )):
6590 if self .metadata_initiated :
6691 if value is not None :
@@ -74,6 +99,7 @@ def __setattr__(self, name: str, value: Any):
7499 super ().__setattr__ (name , value )
75100
def __repr__(self) -> str:
    """Return the object's descriptor as a pretty-formatted string.

    The descriptor is obtained from `to_descriptor()` and formatted with
    `pprint.pformat`, keeping dictionary keys in their original order
    (`sort_dicts=False`).
    """
    descriptor = self.to_descriptor()
    return pprint.pformat(descriptor, sort_dicts=False)
78104
79105 @property
@@ -105,7 +131,15 @@ def handle_data(self, data: str):
105131 # Defined
106132
107133 def list_defined (self ) -> List [str ]:
134+ """Returns a list of all properties that have been defined.
135+
136+ TODO : the difference with metadata_assigned is that it lists values
137+ that are set in the class that are different from `metadata_defaults`.
138+ How is that possible, I thought metadata_defaults can only be set to
139+ the defaults ?
140+ """
108141 defined = list (self .metadata_assigned )
142+
109143 for name , default in self .metadata_defaults .items ():
110144 value = getattr (self , name , None )
111145 if isinstance (value , type ):
@@ -118,15 +152,27 @@ def add_defined(self, name: str) -> None:
118152 self .metadata_assigned .add (name )
119153
def has_defined(self, name: str) -> bool:
    """Tell whether `name` is one of the properties reported by
    `list_defined()`."""
    defined_names = self.list_defined()
    return name in defined_names
122157
def get_defined(self, name: str, *, default: Any = None) -> Any:
    """Return the value of the property `name` when `has_defined(name)` is
    true; otherwise return `default` (which is `None` unless provided)."""
    if not self.has_defined(name):
        return default

    return getattr(self, name)
128165
129166 def set_not_defined (self , name : str , value : Any , * , distinct : bool = False ) -> None :
167+ """If no property with "name" has already been assigned, then assign
168+ "value" to this property, but without the side effects of setting an
169+ attribute (see
170+ `__setattr__`; in particular, "has_defined(name)" will still return
171+ False after definition).
172+
173+ Setting `distinct=True` will prevent overwriting a value that is already set
174+ (including default values or values already set with this method)
175+ """
130176 if not self .has_defined (name ) and value is not None :
131177 if distinct and getattr (self , name , None ) == value :
132178 return
@@ -141,6 +187,11 @@ def validate_descriptor(
141187 * ,
142188 basepath : Optional [str ] = None ,
143189 ) -> Report :
190+ """Validate a descriptor
191+
192+ To do so, it tries to convert a descriptor into a class instance, and
193+ report errors it has encountered (if any)
194+ """
144195 errors = []
145196 timer = helpers .Timer ()
146197 try :
@@ -165,36 +216,63 @@ def from_descriptor(
165216 allow_invalid : bool = False ,
166217 ** options : Any ,
167218 ) -> Self :
219+ """Constructs an instance from a descriptor.
220+
221+ This method will identify the most specialized Class and instantiate
222+ it given information provided in the descriptor.
223+
224+ "descriptor" can be provided as a path to a descriptor file. The path
225+ can be relative to a base path provided as an option with the name
226+ "basepath".
227+
228+ If `allow_invalid = True`, the class creation will try to continue
229+ despite the descriptor having errors.
230+ """
168231 descriptor_path = None
232+
169233 if isinstance (descriptor , str ):
170234 descriptor_path = descriptor
171235 basepath = options .pop ("basepath" , None )
172236 descriptor = helpers .join_basepath (descriptor , basepath )
173237 if "basepath" in inspect .signature (cls .__init__ ).parameters :
174238 options ["basepath" ] = helpers .parse_basepath (descriptor )
239+
175240 descriptor = cls .metadata_retrieve (descriptor )
176- # TODO: remove in next version
241+
242+ # TODO: remove in v6
177243 # Transform with a base class in case the type is not available
178244 cls .metadata_transform (descriptor )
179- type = descriptor .get ("type" )
245+
246+ expected_type = descriptor .get ("type" )
247+
248+ # python class "type" property, if present, has precedence over descriptor type
180249 class_type = vars (cls ).get ("type" )
181250 if isinstance (class_type , str ):
182- type = class_type
183- Class = cls .metadata_select_class (type )
251+ expected_type = class_type
252+
253+ # Get the most specialized class associated with the expected_type
254+ # (defaults to the current class if `expected_type` is `None`)
255+ Class = cls .metadata_select_class (expected_type )
184256 Error = Class .metadata_Error or platform .frictionless_errors .MetadataError
257+
185258 Class .metadata_transform (descriptor )
186259 errors = list (Class .metadata_validate (descriptor ))
260+
187261 if not allow_invalid :
188262 if errors :
189263 error = Error (note = "descriptor is not valid" )
190264 raise FrictionlessException (error , reasons = errors )
265+
191266 metadata = Class .metadata_import (descriptor , ** helpers .remove_non_values (options ))
192267 if descriptor_path :
193268 metadata .metadata_descriptor_path = descriptor_path
194269 metadata .metadata_descriptor_initial = metadata .to_descriptor ()
195270 return metadata # type: ignore
196271
197272 def to_descriptor (self , * , validate : bool = False ) -> types .IDescriptor :
273+ """Return a descriptor associated to the class instance.
274+ If `validate = True`, the descriptor will additionally be validated.
275+ """
198276 descriptor = self .metadata_export ()
199277 if validate :
200278 Error = self .metadata_Error or platform .frictionless_errors .MetadataError
@@ -277,16 +355,57 @@ def to_markdown(self, path: Optional[str] = None, table: bool = False) -> str:
277355 metadata_type : ClassVar [str ]
278356 metadata_Error : ClassVar [Optional [Type [Error ]]] = None
279357 metadata_profile : ClassVar [Dict [str , Any ]] = {}
358+ """A JSON Schema like dictionary that defines the expected format of the descriptor"""
359+
280360 metadata_profile_patch : ClassVar [Dict [str , Any ]] = {}
361+ """Change to the expected format of the descriptor
362+
363+ This will usually be used by child classes to amend and build upon the
364+ descriptor of their parent.
365+ """
366+
281367 metadata_profile_merged : ClassVar [Dict [str , Any ]] = {}
368+ """Provides a consolidated definition of the descriptor, taking into
369+ account a `metadata_profile` and all `metadata_profile_patch`es that
370+ apply.
371+ """
372+
282373 metadata_initiated : bool = False
374+ """Is set to true when the class initialization is finished"""
375+
283376 metadata_assigned : Set [str ] = set ()
377+ """Set of all names of properties to which a value (different from None)
378+ has been _explicitly_ assigned (including with explicit arguments at
379+ object initialization)"""
380+
284381 metadata_defaults : Dict [str , Any ] = {}
382+ """Names and values of properties that have not been
383+ explicitly set, and that have been set to a default value instead"""
384+
285385 metadata_descriptor_path : Optional [str ] = None
386+ """Descriptor file path
387+ If applicable, i.e. if a class has been instantiated with
388+ a descriptor read from a file
389+ """
390+
286391 metadata_descriptor_initial : Optional [types .IDescriptor ] = None
392+ """Descriptor used for class instantiation
393+ If applicable, i.e. if a class has been instantiated with
394+ a descriptor
395+ """
287396
288397 @classmethod
289398 def metadata_select_class (cls , type : Optional [str ]) -> Type [Metadata ]:
399+ """Allows to specify a more specialized class for the "type" given as
400+ input
401+
402+ When a class can be dispatched into several different more
403+ specialized classes, this function makes the link between the type and
404+ the class.
405+
406+ Otherwise, "type" is expected to be None, and the current class is
407+ returned.
408+ """
290409 if type :
291410 note = f'unsupported type for "{ cls .metadata_type } ": { type } '
292411 Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
@@ -295,10 +414,21 @@ def metadata_select_class(cls, type: Optional[str]) -> Type[Metadata]:
295414
@classmethod
def metadata_select_property_class(cls, name: str) -> Optional[Type[Metadata]]:
    """Return the Metadata class to use for the property called "name".

    Complex properties are likely to have their own python class,
    inheriting from Metadata; a child class should return that class when
    this hook is called with the corresponding property name.

    This base implementation knows no such property and returns None for
    every name.
    """
    return None
299424
300425 @classmethod
301426 def metadata_ensure_profile (cls ):
427+ """Consolidates `metadata_profile` and `metadata_profile_patch`es
428+
429+ All patches are applied, in order from parent to child, in case of
430+ multiple successive inheritance.
431+ """
302432 if not cls .__dict__ .get ("metadata_profile_merged" , None ):
303433 cls .metadata_profile_merged = cls .metadata_profile
304434 for subcls in reversed (cls .mro ()):
@@ -310,14 +440,32 @@ def metadata_ensure_profile(cls):
310440
311441 @classmethod
312442 def metadata_retrieve (
313- cls , descriptor : Union [types .IDescriptor , str ], * , size : Optional [int ] = None
443+ cls ,
444+ descriptor : Union [types .IDescriptor , str , Path ],
445+ * ,
446+ size : Optional [int ] = None ,
314447 ) -> types .IDescriptor :
448+ """Copy or fetch the "descriptor" as a dictionary.
449+
450+ If "descriptor" is a string or Path, then it is interpreted as a
451+ (possibly remote) path to a descriptor file.
452+
453+ The content of the file is expected to be in JSON format, except if
454+ the filename has an explicit `.yaml` extension.
455+
456+ """
315457 try :
316458 if isinstance (descriptor , Mapping ):
317459 return deepcopy (descriptor )
460+
461+ # Types are tested explicitly,
462+ # to provide feedback to users who do not comply with
463+ # the function signature and provide a wrong type
318464 if isinstance (descriptor , (str , Path )): # type: ignore
465+ # descriptor is read from (possibly remote) file
319466 if isinstance (descriptor , Path ):
320467 descriptor = str (descriptor )
468+
321469 if helpers .is_remote_path (descriptor ):
322470 session = platform .frictionless .system .http_session
323471 response = session .get (descriptor , stream = True )
@@ -328,20 +476,36 @@ def metadata_retrieve(
328476 else :
329477 with open (descriptor , encoding = "utf-8" ) as file :
330478 content = file .read (size )
479+
331480 if descriptor .endswith (".yaml" ):
332481 metadata = platform .yaml .safe_load (io .StringIO (content ))
333482 else :
334483 metadata = json .loads (content )
484+
335485 assert isinstance (metadata , dict )
336486 return metadata # type: ignore
487+
337488 raise TypeError ("descriptor type is not supported" )
489+
338490 except Exception as exception :
339491 Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
340492 note = f'cannot retrieve metadata "{ descriptor } " because "{ exception } "'
341493 raise FrictionlessException (Error (note = note )) from exception
342494
343495 @classmethod
344496 def metadata_transform (cls , descriptor : types .IDescriptor ):
497+ """Transform the descriptor in place before deserializing into a python class
498+ instance.
499+
500+ The transformation applies recursively to any property handled with
501+ `metadata_select_property_class(name)`.
502+
503+ The actual transformation steps are defined by child classes, which must call
504+ `super().metadata_transform` to ensure recursive transformation.
505+
506+ This can be used for instance for retrocompatibility, converting
507+ former descriptors into new ones.
508+ """
345509 profile = cls .metadata_ensure_profile ()
346510 for name in profile .get ("properties" , {}):
347511 value = descriptor .get (name )
@@ -364,12 +528,22 @@ def metadata_validate(
364528 profile : Optional [Union [types .IDescriptor , str ]] = None ,
365529 error_class : Optional [Type [Error ]] = None ,
366530 ) -> Generator [Error , None , None ]:
531+ """Validates a descriptor according to a profile
532+
533+ A **profile** is a JSON Schema dict that sets expectations on the format
534+ of the descriptor.
535+
536+ The profile to validate against can be set explicitly ("profile" parameter),
537+ otherwise it defaults to the class profile.
538+ """
367539 Error = error_class
368540 if not Error :
369541 Error = cls .metadata_Error or platform .frictionless_errors .MetadataError
542+
370543 profile = profile or cls .metadata_ensure_profile ()
371544 if isinstance (profile , str ):
372545 profile = cls .metadata_retrieve (profile )
546+
373547 validator_class = platform .jsonschema .validators .validator_for (profile ) # type: ignore
374548 validator = validator_class (profile ) # type: ignore
375549 for error in validator .iter_errors (descriptor ): # type: ignore
@@ -379,6 +553,7 @@ def metadata_validate(
379553 if metadata_path :
380554 note = f"{ note } at property '{ metadata_path } '"
381555 yield Error (note = note )
556+
382557 for name in profile .get ("properties" , {}):
383558 value = descriptor .get (name )
384559 Class = cls .metadata_select_property_class (name )
@@ -400,6 +575,10 @@ def metadata_import(
400575 with_basepath : bool = False ,
401576 ** options : Any ,
402577 ) -> Self :
578+ """Deserialization of a descriptor to a class instance
579+
580+ The deserialization and serialization must be lossless.
581+ """
403582 merged_options = {}
404583 profile = cls .metadata_ensure_profile ()
405584 basepath = options .pop ("basepath" , None )
@@ -431,6 +610,10 @@ def metadata_import(
431610 return metadata
432611
433612 def metadata_export (self , * , exclude : List [str ] = []) -> types .IDescriptor :
613+ """Serialize class instance to descriptor
614+
615+ The deserialization and serialization must be lossless
616+ """
434617 descriptor = {}
435618 profile = self .metadata_ensure_profile ()
436619 for name in profile .get ("properties" , {}):
0 commit comments