@@ -263,7 +263,9 @@ def __open_lookup(self):
263263 self .__lookup [source_name ][source_key ].add (cells )
264264
265265 def __open_row_stream (self ):
266- field_info = FieldsInfo (self .schema .fields )
266+ fields_info = FieldsInfo (
267+ self .schema .fields , self .labels , self .detector .schema_sync
268+ )
267269
268270 # Create state
269271 memory_unique : Dict [str , Any ] = {}
@@ -296,7 +298,7 @@ def row_stream():
296298
297299 row = Row (
298300 cells ,
299- field_info = field_info ,
301+ fields_info = fields_info ,
300302 row_number = row_number ,
301303 )
302304
@@ -378,9 +380,9 @@ def row_stream():
378380
379381 if self .detector .schema_sync :
380382 # Missing required labels are not included in the
381- # field_info parameter used for row creation
383+ # fields_info parameter used for row creation
382384 for field in self .schema .fields :
383- self .remove_missing_required_label_from_field_info (field , field_info )
385+ self .remove_missing_required_label_from_field_info (field , fields_info )
384386
385387 self .__row_stream = row_stream ()
386388
@@ -415,7 +417,9 @@ def label_is_missing(
def primary_key_cells(self, row: Row, case_sensitive: bool) -> Tuple[Any, ...]:
    """Collect the cells of a row that belong to the primary key.

    The labels to read are obtained from `self.primary_key_labels`, which
    receives the same `row` and `case_sensitive` arguments; the row is then
    indexed by each of those labels in turn.

    Returns:
        A tuple with one cell per primary key label, in the order the
        labels were returned.
    """
    key_labels = self.primary_key_labels(row, case_sensitive)
    key_cells = [row[key_label] for key_label in key_labels]
    return tuple(key_cells)
419423
420424 def primary_key_labels (
421425 self ,
@@ -689,39 +693,63 @@ def __init__(self, field: Field, field_number: int):
689693
690694
class FieldsInfo:
    """Helper class for linking columns to schema fields.

    It abstracts away the different ways of making this link. In particular, the
    reference may be the schema (`detector.schema_sync = False`), or the labels
    (`detector.schema_sync = True`).

    This class is not Public API, and should be used only in non-public
    interfaces.
    """

    def __init__(
        self, fields: List[Field], labels: Optional[List[str]], schema_sync: bool
    ):
        """Build the list of expected fields.

        Parameters:
            fields: fields of the schema
            labels: labels found in the header, if any
            schema_sync: whether the labels (True) or the schema (False) are
                the reference for the expected fields

        When `schema_sync` is set and `labels` is not empty, one entry is
        created per label, in label order and numbered from 1. A label
        without a field of the same name gets a generated field of type
        "any". In every other case one entry is created per schema field,
        in schema order and numbered from 1.

        Raises:
            FrictionlessException: `schema_sync` is set and `labels`
                contains duplicates
        """
        self._expected_fields: List[_FieldInfo] = []

        if schema_sync and labels:
            if len(labels) != len(set(labels)):
                note = '"schema_sync" requires unique labels in the header'
                raise FrictionlessException(note)

            # Index the fields once instead of scanning them for every label.
            # `setdefault` keeps the first field when names are duplicated,
            # which is what a linear "first match" search would return.
            fields_by_name: Dict[str, Field] = {}
            for field in fields:
                fields_by_name.setdefault(field.name, field)

            for label_index, label in enumerate(labels):
                field = fields_by_name.get(label)
                if field is None:
                    field = Field.from_descriptor({"name": label, "type": "any"})
                self._expected_fields.append(_FieldInfo(field, label_index + 1))
        else:
            self._expected_fields = [
                _FieldInfo(field, i + 1) for i, field in enumerate(fields)
            ]

    def ls(self) -> List[str]:
        """List the names of all expected fields, in order"""
        return [fi.field.name for fi in self._expected_fields]

    def get(self, field_name: str) -> _FieldInfo:
        """Get the info of an expected field by its name

        The expected fields are the ones built at initialization: they follow
        the labels when `schema_sync` was set (and labels were provided), and
        the schema fields otherwise. If several expected fields share the
        name, the first one is returned.

        Raises:
            ValueError: no expected field is named `field_name`
        """
        for fi in self._expected_fields:
            if fi.field.name == field_name:
                return fi
        raise ValueError(f"{field_name} is missing from expected fields")

    def get_copies(self) -> List[Field]:
        """Return field copies"""
        return [fi.field.to_copy() for fi in self._expected_fields]

    def rm(self, field_name: str):
        """Remove the first expected field named `field_name`

        Raises:
            ValueError: no expected field is named `field_name`
        """
        try:
            i = self.ls().index(field_name)
        except ValueError:
            # The `list.index` error adds nothing for the caller, so it is
            # not chained to the one raised here.
            raise ValueError(f"'{field_name}' is not in fields data") from None
        del self._expected_fields[i]
0 commit comments