@@ -23,7 +23,7 @@ def _format_table(
2323 record_id_cols : Iterable [str ],
2424 control_id_cols : Iterable [str ],
2525 add_style : bool = True ,
26- ):
26+ ) -> str :
2727 local_data_model = data_algebra .data_model .lookup_data_model_for_dataframe (d )
2828 d = local_data_model .to_pandas (d )
2929 pd = data_algebra .data_model .lookup_data_model_for_dataframe (d ).pd
@@ -52,12 +52,13 @@ def _format_table(
5252 d = d .style .set_properties (
5353 ** {"background-color" : "#FFE4C4" }, subset = record_id_col_pairs
5454 ).set_properties (** {"background-color" : "#7FFFD4" }, subset = control_id_col_pairs )
55- return d
55+ return d . _repr_html_ ()
5656
5757
5858class RecordSpecification :
5959 """
60- Class to represent a multi-row data record.
60+ Class to represent a data record.
61+ For single row data records use None as the specification.
6162 """
6263
6364 row_columns : List [str ] # columns when in row form
@@ -93,11 +94,22 @@ def __init__(
9394 control_table = local_data_model .clean_copy (control_table )
9495 if control_table .shape [0 ] < 1 :
9596 raise ValueError ("control table should have at least 1 row" )
97+ if control_table .shape [1 ] < 2 :
98+ raise ValueError ("control table must have at least 2 columns" )
9699 if len (control_table .columns ) != len (set (control_table .columns )):
97100 raise ValueError ("control table columns should be unique" )
101+ if control_table_keys is None :
102+ if control_table .shape [0 ] > 1 :
103+ control_table_keys = [control_table .columns [0 ]]
104+ else :
105+ control_table_keys = [] # single row records don't need to be keyed
106+ if isinstance (control_table_keys , str ):
107+ control_table_keys = [control_table_keys ]
108+ else :
109+ control_table_keys = list (control_table_keys )
110+ assert isinstance (control_table_keys , List )
98111 if strict :
99112 if control_table .shape [0 ] > 1 :
100- assert control_table_keys is not None
101113 assert len (control_table_keys ) > 0
102114 assert local_data_model .table_is_keyed_by_columns (
103115 control_table , column_names = control_table_keys
@@ -106,14 +118,12 @@ def __init__(
106118 assert self .control_table .shape [0 ] > 0
107119 if record_keys is None :
108120 record_keys = []
109- if isinstance (record_keys , str ):
121+ elif isinstance (record_keys , str ):
110122 record_keys = [record_keys ]
111- self .record_keys = list (record_keys )
112- if control_table_keys is None :
113- if self .control_table .shape [0 ] > 1 :
114- control_table_keys = [self .control_table .columns [0 ]]
115- else :
116- control_table_keys = [] # single row records don't need to be keyed
123+ else :
124+ record_keys = list (record_keys )
125+ assert isinstance (record_keys , list )
126+ self .record_keys = record_keys
117127 if isinstance (control_table_keys , str ):
118128 control_table_keys = [control_table_keys ]
119129 if self .control_table .shape [0 ] > 1 :
@@ -182,12 +192,55 @@ def row_version(self, *, include_record_keys: bool = True) -> List[str]:
182192 :param include_record_keys: logical, if True include record keys as columns
183193 :return: column list
184194 """
195+ assert isinstance (include_record_keys , bool )
185196 cols : List [str ] = []
186197 if include_record_keys :
187198 cols = cols + self .record_keys
188199 cols = cols + self .content_keys
189200 return cols
201+
def row_record_form(self) -> "RecordSpecification":
    """
    Return specification of matching row record form.
    Note: prefer using None to specify row records specs.

    The returned specification is built from a one-row control table that
    has one column per name returned by
    ``self.row_version(include_record_keys=False)``; each cell holds its own
    column name. Record keys and strictness are copied from this
    specification and the control table is given no key columns.

    :return: new RecordSpecification for the row record form
    """
    local_data_model = data_algebra.data_model.lookup_data_model_for_dataframe(
        self.control_table
    )
    # record keys are excluded here; they are passed separately as record_keys
    row_vals = self.row_version(include_record_keys=False)
    # single row control table: column k holds the single value k
    ct = local_data_model.data_frame({k: [k] for k in row_vals})
    return RecordSpecification(
        ct,
        record_keys=self.record_keys,
        control_table_keys=[],
        strict=self.strict,
        local_data_model=local_data_model,
    )
220+
def value_column_form(
    self, *, key_column_name: str = "measure", value_column_name: str = "value"
) -> "RecordSpecification":
    """
    Return specification of the matching value column form.
    Note: for type safety prefer map_to_rows() to map_to_keyed_column().

    The returned specification uses a two column control table with one row
    per name returned by ``self.row_version(include_record_keys=False)``:
    the key column and the value column both list those names. The control
    table is keyed by the key column.

    :param key_column_name: name for additional keying column
    :param value_column_name: name for value column
    :return: new RecordSpecification for the value column form
    :raises ValueError: if key_column_name and value_column_name are equal
    """
    assert isinstance(key_column_name, str)
    assert isinstance(value_column_name, str)
    if key_column_name == value_column_name:
        # identical names would collapse the dict below into a single column
        raise ValueError(
            "key_column_name and value_column_name must be different"
        )
    local_data_model = data_algebra.data_model.lookup_data_model_for_dataframe(
        self.control_table
    )
    # compute the column list once; record keys are passed separately below
    content_cols = self.row_version(include_record_keys=False)
    ct = local_data_model.data_frame(
        {
            key_column_name: content_cols,
            # independent copy so the two columns never share a list object
            value_column_name: list(content_cols),
        }
    )
    return RecordSpecification(
        ct,
        record_keys=self.record_keys,
        control_table_keys=[key_column_name],
        strict=self.strict,
        local_data_model=local_data_model,
    )
243+
191244 def __repr__ (self ):
192245 s = (
193246 "data_algebra.cdata.RecordSpecification(\n "
@@ -248,7 +301,12 @@ def _repr_html_(self):
248301 + _str_list_to_html (self .control_table_keys )
249302 + "</li>\n "
250303 + "<li>control_table:<br>\n "
251- + self .control_table ._repr_html_ ()
304+ + _format_table (
305+ self .control_table ,
306+ record_id_cols = self .record_keys ,
307+ control_id_cols = self .control_table_keys ,
308+ add_style = True ,
309+ )
252310 + "</li>\n "
253311 + "</ul>"
254312 + "</p>\n "
@@ -266,7 +324,8 @@ def map_to_rows(self):
266324
267325 :return: RecordMap
268326 """
269-
327+ if self .control_table .shape [0 ] <= 1 :
328+ raise ValueError ("already in row record format" )
270329 return RecordMap (blocks_in = self , strict = self .strict )
271330
272331 def map_from_rows (self ):
@@ -275,8 +334,33 @@ def map_from_rows(self):
275334
276335 :return: RecordMap
277336 """
278-
337+ if self .control_table .shape [0 ] <= 1 :
338+ raise ValueError ("already in row record format" )
279339 return RecordMap (blocks_out = self , strict = self .strict )
340+
def map_to_keyed_column(
    self, *, key_column_name: str = "measure", value_column_name: str = "value"
):
    """
    Build a RecordMap mapping this RecordSpecification to a table
    where only one column holds values.
    Note: for type safety prefer map_to_rows() to map_to_keyed_column().

    :param key_column_name: name for additional keying column
    :param value_column_name: name for value column
    :return: Record map
    """
    # forward the requested column names; calling value_column_form() with
    # no arguments would always use its defaults and ignore the caller
    keyed_column_spec = self.value_column_form(
        key_column_name=key_column_name,
        value_column_name=value_column_name,
    )
    return RecordMap(
        blocks_in=self, blocks_out=keyed_column_spec, strict=self.strict
    )
353+
def map_from_keyed_column(
    self, *, key_column_name: str = "measure", value_column_name: str = "value"
):
    """
    Build a RecordMap mapping this RecordSpecification from a table
    where only one column holds values.

    :param key_column_name: name for additional keying column
    :param value_column_name: name for value column
    :return: Record map
    """
    # forward the requested column names; calling value_column_form() with
    # no arguments would always use its defaults and ignore the caller
    keyed_column_spec = self.value_column_form(
        key_column_name=key_column_name,
        value_column_name=value_column_name,
    )
    return RecordMap(
        blocks_in=keyed_column_spec, blocks_out=self, strict=self.strict
    )
280364
281365
282366class RecordMap (ShiftPipeAction ):
@@ -309,16 +393,23 @@ def __init__(
309393 assert isinstance (blocks_in , RecordSpecification )
310394 if strict :
311395 assert blocks_in .strict
396+ if blocks_in .control_table .shape [0 ] <= 1 :
397+ blocks_in = None
398+ if blocks_in is not None :
312399 ck = [k for k in blocks_in .content_keys if k is not None ]
313400 if len (ck ) != len (set (ck )):
314401 raise ValueError ("blocks_in can not have duplicate content keys" )
402+ if blocks_in .control_table .shape [0 ] <= 1 :
403+ raise ValueError ("for row records use None specification" )
315404 if blocks_out is not None :
316405 assert isinstance (blocks_out , RecordSpecification )
317406 if strict :
318407 assert blocks_out .strict
408+ if blocks_out .control_table .shape [0 ] <= 1 :
409+ blocks_out = None
319410 if (blocks_in is None ) and (blocks_out is None ):
320411 raise ValueError (
321- "At least one of blocks_in or blocks_out should not be None"
412+ "At least one of blocks_in or blocks_out should not be None or a non-row record "
322413 )
323414 if (blocks_in is not None ) and (blocks_out is not None ):
324415 unknown = set (blocks_out .record_keys ) - set (blocks_in .record_keys )
@@ -658,9 +749,9 @@ def _repr_html_(self):
658749 )
659750 s = (
660751 "RecordMap: transforming records of the form:<br>\n "
661- + example_input_formatted . _repr_html_ ()
752+ + example_input_formatted
662753 + "<br>\n to records of the form:<br>\n "
663- + example_output_formatted . _repr_html_ ()
754+ + example_output_formatted
664755 )
665756 return s
666757
0 commit comments