@@ -341,111 +341,6 @@ def last_observed_carried_forward(
341341 return ops
342342
343343
def braid_data(
    *,
    d_state: ViewRepresentation,
    d_event: ViewRepresentation,
    order_by: Iterable[str],
    partition_by: Optional[Iterable[str]] = None,
    state_value_column_name: str,
    event_value_column_names: Iterable[str],
    source_id_column: str = "record_type",
    state_row_mark: str = "state_row",
    event_row_mark: str = "event_row",
    stand_in_values: Dict,
    locf_to_use_column_name: str = "locf_to_use",
    locf_non_null_rank_column_name: str = "locf_non_null_rank",
    locf_tiebreaker_column_name: str = "locf_tiebreaker",
) -> ViewRepresentation:
    """
    Mix rows from a state source and an event source into one pipeline, then
    hand the state value column to last_observed_carried_forward().

    Both inputs are widened to the same column set (partition_by + order_by +
    state value column + event value columns) and row-concatenated, with
    source_id_column recording which input each row came from. The columns a
    source did not originally have are first filled from stand_in_values and,
    after concatenation, overwritten with None on the rows of that source.

    :param d_state: ViewRepresentation representation of state by order_by.
    :param d_event: ViewRepresentation representation of events by order_by.
    :param order_by: columns to order by (non empty list of column names)
    :param partition_by: optional partitioning column names
    :param state_value_column_name: column to copy from d_state and propagate forward
    :param event_value_column_names: columns to copy from d_event
    :param source_id_column: name for source identification column.
    :param state_row_mark: source annotation of state rows.
    :param event_row_mark: source annotation of event rows.
    :param stand_in_values: dictionary of stand in values, must have an entry for
           state_value_column_name and for each of event_value_column_names.
           Needed to get column types correct, replaced by None and not passed further.
    :param locf_to_use_column_name: name for a temporary values column
    :param locf_non_null_rank_column_name: name for a temporary values column
    :param locf_tiebreaker_column_name: name for a temporary values column
    :return: ops
    :raises KeyError: if stand_in_values lacks an entry for a required column.
    """
    assert isinstance(d_state, ViewRepresentation)
    assert isinstance(d_event, ViewRepresentation)
    # a bare string is iterable, but would be split into characters by list()
    assert not isinstance(order_by, str)
    order_by = list(order_by)
    assert len(order_by) > 0
    if partition_by is not None:
        assert not isinstance(partition_by, str)
        partition_by = list(partition_by)
    else:
        partition_by = []
    assert isinstance(state_value_column_name, str)
    assert not isinstance(event_value_column_names, str)
    event_value_column_names = list(event_value_column_names)
    assert isinstance(source_id_column, str)
    assert isinstance(state_row_mark, str)
    assert isinstance(event_row_mark, str)
    assert isinstance(locf_to_use_column_name, str)
    assert isinstance(locf_non_null_rank_column_name, str)
    assert isinstance(locf_tiebreaker_column_name, str)
    assert isinstance(stand_in_values, dict)
    # Report every missing stand-in at once, before any pipeline is built;
    # a lookup failure further down would name only the first missing key.
    missing_stand_ins = [
        k
        for k in [state_value_column_name] + event_value_column_names
        if k not in stand_in_values
    ]
    if missing_stand_ins:
        raise KeyError(
            f"stand_in_values is missing entries for columns: {missing_stand_ins}"
        )
    # both sides of the row concatenation must present the same columns
    shared_columns = (
        partition_by + order_by + [state_value_column_name] + event_value_column_names
    )
    # state rows: event columns are not present, fill them with typed stand-ins
    state_rows = d_state.extend(
        {k: stand_in_values[k] for k in event_value_column_names}
    ).select_columns(list(shared_columns))
    # event rows: state column is not present, fill it with a typed stand-in
    event_rows = d_event.extend(
        {state_value_column_name: stand_in_values[state_value_column_name]}
    ).select_columns(list(shared_columns))
    together = (
        state_rows.concat_rows(
            b=event_rows,
            id_column=source_id_column,
            a_name=state_row_mark,
            b_name=event_row_mark,
        )
        # clear out stand-in values
        # NOTE: the row marks are compared as exact quoted literals, so no
        # padding may appear inside the quotes of the generated expression.
        # NOTE(review): a mark containing a double quote would break the
        # generated expression text; marks are not escaped here.
        .extend(
            {
                state_value_column_name: f'({source_id_column} == "{event_row_mark}").if_else(None, {state_value_column_name})'
            }
        )
        .extend(
            {
                k: f'({source_id_column} == "{state_row_mark}").if_else(None, {k})'
                for k in event_value_column_names
            }
        )
    )
    # fill the now-null state values on event rows from earlier observations
    ops = last_observed_carried_forward(
        together,
        order_by=order_by,
        partition_by=partition_by,
        value_column_name=state_value_column_name,
        selection_predicate="is_null()",
        locf_to_use_column_name=locf_to_use_column_name,
        locf_non_null_rank_column_name=locf_non_null_rank_column_name,
        locf_tiebreaker_column_name=locf_tiebreaker_column_name,
    )
    return ops
447-
448-
449344def rank_to_average (
450345 d : ViewRepresentation ,
451346 * ,
0 commit comments