Skip to content

Commit 9a80dc6

Browse files
committed
remove incomplete braid solution
1 parent d7112f8 commit 9a80dc6

File tree

3 files changed: 2 additions (+2) and 157 deletions (-157).

data_algebra/solutions.py

Lines changed: 0 additions & 105 deletions
Original file line number | Diff line number | Diff line change
@@ -341,111 +341,6 @@ def last_observed_carried_forward(
341341
return ops
342342

343343

344-
def braid_data(
345-
*,
346-
d_state: ViewRepresentation,
347-
d_event: ViewRepresentation,
348-
order_by: Iterable[str],
349-
partition_by: Optional[Iterable[str]] = None,
350-
state_value_column_name: str,
351-
event_value_column_names: Iterable[str],
352-
source_id_column: str = "record_type",
353-
state_row_mark: str = "state_row",
354-
event_row_mark: str = "event_row",
355-
stand_in_values: Dict,
356-
locf_to_use_column_name: str = "locf_to_use",
357-
locf_non_null_rank_column_name: str = "locf_non_null_rank",
358-
locf_tiebreaker_column_name: str = "locf_tiebreaker",
359-
) -> ViewRepresentation:
360-
"""
361-
Mix data from two sources, ordering by order_by columns and carrying forward observations
362-
on d_state value column.
363-
364-
:param d_state: ViewRepresentation representation of state by order_by.
365-
:param d_event: ViewRepresentation representation of events by order_by.
366-
:param order_by: columns to order by (non empty list of column names)
367-
:param partition_by: optional partitioning column names
368-
:param state_value_column_name: column to copy from d_state and propagate forward
369-
:param event_value_column_names: columns to copy from d_event
370-
:param source_id_column: name for source identification column.
371-
:param state_row_mark: source annotation of state rows.
372-
:param event_row_mark: source annotation of event rows.
373-
:param stand_in_values: dictionary stand in values to use for state_value_column_name and event_value_column_names
374-
needed to get column types correct, replaced by None and not passed further.
375-
:param locf_to_use_column_name: name for a temporary values column
376-
:param locf_non_null_rank_column_name: name for a temporary values column
377-
:param locf_tiebreaker_column_name: name for a temporary values column
378-
:return: ops
379-
"""
380-
assert isinstance(d_state, ViewRepresentation)
381-
assert isinstance(d_event, ViewRepresentation)
382-
assert not isinstance(order_by, str)
383-
order_by = list(order_by)
384-
assert len(order_by) > 0
385-
if partition_by is not None:
386-
assert not isinstance(partition_by, str)
387-
partition_by = list(partition_by)
388-
else:
389-
partition_by = []
390-
assert isinstance(state_value_column_name, str)
391-
assert not isinstance(event_value_column_names, str)
392-
event_value_column_names = list(event_value_column_names)
393-
assert isinstance(source_id_column, str)
394-
assert isinstance(state_row_mark, str)
395-
assert isinstance(event_row_mark, str)
396-
assert isinstance(locf_to_use_column_name, str)
397-
assert isinstance(locf_non_null_rank_column_name, str)
398-
assert isinstance(locf_tiebreaker_column_name, str)
399-
assert isinstance(stand_in_values, dict)
400-
together = (
401-
d_state.extend({k: stand_in_values[k] for k in event_value_column_names})
402-
.select_columns(
403-
partition_by
404-
+ order_by
405-
+ [state_value_column_name]
406-
+ event_value_column_names
407-
)
408-
.concat_rows(
409-
b=(
410-
d_event.extend(
411-
{state_value_column_name: stand_in_values[state_value_column_name]}
412-
).select_columns(
413-
partition_by
414-
+ order_by
415-
+ [state_value_column_name]
416-
+ event_value_column_names
417-
)
418-
),
419-
id_column=source_id_column,
420-
a_name=state_row_mark,
421-
b_name=event_row_mark,
422-
)
423-
# clear out stand-in values
424-
.extend(
425-
{
426-
state_value_column_name: f'({source_id_column} == "{event_row_mark}").if_else(None, {state_value_column_name})'
427-
}
428-
)
429-
.extend(
430-
{
431-
k: f'({source_id_column} == "{state_row_mark}").if_else(None, {k})'
432-
for k in event_value_column_names
433-
}
434-
)
435-
)
436-
ops = last_observed_carried_forward(
437-
together,
438-
order_by=order_by,
439-
partition_by=partition_by,
440-
value_column_name=state_value_column_name,
441-
selection_predicate="is_null()",
442-
locf_to_use_column_name=locf_to_use_column_name,
443-
locf_non_null_rank_column_name=locf_non_null_rank_column_name,
444-
locf_tiebreaker_column_name=locf_tiebreaker_column_name,
445-
)
446-
return ops
447-
448-
449344
def rank_to_average(
450345
d: ViewRepresentation,
451346
*,

tests/test_braid.py

Lines changed: 0 additions & 51 deletions
This file was deleted.

tests/test_idioms.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,9 +68,10 @@ def test_idiom_extend_special_count():
6868
expect = data_algebra.data_model.default_data_model().pd.DataFrame({"count": [4]})
6969

7070
with pytest.warns(UserWarning):
71+
# warning is db adapter saying to not use this fn
7172
data_algebra.test_util.check_transform(
7273
ops=ops, data=d, expect=expect, empty_produces_empty=False,
73-
try_on_Polars=False, # TODO: turn this on
74+
try_on_Polars=False, # TODO: turn this on, don't use cumsum() in project on Polars
7475
)
7576

7677

0 commit comments

Comments
 (0)