Skip to content

Commit 8863634

Browse files
committed
rebuild and retest
1 parent 9a80dc6 commit 8863634

File tree

8 files changed

+2681
-3008
lines changed

8 files changed

+2681
-3008
lines changed

build/lib/data_algebra/polars_model.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,11 +20,15 @@
2020

2121

2222
def _build_lit(v):
23-
if isinstance(v, int):
23+
if isinstance(v, bool):
24+
# bools return true to isinstance int.
25+
return pl.lit(v)
26+
elif isinstance(v, int):
2427
# Polars defaults ints in constructor to Int64,
2528
# but ints in lit to Int32. Try to prevent type clashes
2629
return pl.lit(v, pl.Int64)
27-
return pl.lit(v)
30+
else:
31+
return pl.lit(v)
2832

2933

3034
def _reduce_plus(*args):
@@ -632,6 +636,7 @@ def _extend_step(self, op: data_algebra.data_ops_types.OperatorPlatform, *, data
632636
res = res.with_columns(produced_columns)
633637
if len(temp_v_columns) > 0:
634638
res = res.select(op.columns_produced())
639+
# get back to lazy type if needed
635640
if self.use_lazy_eval and isinstance(res, pl.DataFrame):
636641
res = res.lazy()
637642
return res
@@ -685,6 +690,14 @@ def _project_step(self, op: data_algebra.data_ops_types.OperatorPlatform, *, dat
685690
res = res.groupby(group_by).agg(produced_columns)
686691
if len(temp_v_columns) > 0:
687692
res = res.select(op.columns_produced())
693+
if (op.group_by is None) or (len(op.group_by) == 0):
694+
# see if we have a zero row result
695+
if isinstance(res, pl.LazyFrame):
696+
res = res.collect()
697+
if res.shape[0] <= 0:
698+
# make an all None frame
699+
res = pl.DataFrame({c: [None] for c in res.columns}, columns=[(res.columns[j], res.dtypes[j]) for j in range(res.shape[1])])
700+
# see if we need to convert to lazy type
688701
if self.use_lazy_eval and isinstance(res, pl.DataFrame):
689702
res = res.lazy()
690703
return res

build/lib/data_algebra/solutions.py

Lines changed: 0 additions & 105 deletions
Original file line number | Diff line number | Diff line change
@@ -341,111 +341,6 @@ def last_observed_carried_forward(
341341
return ops
342342

343343

344-
def braid_data(
345-
*,
346-
d_state: ViewRepresentation,
347-
d_event: ViewRepresentation,
348-
order_by: Iterable[str],
349-
partition_by: Optional[Iterable[str]] = None,
350-
state_value_column_name: str,
351-
event_value_column_names: Iterable[str],
352-
source_id_column: str = "record_type",
353-
state_row_mark: str = "state_row",
354-
event_row_mark: str = "event_row",
355-
stand_in_values: Dict,
356-
locf_to_use_column_name: str = "locf_to_use",
357-
locf_non_null_rank_column_name: str = "locf_non_null_rank",
358-
locf_tiebreaker_column_name: str = "locf_tiebreaker",
359-
) -> ViewRepresentation:
360-
"""
361-
Mix data from two sources, ordering by order_by columns and carrying forward observations
362-
on d_state value column.
363-
364-
:param d_state: ViewRepresentation representation of state by order_by.
365-
:param d_event: ViewRepresentation representation of events by order_by.
366-
:param order_by: columns to order by (non empty list of column names)
367-
:param partition_by: optional partitioning column names
368-
:param state_value_column_name: column to copy from d_state and propagate forward
369-
:param event_value_column_names: columns to copy from d_event
370-
:param source_id_column: name for source identification column.
371-
:param state_row_mark: source annotation of state rows.
372-
:param event_row_mark: source annotation of event rows.
373-
:param stand_in_values: dictionary stand in values to use for state_value_column_name and event_value_column_names
374-
needed to get column types correct, replaced by None and not passed further.
375-
:param locf_to_use_column_name: name for a temporary values column
376-
:param locf_non_null_rank_column_name: name for a temporary values column
377-
:param locf_tiebreaker_column_name: name for a temporary values column
378-
:return: ops
379-
"""
380-
assert isinstance(d_state, ViewRepresentation)
381-
assert isinstance(d_event, ViewRepresentation)
382-
assert not isinstance(order_by, str)
383-
order_by = list(order_by)
384-
assert len(order_by) > 0
385-
if partition_by is not None:
386-
assert not isinstance(partition_by, str)
387-
partition_by = list(partition_by)
388-
else:
389-
partition_by = []
390-
assert isinstance(state_value_column_name, str)
391-
assert not isinstance(event_value_column_names, str)
392-
event_value_column_names = list(event_value_column_names)
393-
assert isinstance(source_id_column, str)
394-
assert isinstance(state_row_mark, str)
395-
assert isinstance(event_row_mark, str)
396-
assert isinstance(locf_to_use_column_name, str)
397-
assert isinstance(locf_non_null_rank_column_name, str)
398-
assert isinstance(locf_tiebreaker_column_name, str)
399-
assert isinstance(stand_in_values, dict)
400-
together = (
401-
d_state.extend({k: stand_in_values[k] for k in event_value_column_names})
402-
.select_columns(
403-
partition_by
404-
+ order_by
405-
+ [state_value_column_name]
406-
+ event_value_column_names
407-
)
408-
.concat_rows(
409-
b=(
410-
d_event.extend(
411-
{state_value_column_name: stand_in_values[state_value_column_name]}
412-
).select_columns(
413-
partition_by
414-
+ order_by
415-
+ [state_value_column_name]
416-
+ event_value_column_names
417-
)
418-
),
419-
id_column=source_id_column,
420-
a_name=state_row_mark,
421-
b_name=event_row_mark,
422-
)
423-
# clear out stand-in values
424-
.extend(
425-
{
426-
state_value_column_name: f'({source_id_column} == "{event_row_mark}").if_else(None, {state_value_column_name})'
427-
}
428-
)
429-
.extend(
430-
{
431-
k: f'({source_id_column} == "{state_row_mark}").if_else(None, {k})'
432-
for k in event_value_column_names
433-
}
434-
)
435-
)
436-
ops = last_observed_carried_forward(
437-
together,
438-
order_by=order_by,
439-
partition_by=partition_by,
440-
value_column_name=state_value_column_name,
441-
selection_predicate="is_null()",
442-
locf_to_use_column_name=locf_to_use_column_name,
443-
locf_non_null_rank_column_name=locf_non_null_rank_column_name,
444-
locf_tiebreaker_column_name=locf_tiebreaker_column_name,
445-
)
446-
return ops
447-
448-
449344
def rank_to_average(
450345
d: ViewRepresentation,
451346
*,

coverage.txt

Lines changed: 23 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -11,19 +11,18 @@ tests/test_arith.py . [ 3%]
1111
tests/test_arrow1.py ... [ 4%]
1212
tests/test_bigquery_user_fns.py . [ 4%]
1313
tests/test_bitemporal_obs_agg_join.py . [ 4%]
14-
tests/test_braid.py . [ 5%]
1514
tests/test_calc_warnings_errors.py . [ 5%]
1615
tests/test_cc.py .... [ 6%]
1716
tests/test_cdata1.py .... [ 7%]
1817
tests/test_cdata_convenience.py . [ 7%]
1918
tests/test_cdata_example.py .. [ 8%]
2019
tests/test_cdata_tr6_example.py .... [ 9%]
2120
tests/test_cdata_wvpy_case.py . [ 9%]
22-
tests/test_coalesce.py . [ 10%]
21+
tests/test_coalesce.py . [ 9%]
2322
tests/test_cols_used.py . [ 10%]
2423
tests/test_compare_data_frames.py . [ 10%]
2524
tests/test_complex_expr.py .... [ 11%]
26-
tests/test_compound_where.py . [ 12%]
25+
tests/test_compound_where.py . [ 11%]
2726
tests/test_concat_rows.py ... [ 12%]
2827
tests/test_container.py .. [ 13%]
2928
tests/test_cross_product_join.py . [ 13%]
@@ -32,59 +31,59 @@ tests/test_data_space.py .. [ 17%]
3231
tests/test_date_stuff.py .. [ 17%]
3332
tests/test_db_handle.py .. [ 18%]
3433
tests/test_db_model.py . [ 18%]
35-
tests/test_degenerate_project.py . [ 19%]
34+
tests/test_degenerate_project.py . [ 18%]
3635
tests/test_drop_columns.py . [ 19%]
3736
tests/test_dup_detection_example.py . [ 19%]
3837
tests/test_eval_cache.py ... [ 20%]
3938
tests/test_ex_examples.py ............ [ 23%]
4039
tests/test_example1.py .... [ 24%]
41-
tests/test_exp.py . [ 25%]
40+
tests/test_exp.py . [ 24%]
4241
tests/test_exp_parens.py . [ 25%]
4342
tests/test_expand_rows.py . [ 25%]
44-
tests/test_expr_parse.py . [ 26%]
43+
tests/test_expr_parse.py . [ 25%]
4544
tests/test_expression_expectations.py .. [ 26%]
4645
tests/test_expression_expectations_polars.py . [ 26%]
4746
tests/test_extend.py .......... [ 29%]
4847
tests/test_extend_order.py . [ 29%]
49-
tests/test_first_last.py . [ 30%]
48+
tests/test_first_last.py . [ 29%]
5049
tests/test_float_divide.py .. [ 30%]
51-
tests/test_flow_text.py . [ 31%]
50+
tests/test_flow_text.py . [ 30%]
5251
tests/test_forbidden_calculation.py .. [ 31%]
5352
tests/test_forbidden_ops.py ... [ 32%]
5453
tests/test_free_fn.py . [ 32%]
55-
tests/test_get_methods_used.py . [ 33%]
54+
tests/test_get_methods_used.py . [ 32%]
5655
tests/test_ghost_col_issue.py . [ 33%]
57-
tests/test_idioms.py ................. [ 38%]
56+
tests/test_idioms.py ................. [ 37%]
5857
tests/test_if_else.py ..... [ 39%]
5958
tests/test_if_else_return_type.py . [ 39%]
60-
tests/test_incomplete_agg.py . [ 40%]
59+
tests/test_incomplete_agg.py . [ 39%]
6160
tests/test_join_check.py . [ 40%]
62-
tests/test_join_conditions.py ... [ 41%]
61+
tests/test_join_conditions.py ... [ 40%]
6362
tests/test_join_effects.py . [ 41%]
6463
tests/test_join_multi_key.py . [ 41%]
65-
tests/test_join_opt.py . [ 42%]
64+
tests/test_join_opt.py . [ 41%]
6665
tests/test_join_variations.py . [ 42%]
6766
tests/test_lark_parser.py ..... [ 43%]
6867
tests/test_locf.py . [ 43%]
69-
tests/test_logistic_example.py . [ 44%]
68+
tests/test_logistic_example.py . [ 43%]
7069
tests/test_mapv.py .. [ 44%]
71-
tests/test_math.py . [ 45%]
70+
tests/test_math.py . [ 44%]
7271
tests/test_method_catalog_issues.py .. [ 45%]
7372
tests/test_minimum.py . [ 45%]
7473
tests/test_mod_fns.py ... [ 46%]
75-
tests/test_multi_map.py . [ 47%]
74+
tests/test_multi_map.py . [ 46%]
7675
tests/test_narrow.py . [ 47%]
7776
tests/test_natural_join.py .... [ 48%]
7877
tests/test_neg.py . [ 48%]
7978
tests/test_null_bad.py .... [ 49%]
80-
tests/test_one_row_cdata_convert.py . [ 50%]
79+
tests/test_one_row_cdata_convert.py . [ 49%]
8180
tests/test_ops.py . [ 50%]
8281
tests/test_ops_eq.py . [ 50%]
83-
tests/test_or.py ..... [ 52%]
82+
tests/test_or.py ..... [ 51%]
8483
tests/test_order_limit.py . [ 52%]
8584
tests/test_parens.py .. [ 52%]
86-
tests/test_parse.py .......................... [ 60%]
87-
tests/test_polars.py ...................... [ 66%]
85+
tests/test_parse.py .......................... [ 59%]
86+
tests/test_polars.py ....................... [ 66%]
8887
tests/test_project.py ..... [ 67%]
8988
tests/test_rank.py . [ 68%]
9089
tests/test_rank_to_average.py . [ 68%]
@@ -161,14 +160,14 @@ data_algebra/op_container.py 127 49 61% 46-47, 63-71, 80-81
161160
data_algebra/pandas_base.py 696 68 90% 54, 68, 77, 87, 92, 101, 223, 225, 239, 242, 247, 252, 427, 467, 478, 503, 506, 511, 514, 516, 528, 534-541, 548, 582-587, 617, 621, 624, 626, 663, 717, 758, 775, 795, 813, 823, 838, 886, 894, 902, 917, 928, 940, 959, 974, 1002, 1017, 1054, 1071, 1074, 1085, 1113, 1145, 1154, 1181, 1198, 1210, 1265, 1279-1281
162161
data_algebra/pandas_model.py 19 2 89% 32-33
163162
data_algebra/parse_by_lark.py 164 24 85% 71, 93, 108, 129-130, 137, 161, 171, 185-186, 188, 200, 206, 213-217, 245, 253, 263-266
164-
data_algebra/polars_model.py 532 61 89% 130, 139, 189, 401, 417, 427, 434, 447-451, 459, 461, 486, 489, 494, 497, 546, 564, 580, 644, 660-662, 689, 697, 749, 764, 782, 800, 820, 832-834, 837, 842, 844, 851-863, 870, 875, 906, 935, 944, 972, 987, 999, 1063-1065, 1074-1075, 1077
163+
data_algebra/polars_model.py 539 60 89% 134, 143, 193, 405, 421, 431, 438, 451-455, 463, 465, 490, 493, 498, 501, 550, 568, 584, 649, 665-667, 710, 762, 777, 795, 813, 833, 845-847, 850, 855, 857, 864-876, 883, 888, 919, 948, 957, 985, 1000, 1012, 1076-1078, 1087-1088, 1090
165164
data_algebra/python3_lark.py 1 0 100%
166-
data_algebra/solutions.py 135 4 97% 63, 308, 389, 472
165+
data_algebra/solutions.py 112 3 97% 63, 308, 367
167166
data_algebra/sql_format_options.py 15 2 87% 61, 69
168167
data_algebra/test_util.py 333 62 81% 28-29, 104, 126, 136, 139, 143, 166, 169, 173, 175-178, 189, 246-247, 263-268, 272, 284, 286-294, 331, 333, 344, 352, 363, 370, 376, 388, 399, 413, 468, 472, 523-526, 528-531, 533-536, 538-541, 652-657, 662-663, 665
169168
data_algebra/util.py 127 28 78% 26, 59-60, 63-64, 67-68, 71-72, 75-76, 79-80, 83-84, 87-88, 91-92, 95-96, 143, 165, 167, 182, 223, 227, 229
170169
--------------------------------------------------------------------
171-
TOTAL 6786 964 86%
170+
TOTAL 6770 962 86%
172171

173172

174-
======================= 357 passed in 821.88s (0:13:41) ========================
173+
======================= 357 passed in 778.36s (0:12:58) ========================
-523 Bytes
Binary file not shown.

dist/data_algebra-1.6.1.tar.gz

-436 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)