Skip to content

Commit 8ad72dc

Browse files
committed
rebuild and recheck
1 parent d9b3554 commit 8ad72dc

File tree

6 files changed

+250
-14
lines changed

6 files changed

+250
-14
lines changed

build/lib/data_algebra/solutions.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import numpy
88
import data_algebra
9-
from data_algebra.data_ops import descr, ViewRepresentation
9+
from data_algebra.data_ops import descr, TableDescription, ViewRepresentation
1010
from data_algebra.cdata import (
1111
pivot_specification,
1212
unpivot_specification,
@@ -473,3 +473,70 @@ def rank_to_average(
473473
.drop_columns([tie_breaker_column_name])
474474
)
475475
return ops
476+
477+
478+
def replicate_rows_query(
    d: ViewRepresentation,
    *,
    count_column_name: str,
    seq_column_name: str,
    join_temp_name: str,
    max_count: int,
) -> Tuple[ViewRepresentation, Any]:
    """
    Build query to replicate each row by count_column_name copies.

    The returned ops join the incoming table against a helper table (the second
    returned value). The helper table must be made available to the query under
    the name join_temp_name, with the columns 'power' and seq_column_name.

    All argument checks are assert statements, so violations raise AssertionError.

    :param d: incoming data description (must be a TableDescription).
    :param count_column_name: name of count column, should be non-negative integers.
    :param seq_column_name: name of column to land sequence in. Must not already be
           a column of d, and must not be 'power' (reserved for the internal join key).
    :param join_temp_name: name for join temp table.
    :param max_count: maximum in count column we need to handle, should be a reasonable upper bound.
    :return: ops and table to join against
    """
    assert isinstance(d, TableDescription)
    assert isinstance(count_column_name, str)
    assert count_column_name in d.column_names
    assert isinstance(seq_column_name, str)
    assert seq_column_name not in d.column_names
    assert isinstance(join_temp_name, str)
    assert isinstance(max_count, int)
    assert max_count > 0
    # reserve a power key column
    power_key_colname = 'power'
    assert power_key_colname != count_column_name
    assert power_key_colname not in d.column_names
    # the sequence column shares the helper table with the power key; if the two
    # names collided the helper frame would lose its power labels
    assert power_key_colname != seq_column_name
    # get a pandas namespace
    pd = data_algebra.default_data_model.pd
    # largest power of 2 needed to meet or exceed max_count; written in the same
    # log(x) / log(2) form as the query expression below
    top_power = int(numpy.ceil(numpy.log(max_count) / numpy.log(2)))
    # one group per power p, labeled 'p<p>', holding the sequence 0 .. 2**p - 1
    count_frame = pd.concat(
        [
            pd.DataFrame({
                power_key_colname: f'p{p}',
                seq_column_name: range(2 ** p),
            })
            for p in range(top_power + 1)
        ],
        ignore_index=True,
    )
    # specify ops that produce row replicates
    ops = (
        d
        # specify which power table we want to join with
        .extend({
            power_key_colname: f'"p" %+% ({count_column_name}.log() / (2).log()).ceil().as_int64()'
        })
        # get one row for each sequence value in the matching power group
        # (join key deliberately does not identify a unique row)
        .natural_join(
            b=TableDescription(
                table_name=join_temp_name,
                column_names=[power_key_colname, seq_column_name],
            ),
            by=[power_key_colname],
            jointype='inner',
        )
        # drop rows exceeding desired count
        .select_rows(f'{seq_column_name} < {count_column_name}')
        # drop the power group column id
        .drop_columns([power_key_colname])
    )
    return ops, count_frame

coverage.txt

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
platform darwin -- Python 3.9.12, pytest-7.1.1, pluggy-1.0.0
33
rootdir: /Users/johnmount/Documents/work/data_algebra
44
plugins: anyio-3.5.0, cov-3.0.0
5-
collected 302 items
5+
collected 303 items
66

77
tests/test_OrderedSet.py ...... [ 1%]
88
tests/test_agg.py . [ 2%]
@@ -36,13 +36,14 @@ tests/test_ex_examples.py ............ [ 24%]
3636
tests/test_example1.py .... [ 26%]
3737
tests/test_exp.py . [ 26%]
3838
tests/test_exp_parens.py . [ 26%]
39+
tests/test_expand_rows.py . [ 27%]
3940
tests/test_expr_parse.py . [ 27%]
40-
tests/test_expression_expectations.py .. [ 27%]
41-
tests/test_extend.py ...... [ 29%]
41+
tests/test_expression_expectations.py .. [ 28%]
42+
tests/test_extend.py ...... [ 30%]
4243
tests/test_extend_order.py . [ 30%]
4344
tests/test_first_last.py . [ 30%]
4445
tests/test_float_divide.py ... [ 31%]
45-
tests/test_flow_text.py . [ 31%]
46+
tests/test_flow_text.py . [ 32%]
4647
tests/test_forbidden_calculation.py .. [ 32%]
4748
tests/test_forbidden_ops.py ... [ 33%]
4849
tests/test_free_fn.py . [ 33%]
@@ -71,15 +72,15 @@ tests/test_natural_join.py . [ 50%]
7172
tests/test_neg.py . [ 50%]
7273
tests/test_null_bad.py .. [ 51%]
7374
tests/test_one_row_cdata_convert.py . [ 51%]
74-
tests/test_ops.py . [ 51%]
75+
tests/test_ops.py . [ 52%]
7576
tests/test_ops_eq.py . [ 52%]
76-
tests/test_or.py ..... [ 53%]
77+
tests/test_or.py ..... [ 54%]
7778
tests/test_order_limit.py . [ 54%]
7879
tests/test_ordered_agg_group.py . [ 54%]
7980
tests/test_parens.py .. [ 55%]
80-
tests/test_parse.py .......................... [ 63%]
81+
tests/test_parse.py .......................... [ 64%]
8182
tests/test_project.py ..... [ 65%]
82-
tests/test_rank.py . [ 65%]
83+
tests/test_rank.py . [ 66%]
8384
tests/test_rank_to_average.py . [ 66%]
8485
tests/test_ranked_example.py . [ 66%]
8586
tests/test_readme_example.py . [ 66%]
@@ -150,12 +151,12 @@ data_algebra/pandas_base.py 501 44 91% 29, 69, 71, 84, 94,
150151
data_algebra/pandas_model.py 5 0 100%
151152
data_algebra/parse_by_lark.py 164 24 85% 71, 93, 108, 129-130, 137, 161, 171, 185-186, 188, 200, 206, 213-217, 245, 253, 263-266
152153
data_algebra/python3_lark.py 1 0 100%
153-
data_algebra/solutions.py 117 4 97% 63, 290, 371, 454
154+
data_algebra/solutions.py 135 4 97% 63, 290, 371, 454
154155
data_algebra/sql_format_options.py 15 2 87% 61, 69
155156
data_algebra/test_util.py 290 56 81% 88, 110, 120, 123, 127, 150, 153, 157, 159-162, 173, 228-229, 245-250, 254, 266, 268-276, 322, 325, 333, 344, 351, 355, 367, 380, 463-466, 468-471, 473-476, 478-481, 555-560, 566-567, 570
156157
data_algebra/util.py 140 29 79% 26, 51, 56, 61, 84-85, 88-89, 92-93, 96-97, 100-101, 104-105, 108-109, 112-113, 116-117, 120-121, 192, 207, 248, 252, 254
157158
--------------------------------------------------------------------
158-
TOTAL 5759 845 85%
159+
TOTAL 5777 845 85%
159160

160161

161-
======================= 302 passed in 417.20s (0:06:57) ========================
162+
======================= 303 passed in 423.88s (0:07:03) ========================
723 Bytes
Binary file not shown.

dist/data_algebra-1.4.1.tar.gz

687 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)