Skip to content

Commit cfd709e

Browse files
committed
rebuild and retest
1 parent 63d4c21 commit cfd709e

File tree

6 files changed

+1381
-1391
lines changed

6 files changed

+1381
-1391
lines changed

build/lib/data_algebra/polars_model.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def __init__(
3737
collect_required: bool = False, # property of tree, not node
3838
one_constant_required: bool = False, # property of tree, not node
3939
inputs: Optional[List] = None,
40+
lit_value = None,
4041
) -> None:
4142
"""
4243
Carry a Polars expression term (polars_term) plus annotations.
@@ -46,13 +47,17 @@ def __init__(
4647
:param is_column: True if term is a column name
4748
:param collect_required: True if Polars frame collection required by this node or an input node
4849
:param one_constant_required: True if one constant required by this node or an input node
50+
:param lit_value: original value for a literal
4951
:param inputs: inputs to expression node
5052
"""
5153
assert isinstance(is_literal, bool)
5254
assert isinstance(is_column, bool)
5355
assert isinstance(collect_required, bool)
5456
assert isinstance(one_constant_required, bool)
5557
assert (is_literal + is_column + (inputs is not None) + (polars_term is None)) == 1
58+
if lit_value is not None:
59+
assert is_literal
60+
self.lit_value = lit_value
5661
self.polars_term = polars_term
5762
self.is_literal = is_literal
5863
self.collect_required = collect_required
@@ -210,9 +215,9 @@ def _populate_expr_impl_map() -> Dict[int, Dict[str, Callable]]:
210215
# datetime parsing from https://stackoverflow.com/a/71759536/6901725
211216
# TODO: figure out why format is wrong type
212217
# TODO: wire up format
213-
"parse_date": lambda x, format : x.cast(str).str.strptime(pl.Date, fmt="%Y-%m-%d", strict=False).cast(pl.Date),
218+
"parse_date": lambda x, format : x.cast(str).str.strptime(pl.Date, fmt=format, strict=False).cast(pl.Date),
214219
# TODO: wire up format
215-
"parse_datetime": lambda x, format : x.cast(str).str.strptime(pl.Datetime, strict=False).cast(pl.Datetime),
220+
"parse_datetime": lambda x, format : x.cast(str).str.strptime(pl.Datetime, fmt=format, strict=False).cast(pl.Datetime),
216221
}
217222
impl_map_3 = {
218223
"if_else": lambda a, b, c: pl.when(a).then(b).otherwise(c),
@@ -265,6 +270,7 @@ def __init__(self, *, use_lazy_eval: bool = True):
265270
"TableDescription": self._table_step,
266271
}
267272
self._expr_impl_map = _populate_expr_impl_map()
273+
self._want_literals_unpacked = {"parse_date", "parse_datetime"}
268274
self._collect_required = set()
269275

270276
def data_frame(self, arg=None):
@@ -651,7 +657,7 @@ def act_on_literal(self, *, value):
651657
:return: converted result
652658
"""
653659
assert not isinstance(value, PolarsTerm)
654-
return PolarsTerm(polars_term=pl.lit(value), is_literal=True)
660+
return PolarsTerm(polars_term=pl.lit(value), is_literal=True, lit_value=value)
655661

656662
def act_on_column_name(self, *, arg, value):
657663
"""
@@ -679,7 +685,9 @@ def act_on_expression(self, *, arg, values: List, op):
679685
assert isinstance(v, PolarsTerm)
680686
f = self._expr_impl_map[len(values)][op.op]
681687
assert f is not None
682-
res = f(*[v.polars_term for v in values])
688+
want_literals_unpacked = op.op in self._want_literals_unpacked
689+
args = [v.lit_value if (want_literals_unpacked and v.is_literal) else v.polars_term for v in values]
690+
res = f(*args)
683691
return PolarsTerm(
684692
polars_term=res,
685693
inputs=values,

coverage.txt

Lines changed: 18 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
platform darwin -- Python 3.10.8, pytest-7.1.2, pluggy-1.0.0
33
rootdir: /Users/johnmount/Documents/work/data_algebra
44
plugins: anyio-3.5.0, cov-3.0.0
5-
collected 347 items
5+
collected 348 items
66

77
tests/test_OrderedSet.py ...... [ 1%]
88
tests/test_agg.py .. [ 2%]
@@ -27,12 +27,12 @@ tests/test_concat_rows.py ... [ 12%]
2727
tests/test_container.py .. [ 12%]
2828
tests/test_cross_product_join.py . [ 13%]
2929
tests/test_dag_elim.py ........... [ 16%]
30-
tests/test_data_space.py .. [ 17%]
30+
tests/test_data_space.py .. [ 16%]
3131
tests/test_date_stuff.py .. [ 17%]
3232
tests/test_db_handle.py .. [ 18%]
3333
tests/test_db_model.py . [ 18%]
3434
tests/test_degenerate_project.py . [ 18%]
35-
tests/test_drop_columns.py . [ 19%]
35+
tests/test_drop_columns.py . [ 18%]
3636
tests/test_dup_detection_example.py . [ 19%]
3737
tests/test_eval_cache.py ... [ 20%]
3838
tests/test_ex_examples.py ............ [ 23%]
@@ -54,12 +54,12 @@ tests/test_free_fn.py . [ 33%]
5454
tests/test_get_methods_used.py . [ 33%]
5555
tests/test_ghost_col_issue.py . [ 33%]
5656
tests/test_idioms.py ................. [ 38%]
57-
tests/test_if_else.py ..... [ 40%]
57+
tests/test_if_else.py ..... [ 39%]
5858
tests/test_if_else_return_type.py . [ 40%]
5959
tests/test_incomplete_agg.py . [ 40%]
6060
tests/test_join_check.py . [ 40%]
6161
tests/test_join_conditions.py ... [ 41%]
62-
tests/test_join_effects.py . [ 42%]
62+
tests/test_join_effects.py . [ 41%]
6363
tests/test_join_multi_key.py . [ 42%]
6464
tests/test_join_opt.py . [ 42%]
6565
tests/test_join_variations.py . [ 42%]
@@ -72,19 +72,19 @@ tests/test_method_catalog_issues.py .. [ 46%]
7272
tests/test_minimum.py . [ 46%]
7373
tests/test_mod_fns.py ... [ 47%]
7474
tests/test_multi_map.py . [ 47%]
75-
tests/test_narrow.py . [ 48%]
75+
tests/test_narrow.py . [ 47%]
7676
tests/test_natural_join.py .... [ 49%]
7777
tests/test_neg.py . [ 49%]
7878
tests/test_null_bad.py .. [ 50%]
7979
tests/test_one_row_cdata_convert.py . [ 50%]
8080
tests/test_ops.py . [ 50%]
81-
tests/test_ops_eq.py . [ 51%]
81+
tests/test_ops_eq.py . [ 50%]
8282
tests/test_or.py ..... [ 52%]
8383
tests/test_order_limit.py . [ 52%]
84-
tests/test_ordered_agg_group.py . [ 53%]
84+
tests/test_ordered_agg_group.py . [ 52%]
8585
tests/test_parens.py .. [ 53%]
86-
tests/test_parse.py .......................... [ 61%]
87-
tests/test_polars.py ............F.. [ 65%]
86+
tests/test_parse.py .......................... [ 60%]
87+
tests/test_polars.py ................ [ 65%]
8888
tests/test_project.py ..... [ 66%]
8989
tests/test_rank.py . [ 67%]
9090
tests/test_rank_to_average.py . [ 67%]
@@ -102,10 +102,10 @@ tests/test_select_values.py .. [ 73%]
102102
tests/test_set_quoting.py ...... [ 75%]
103103
tests/test_shift.py .. [ 76%]
104104
tests/test_shorten.py . [ 76%]
105-
tests/test_sign_parse.py .. [ 76%]
105+
tests/test_sign_parse.py .. [ 77%]
106106
tests/test_simple.py .... [ 78%]
107107
tests/test_simple_expr.py . [ 78%]
108-
tests/test_simplification.py .. [ 78%]
108+
tests/test_simplification.py .. [ 79%]
109109
tests/test_spark_sql.py ... [ 79%]
110110
tests/test_sql_extend_shortening.py .. [ 80%]
111111
tests/test_sqlite.py .............. [ 84%]
@@ -129,56 +129,14 @@ tests/test_window_fns.py ..... [ 98%]
129129
tests/test_with.py .. [ 99%]
130130
tests/test_xicor.py .. [100%]
131131

132-
=================================== FAILURES ===================================
133-
_________________________ test_polars_project_max_str __________________________
134-
135-
def test_polars_project_max_str():
136-
if have_polars:
137-
d = pl.DataFrame({
138-
"g": ["a", "a", "b"],
139-
"v": ["x", "y", "x"],
140-
})
141-
# d.groupby(["g"]).agg([pl.col("v").min().alias("v_min"), pl.col("v").max().alias("v_max")])
142-
# returns nulls
143-
# known Polars bug:
144-
# https://stackoverflow.com/q/74763636/6901725
145-
# https://github.com/pola-rs/polars/issues/5735
146-
ops = (
147-
data_algebra.descr(d=d)
148-
.project(
149-
{
150-
"min_v": "v.min()",
151-
"max_v": "v.max()",
152-
},
153-
group_by=["g"]
154-
)
155-
)
156-
res_polars = ops.transform(d)
157-
expect = pl.DataFrame({
158-
"g": ["a", "b"],
159-
"min_v": ["x", "x"],
160-
"max_v": ["y", "x"],
161-
})
162-
> assert data_algebra.test_util.equivalent_frames(res_polars.to_pandas(), expect.to_pandas())
163-
E AssertionError: assert False
164-
E + where False = <function equivalent_frames at 0x7fdd48f5d630>( g min_v max_v\n0 b None None\n1 a None None, g min_v max_v\n0 a x y\n1 b x x)
165-
E + where <function equivalent_frames at 0x7fdd48f5d630> = <module 'data_algebra.test_util' from '/Users/johnmount/Documents/work/data_algebra/data_algebra/test_util.py'>.equivalent_frames
166-
E + where <module 'data_algebra.test_util' from '/Users/johnmount/Documents/work/data_algebra/data_algebra/test_util.py'> = data_algebra.test_util
167-
E + and g min_v max_v\n0 b None None\n1 a None None = <bound method DataFrame.to_pandas of shape: (2, 3)\n┌─────┬───────┬───────┐\n│ g ┆ min_v ┆ max_v │\n│ --- ┆ --- ┆ ---...═════╪═══════╪═══════╡\n│ b ┆ null ┆ null │\n├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n│ a ┆ null ┆ null │\n└─────┴───────┴───────┘>()
168-
E + where <bound method DataFrame.to_pandas of shape: (2, 3)\n┌─────┬───────┬───────┐\n│ g ┆ min_v ┆ max_v │\n│ --- ┆ --- ┆ ---...═════╪═══════╪═══════╡\n│ b ┆ null ┆ null │\n├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n│ a ┆ null ┆ null │\n└─────┴───────┴───────┘> = shape: (2, 3)\n┌─────┬───────┬───────┐\n│ g ┆ min_v ┆ max_v │\n│ --- ┆ --- ┆ --- │\n│ str ┆ str ┆ str │\n╞═════╪═══════╪═══════╡\n│ b ┆ null ┆ null │\n├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n│ a ┆ null ┆ null │\n└─────┴───────┴───────┘.to_pandas
169-
E + and g min_v max_v\n0 a x y\n1 b x x = <bound method DataFrame.to_pandas of shape: (2, 3)\n┌─────┬───────┬───────┐\n│ g ┆ min_v ┆ max_v │\n│ --- ┆ --- ┆ ---...═════╪═══════╪═══════╡\n│ a ┆ x ┆ y │\n├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n│ b ┆ x ┆ x │\n└─────┴───────┴───────┘>()
170-
E + where <bound method DataFrame.to_pandas of shape: (2, 3)\n┌─────┬───────┬───────┐\n│ g ┆ min_v ┆ max_v │\n│ --- ┆ --- ┆ ---...═════╪═══════╪═══════╡\n│ a ┆ x ┆ y │\n├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n│ b ┆ x ┆ x │\n└─────┴───────┴───────┘> = shape: (2, 3)\n┌─────┬───────┬───────┐\n│ g ┆ min_v ┆ max_v │\n│ --- ┆ --- ┆ --- │\n│ str ┆ str ┆ str │\n╞═════╪═══════╪═══════╡\n│ a ┆ x ┆ y │\n├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤\n│ b ┆ x ┆ x │\n└─────┴───────┴───────┘.to_pandas
171-
172-
tests/test_polars.py:307: AssertionError
173-
174132
---------- coverage: platform darwin, python 3.10.8-final-0 ----------
175133
Name Stmts Miss Cover Missing
176134
--------------------------------------------------------------------
177135
data_algebra/BigQuery.py 126 67 47% 22-23, 27, 131-135, 139, 153-160, 169-178, 183-201, 215-244, 248-261, 272-287
178136
data_algebra/MySQL.py 31 8 74% 16-17, 79, 91-97
179137
data_algebra/OrderedSet.py 82 7 91% 33, 46, 49, 55, 58, 61, 64
180138
data_algebra/PostgreSQL.py 30 7 77% 15-16, 69-75
181-
data_algebra/SQLite.py 193 25 87% 91, 101, 127, 141, 143, 155, 171-172, 193, 201, 233, 260, 268-271, 291, 312-316, 448, 489, 494
139+
data_algebra/SQLite.py 193 29 85% 91, 101, 127, 141, 143, 155, 171-172, 193, 201, 233, 260, 268-271, 281, 289-292, 312-316, 448, 489, 494
182140
data_algebra/SparkSQL.py 87 46 47% 16-17, 21-22, 26-28, 89, 94, 132-136, 142-146, 170-172, 178-185, 192-207, 218-223
183141
data_algebra/__init__.py 9 0 100%
184142
data_algebra/arrow.py 141 41 71% 45, 52, 56-57, 66, 69, 73, 92, 96, 102, 113-116, 121, 129, 136, 155, 158, 171-172, 203, 212, 221-234, 237-245, 258, 260, 262, 266, 270
@@ -202,15 +160,14 @@ data_algebra/op_container.py 127 49 61% 46-47, 63-71, 80-81
202160
data_algebra/pandas_base.py 691 63 91% 53, 67, 76, 86, 96, 215, 217, 231, 234, 239, 244, 454, 460-467, 474, 505-510, 540, 544, 547, 549, 586, 640, 681, 698, 718, 736, 746, 761, 809, 817, 825, 840, 851, 863, 882, 897, 925, 940, 977, 994, 997, 1008, 1027, 1033, 1038, 1067, 1089, 1114, 1117-1120, 1124, 1134, 1233, 1247-1249
203161
data_algebra/pandas_model.py 15 0 100%
204162
data_algebra/parse_by_lark.py 164 24 85% 71, 93, 108, 129-130, 137, 161, 171, 185-186, 188, 200, 206, 213-217, 245, 253, 263-266
205-
data_algebra/polars_model.py 286 49 83% 71, 73, 77, 279, 291-295, 301, 308, 348, 363, 370, 386, 401, 410, 432, 439-441, 447, 462, 469, 483, 497-504, 512, 519, 526-532, 539, 545, 553, 573, 586-595, 602, 607, 625, 642
163+
data_algebra/polars_model.py 292 49 83% 76, 78, 82, 285, 297-301, 307, 314, 354, 369, 376, 392, 407, 416, 438, 445-447, 453, 468, 475, 489, 503-510, 518, 525, 532-538, 545, 551, 559, 579, 592-601, 608, 613, 631, 648
206164
data_algebra/python3_lark.py 1 0 100%
207165
data_algebra/solutions.py 136 4 97% 63, 303, 384, 467
208166
data_algebra/sql_format_options.py 15 2 87% 61, 69
209-
data_algebra/test_util.py 291 57 80% 88, 110, 120, 123, 127, 150, 153, 157, 159-162, 173, 230-231, 247-252, 256, 268, 270-278, 324, 327, 335, 346, 353, 357, 369, 380, 394, 465-468, 470-473, 475-478, 480-483, 557-562, 568-569, 572
167+
data_algebra/test_util.py 291 63 78% 88, 110, 120, 123, 127, 150, 153, 157, 159-162, 173, 216-217, 230-231, 234-252, 256, 268, 270-278, 324, 327, 335, 346, 353, 357, 369, 380, 394, 465-468, 470-473, 475-478, 480-483, 557-562, 568-569, 572
210168
data_algebra/util.py 140 31 78% 26, 51, 56, 61, 84-85, 88-89, 92-93, 96-97, 100-101, 104-105, 108-109, 112-113, 116-117, 120-121, 168, 190, 192, 207, 248, 252, 254
211169
--------------------------------------------------------------------
212-
TOTAL 6407 932 85%
170+
TOTAL 6413 942 85%
171+
213172

214-
=========================== short test summary info ============================
215-
FAILED tests/test_polars.py::test_polars_project_max_str - AssertionError: as...
216-
================== 1 failed, 346 passed in 809.23s (0:13:29) ===================
173+
======================= 348 passed in 805.59s (0:13:25) ========================
96 Bytes
Binary file not shown.

dist/data_algebra-1.5.0.tar.gz

104 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)