|
1 | 1 |
|
2 | | -import numpy as np |
3 | 2 | import datetime |
| 3 | +import numpy as np |
4 | 4 | import data_algebra |
5 | 5 | import data_algebra.data_model |
6 | 6 | import data_algebra.test_util |
@@ -365,3 +365,42 @@ def test_polars_project_max_date_2(): |
365 | 365 | "max_v": [datetime.date(2023, 1, 1), datetime.date(2020, 1, 1)], |
366 | 366 | }) |
367 | 367 | assert data_algebra.test_util.equivalent_frames(res_polars.to_pandas(), expect.to_pandas()) |
| 368 | + |
| 369 | + |
def test_polars_group_min_max_example():
    """Check per-group min/max of a small seeded random frame in Polars and Pandas.

    Adapted from Examples/TimedGroupedCalc.ipynb. A 10-row, 2-group frame is
    built from a generator seeded with 2022, aggregated once with Pandas and
    once with Polars, and both results are compared (to a relative tolerance
    of 1e-3) against the same hard-coded expected frame.

    Nothing is checked when ``have_polars`` is falsy; the test then passes
    trivially, as it did before.
    """
    # from Examples/TimedGroupedCalc.ipynb
    if not have_polars:
        return

    pd = data_algebra.data_model.default_data_model().pd
    rng = np.random.default_rng(2022)

    def mk_example(*, n_rows: int, n_groups: int):
        """Return a Pandas frame with a random "group" label and a normal "value" per row."""
        assert n_rows > 0
        assert n_groups > 0
        groups = [f"group_{i:04d}" for i in range(n_groups)]
        # Draw order matters: "group" is sampled before "value", and both
        # consume the shared seeded generator, so the expected numbers below
        # depend on this order.
        d = pd.DataFrame({
            "group": rng.choice(groups, size=n_rows, replace=True),
            "value": rng.normal(size=n_rows),
        })
        return d

    d_Pandas = mk_example(n_rows=10, n_groups=2)
    d_Polars = pl.DataFrame(d_Pandas)

    res_pandas = (
        d_Pandas
        .groupby(["group"])
        .agg({"value": ["min", "max"]})
    )
    # The dict-of-lists aggregation yields two-level columns
    # ("value", "min") / ("value", "max") with "group" as the index.
    # Flatten to the same layout as the Polars result so it can be compared.
    res_pandas.columns = ["min_value", "max_value"]
    res_pandas = res_pandas.reset_index(drop=False)

    res_polars = (
        d_Polars
        .groupby(["group"])
        .agg([
            pl.col("value").min().alias("min_value"),
            pl.col("value").max().alias("max_value"),
        ])
    )

    expect = pl.DataFrame({
        "group": ["group_0000", "group_0001"],
        "min_value": [-2.931249, -1.440234],
        "max_value": [1.667716, 0.078888],
    })
    expect_pandas = expect.to_pandas()

    assert data_algebra.test_util.equivalent_frames(res_polars.to_pandas(), expect_pandas, float_tol=1.0e-3)
    # Previously res_pandas was computed but never verified.
    assert data_algebra.test_util.equivalent_frames(res_pandas, expect_pandas, float_tol=1.0e-3)
0 commit comments