Skip to content

Commit 720110a

Browse files
committed
Add resampling aggregation test with missing data
Also discovered an issue with appending a DataFrame that has no columns. Added an xfail test for it and filed an issue (Monday ref: 10029194063).
1 parent 0415374 commit 720110a

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

python/tests/unit/arcticdb/version_store/test_append.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,3 +742,15 @@ def test_append_series_with_different_row_range_index_name(lmdb_version_store_dy
742742
# See Monday 9797097831, it would be best to require that index names are always matching. This is the case for
743743
# datetime index because it's a physical column. It's a potentially breaking change.
744744
assert lib.read("sym").data.index.name == "index_name_2"
745+
746+
747+
@pytest.mark.xfail(reason="Wrong normalization metadata update. Monday ref: 10029194063")
def test_append_no_columns(lmdb_version_store_dynamic_schema_v1):
    """Append a frame without columns and expect the result to equal a pandas concat.

    Marked xfail: see the Monday reference in the xfail reason.
    """
    store = lmdb_version_store_dynamic_schema_v1
    symbol = "sym"

    first_index = pd.date_range(pd.Timestamp(2025, 1, 1), periods=3)
    second_index = pd.date_range(pd.Timestamp(2025, 1, 4), periods=3)
    frame_with_column = pd.DataFrame({"col": [1, 2, 3]}, index=first_index)
    frame_without_columns = pd.DataFrame({}, index=second_index)

    store.write(symbol, frame_with_column)
    store.append(symbol, frame_without_columns)

    # The stored symbol should look exactly like both frames concatenated in pandas.
    expected = pd.concat([frame_with_column, frame_without_columns])
    stored = store.read(symbol).data
    assert_frame_equal(stored, expected)

python/tests/unit/arcticdb/version_store/test_arrow.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,3 +705,54 @@ def test_aggregation_empty_slices(lmdb_version_store_dynamic_schema_v1):
705705
assert pc.count(table.column("count_col"), mode="only_null").as_py() == 5
706706
expected = lib.read(sym, query_builder=q, output_format=OutputFormat.PANDAS).data
707707
assert_frame_equal_with_arrow(table, expected)
708+
709+
710+
def test_resample_empty_slices(lmdb_version_store_dynamic_schema_v1):
    """Resample a symbol whose appended slices alternate between having columns and having none.

    The Arrow result is checked for the expected number of null buckets per
    aggregated column and then compared against the pandas output of the same query.
    """
    store = lmdb_version_store_dynamic_schema_v1
    store.set_output_format(OutputFormat.EXPERIMENTAL_ARROW)
    symbol = "sym"
    value_columns = ("mean_col", "sum_col", "min_col", "max_col", "count_col")

    def gen_df(start, num_rows, with_columns=True):
        """Build a daily-indexed frame starting on 2025-01-<start>; without columns if requested."""
        columns = {}
        if with_columns:
            columns = {
                name: np.arange(start, start + num_rows, dtype=np.float64)
                for name in value_columns
            }
        daily_index = pd.date_range(pd.Timestamp(2025, 1, start), periods=num_rows)
        return pd.DataFrame(columns, index=daily_index)

    # (start day, number of rows, with_columns) for every appended slice, in append order.
    slice_specs = [
        (1, 3, True),
        (4, 2, False),  # We expect an entirely missing slice 4th-5th
        (6, 3, True),
        (9, 5, False),  # We expect two missing slices 10th-11th and 12th-13th
        (14, 2, True),
        (16, 2, False),  # We expect one missing slice 16th-17th
        # TODO: If we don't finish with an append with columns our normalization metadata will be broken
        (18, 1, True),
    ]
    for start_day, row_count, has_columns in slice_specs:
        store.append(symbol, gen_df(start_day, row_count, has_columns), write_if_missing=True)

    q = QueryBuilder()
    q.resample("2d").agg({
        "mean_col": "mean",
        "sum_col": "sum",
        "min_col": "min",
        "max_col": "max",
        "count_col": "count",
    })

    table = store.read(symbol, query_builder=q).data

    def null_count(column_name):
        """Return the number of null entries in the named column of the Arrow table."""
        return pc.count(table.column(column_name), mode="only_null").as_py()

    # sum_col is correctly filled with 0s instead of nulls
    assert null_count("sum_col") == 0
    # We expect 4 entirely empty buckets
    for column_name in ("mean_col", "min_col", "max_col", "count_col"):
        assert null_count(column_name) == 4

    expected = store.read(symbol, query_builder=q, output_format=OutputFormat.PANDAS).data
    assert_frame_equal_with_arrow(table, expected)

0 commit comments

Comments
 (0)