Skip to content

Commit d7adb85

Browse files
committed
Add resampling aggregation test with missing data
Also discovered an issue with appending a DataFrame that has no columns. Added an xfail test for it and filed an issue (Monday ref: 10029194063).
1 parent 54c4518 commit d7adb85

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

python/tests/unit/arcticdb/version_store/test_append.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,3 +750,15 @@ def test_append_series_with_different_row_range_index_name(lmdb_version_store_dy
750750
# See Monday 9797097831, it would be best to require that index names are always matching. This is the case for
751751
# datetime index because it's a physical column. It's a potentially breaking change.
752752
assert lib.read("sym").data.index.name == "index_name_2"
753+
754+
755+
@pytest.mark.xfail(reason="Wrong normalization metadata update. Monday ref: 10029194063")
def test_append_no_columns(lmdb_version_store_dynamic_schema_v1):
    """Append a frame that has an index but no columns to an existing symbol.

    The data read back is expected to equal the pandas concatenation of the
    written frame and the appended column-less frame. Marked xfail; see the
    decorator's reason for the tracking reference.
    """
    lib = lmdb_version_store_dynamic_schema_v1

    # Three daily rows carrying one column, followed by three daily rows
    # that carry an index only.
    initial_index = pd.date_range(pd.Timestamp(2025, 1, 1), periods=3)
    columnless_index = pd.date_range(pd.Timestamp(2025, 1, 4), periods=3)
    initial_df = pd.DataFrame({"col": [1, 2, 3]}, index=initial_index)
    columnless_df = pd.DataFrame({}, index=columnless_index)

    lib.write("sym", initial_df)
    lib.append("sym", columnless_df)

    assert_frame_equal(
        lib.read("sym").data,
        pd.concat([initial_df, columnless_df]),
    )

python/tests/unit/arcticdb/version_store/test_arrow.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,3 +763,54 @@ def test_aggregation_empty_slices(lmdb_version_store_dynamic_schema_v1):
763763
assert pc.count(table.column("count_col"), mode="only_null").as_py() == 5
764764
expected = lib.read(sym, query_builder=q, output_format=OutputFormat.PANDAS).data
765765
assert_frame_equal_with_arrow(table, expected)
766+
767+
768+
def test_resample_empty_slices(lmdb_version_store_dynamic_schema_v1):
    """Resample data in which some appended segments carry no columns.

    Segments with and without columns are appended in turn, then a 2-day
    resample with mean/sum/min/max/count aggregations is read in Arrow
    format. The null counts per output column are checked, and the Arrow
    result is compared against the same query read in pandas format.
    """
    lib = lmdb_version_store_dynamic_schema_v1
    lib.set_output_format(OutputFormat.EXPERIMENTAL_ARROW)
    sym = "sym"
    column_names = ("mean_col", "sum_col", "min_col", "max_col", "count_col")

    def gen_df(start, num_rows, with_columns=True):
        # Daily index starting on January `start`, 2025. When columns are
        # requested, every column holds the floats start..start+num_rows-1.
        index = pd.date_range(pd.Timestamp(2025, 1, start), periods=num_rows)
        data = {}
        if with_columns:
            data = {
                name: np.arange(start, start + num_rows, dtype=np.float64)
                for name in column_names
            }
        return pd.DataFrame(data, index=index)

    # (start day, number of rows, with_columns)
    segment_specs = [
        (1, 3, True),
        (4, 2, False),  # We expect an entirely empty bucket 4th-5th
        (6, 3, True),
        (9, 5, False),  # We expect two empty buckets 10th-11th and 12th-13th
        (14, 2, True),
        (16, 2, False),  # We expect one empty bucket 16th-17th
        # TODO: If we don't finish with an append with columns our normalization metadata will be broken
        (18, 1, True),
    ]
    for start, num_rows, with_columns in segment_specs:
        lib.append(sym, gen_df(start, num_rows, with_columns), write_if_missing=True)

    aggregations = {
        "mean_col": "mean",
        "sum_col": "sum",
        "min_col": "min",
        "max_col": "max",
        "count_col": "count",
    }
    q = QueryBuilder()
    q.resample("2d").agg(aggregations)

    table = lib.read(sym, query_builder=q).data

    # sum_col is filled with 0s instead of nulls; the other aggregations are
    # null for each of the 4 entirely empty buckets.
    expected_null_counts = {
        "sum_col": 0,
        "mean_col": 4,
        "min_col": 4,
        "max_col": 4,
        "count_col": 4,
    }
    for column, null_count in expected_null_counts.items():
        assert pc.count(table.column(column), mode="only_null").as_py() == null_count

    expected = lib.read(sym, query_builder=q, output_format=OutputFormat.PANDAS).data
    assert_frame_equal_with_arrow(table, expected)

0 commit comments

Comments
 (0)