@@ -705,3 +705,54 @@ def test_aggregation_empty_slices(lmdb_version_store_dynamic_schema_v1):
     assert pc.count(table.column("count_col"), mode="only_null").as_py() == 5
     expected = lib.read(sym, query_builder=q, output_format=OutputFormat.PANDAS).data
     assert_frame_equal_with_arrow(table, expected)
+
+
+def test_resample_empty_slices(lmdb_version_store_dynamic_schema_v1):
+    lib = lmdb_version_store_dynamic_schema_v1
+    lib.set_output_format(OutputFormat.EXPERIMENTAL_ARROW)
+    sym = "sym"
+    def gen_df(start, num_rows, with_columns=True):
+        data = {}
+        if with_columns:
+            data = {
+                "mean_col": np.arange(start, start + num_rows, dtype=np.float64),
+                "sum_col": np.arange(start, start + num_rows, dtype=np.float64),
+                "min_col": np.arange(start, start + num_rows, dtype=np.float64),
+                "max_col": np.arange(start, start + num_rows, dtype=np.float64),
+                "count_col": np.arange(start, start + num_rows, dtype=np.float64),
+            }
+        index = pd.date_range(pd.Timestamp(2025, 1, start), periods=num_rows)
+        return pd.DataFrame(data, index=index)
+
+    slices = [
+        gen_df(1, 3),
+        gen_df(4, 2, False),  # We expect an entirely missing slice 4th-5th
+        gen_df(6, 3),
+        gen_df(9, 5, False),  # We expect two missing slices 10th-11th and 12th-13th
+        gen_df(14, 2),
+        gen_df(16, 2, False),  # We expect one missing slice 16th-17th
+        # TODO: If we don't finish with an append with columns our normalization metadata will be broken
+        gen_df(18, 1)
+    ]
+    for df_slice in slices:
+        lib.append(sym, df_slice, write_if_missing=True)
+
+    q = QueryBuilder()
+    q.resample("2d").agg({
+        "mean_col": "mean",
+        "sum_col": "sum",
+        "min_col": "min",
+        "max_col": "max",
+        "count_col": "count",
+    })
+
+    table = lib.read(sym, query_builder=q).data
+    # sum_col is correctly filled with 0s instead of nulls
+    assert pc.count(table.column("sum_col"), mode="only_null").as_py() == 0
+    # We expect 4 entirely empty buckets
+    assert pc.count(table.column("mean_col"), mode="only_null").as_py() == 4
+    assert pc.count(table.column("min_col"), mode="only_null").as_py() == 4
+    assert pc.count(table.column("max_col"), mode="only_null").as_py() == 4
+    assert pc.count(table.column("count_col"), mode="only_null").as_py() == 4
+    expected = lib.read(sym, query_builder=q, output_format=OutputFormat.PANDAS).data
+    assert_frame_equal_with_arrow(table, expected)
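
For reference, pc.count(column, mode="only_null") as used in the assertions above returns the number of null entries in an Arrow array. A minimal standalone sketch, illustrative only and not part of this change:

import pyarrow as pa
import pyarrow.compute as pc

arr = pa.array([1.0, None, 3.0, None])
assert pc.count(arr, mode="only_null").as_py() == 2   # counts only the null slots
assert pc.count(arr, mode="only_valid").as_py() == 2  # default mode: non-null values only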