@@ -763,3 +763,54 @@ def test_aggregation_empty_slices(lmdb_version_store_dynamic_schema_v1):
763
763
assert pc .count (table .column ("count_col" ), mode = "only_null" ).as_py () == 5
764
764
expected = lib .read (sym , query_builder = q , output_format = OutputFormat .PANDAS ).data
765
765
assert_frame_equal_with_arrow (table , expected )
766
+
767
+
768
def test_resample_empty_slices(lmdb_version_store_dynamic_schema_v1):
    """Resample a dynamic-schema symbol whose row-slices sometimes lack the
    value columns entirely, and check how each aggregator fills the resulting
    empty buckets (sum -> 0, everything else -> null)."""
    lib = lmdb_version_store_dynamic_schema_v1
    lib.set_output_format(OutputFormat.EXPERIMENTAL_ARROW)
    sym = "sym"

    agg_ops = ("mean", "sum", "min", "max", "count")

    def make_slice(start, num_rows, with_columns=True):
        # Daily index beginning on the `start`-th of Jan 2025; when
        # `with_columns` is False the frame carries only the index, so the
        # appended slice is missing every value column.
        frame_data = {}
        if with_columns:
            frame_data = {
                f"{op}_col": np.arange(start, start + num_rows, dtype=np.float64)
                for op in agg_ops
            }
        idx = pd.date_range(pd.Timestamp(2025, 1, start), periods=num_rows)
        return pd.DataFrame(frame_data, index=idx)

    slices = [
        make_slice(1, 3),
        make_slice(4, 2, False),  # We expect an entirely missing slice 4th-5th
        make_slice(6, 3),
        make_slice(9, 5, False),  # We expect two missing slices 10th-11th and 12th-13th
        make_slice(14, 2),
        make_slice(16, 2, False),  # We expect one missing slice 16th-17th
        # TODO: If we don't finish with an append with columns our normalization metadata will be broken
        make_slice(18, 1),
    ]
    for df_slice in slices:
        lib.append(sym, df_slice, write_if_missing=True)

    q = QueryBuilder()
    q.resample("2d").agg({f"{op}_col": op for op in agg_ops})

    table = lib.read(sym, query_builder=q).data
    # sum_col is correctly filled with 0s instead of nulls
    assert pc.count(table.column("sum_col"), mode="only_null").as_py() == 0
    # We expect 4 entirely empty buckets in every other aggregated column
    for column_name in ("mean_col", "min_col", "max_col", "count_col"):
        assert pc.count(table.column(column_name), mode="only_null").as_py() == 4
    expected = lib.read(sym, query_builder=q, output_format=OutputFormat.PANDAS).data
    assert_frame_equal_with_arrow(table, expected)
0 commit comments