23 changes: 18 additions & 5 deletions python/tests/hypothesis/arcticdb/test_resample.py
@@ -18,11 +18,18 @@

COLUMN_DTYPE = ["float", "int", "uint"]
ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"]
MIN_DATE = np.datetime64('1969-06-01')
MAX_DATE = np.datetime64('1970-06-01')
# Make sure the start date is pre-epoch so that pre-epoch dates are exercised. Not all C++ libraries handle pre-epoch dates well.
MIN_DATE = np.datetime64('1960-01-01')
MAX_DATE = np.datetime64('2025-01-01')
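For context, a minimal sketch (not part of the diff) of why the pre-epoch start date matters: pandas/NumPy represent such timestamps as negative nanosecond counts since the epoch, which is the case the widened range is meant to exercise.

import numpy as np
import pandas as pd

# 1960-01-01 is ten years before the Unix epoch, so its ns-since-epoch value is negative
# (roughly -3.156e17 ns); the resampling code has to handle these negative values correctly.
assert pd.Timestamp(np.datetime64('1960-01-01')).value < 0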

pytestmark = pytest.mark.pipeline

def dense_row_count_in_resampled_dataframe(df_list, rule):
"""
The number of rows ArcticDB's resampling will produce after appending all dataframes in `df_list` and then
resampling with `rule`. Assumes `df_list` is sorted by start date and that the indexes do not overlap.
"""
return (df_list[-1].index[-1] - df_list[0].index[0]).value // pd.Timedelta(rule).value
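An illustrative sanity check of the estimate (the frame below is hypothetical, not part of the test suite): a one-day span divided by a 1-hour rule gives 24 buckets.

import pandas as pd

# Single hypothetical frame; the helper only looks at the first and last index values.
df = pd.DataFrame({"col_0": [1.0, 2.0]}, index=pd.DatetimeIndex(["1960-01-01", "1960-01-02"]))
assert dense_row_count_in_resampled_dataframe([df], "1h") == 24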

@st.composite
def date(draw, min_date, max_date, unit="ns"):
@@ -98,22 +105,22 @@ def freq_fits_in_64_bits(count, unit):
This is used to check if a frequency is usable by ArcticDB. ArcticDB converts the frequency to a signed 64-bit integer.
"""
billion = 1_000_000_000
mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion}
mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion, 'ms': billion // 1000, 'us': 1000, 'ns': 1}
return (mult[unit] * count).bit_length() <= 63
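A worked check of the cut-off (illustrative only): the largest rule the strategy below can draw, 10_000h, is 3.6e16 ns and needs only 55 bits, whereas any frequency whose nanosecond count needs more than 63 bits is rejected.

# 10_000 hours is 3.6e16 ns; its bit_length() is 55, so it fits in a signed 64-bit integer.
assert freq_fits_in_64_bits(count=10_000, unit='h')
# 2**63 ns has a bit_length() of 64, so it is rejected.
assert not freq_fits_in_64_bits(count=2**63, unit='ns')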


@st.composite
def rule(draw):
count = draw(st.integers(min_value=1, max_value=10_000))
unit = draw(st.sampled_from(['min', 'h', 's']))
unit = draw(st.sampled_from(['min', 'h', 's', 'ms', 'us', 'ns']))
result = f"{count}{unit}"
assume(freq_fits_in_64_bits(count=count, unit=unit))
return result


@st.composite
def offset(draw):
unit = draw(st.sampled_from(['s', 'min', 'h', None]))
unit = draw(st.sampled_from(['s', 'min', 'h', 'ms', 'us', 'ns', None]))
if unit is None:
return None
count = draw(st.integers(min_value=1, max_value=100))
@@ -150,6 +157,9 @@ def dynamic_schema_column_list(draw):
offset=offset()
)
def test_resample(lmdb_version_store_v1, df, rule, origin, offset):
# The assumption below is to avoid OOM-ing the GitHub runners.
assume(dense_row_count_in_resampled_dataframe([df], rule) < 150000)
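# Illustrative arithmetic for the cap above: a one-year span resampled with a 1s rule would yield
# roughly 365 * 24 * 3600 ~= 31.5 million buckets, far beyond 150000, so Hypothesis discards such
# inputs before the resample can exhaust the runner's memory.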

lib = lmdb_version_store_v1
sym = "sym"
logger = get_logger()
@@ -198,6 +208,9 @@ def test_resample(lmdb_version_store_v1, df, rule, origin, offset):
)
@settings(deadline=None, suppress_health_check=[HealthCheck.data_too_large])
def test_resample_dynamic_schema(lmdb_version_store_dynamic_schema_v1, df_list, rule, origin, offset):
# The assumption below is to avoid OOM-ing the GitHub runners.
assume(dense_row_count_in_resampled_dataframe(df_list, rule) < 150000)

common_column_types = compute_common_type_for_columns_in_df_list(df_list)
lib = lmdb_version_store_dynamic_schema_v1
lib.version_store.clear()