From c28301d7313330d6dec31d41879b0735e4f8a257 Mon Sep 17 00:00:00 2001
From: Vasil Pashov <vasil.pashov1@gmail.com>
Date: Fri, 29 Aug 2025 11:01:30 +0300
Subject: [PATCH 1/2] Fix hypothesis tests that occasionally make the CI
 runners OOM

The issue is that we need to output a dense dataframe for Pandas, so if
the date range is big and the resampling frequency (bucket size) is small,
the output dataframe can end up with too many rows and exhaust the
runner's memory.

This also allows us to test the sub-second frequency units (ms, us, ns)
and to extend the date range used for generating dataframes.
---
 .../hypothesis/arcticdb/test_resample.py      | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/python/tests/hypothesis/arcticdb/test_resample.py b/python/tests/hypothesis/arcticdb/test_resample.py
index 040a2e1acc..8c8f33c5fc 100644
--- a/python/tests/hypothesis/arcticdb/test_resample.py
+++ b/python/tests/hypothesis/arcticdb/test_resample.py
@@ -18,11 +18,18 @@
 
 COLUMN_DTYPE = ["float", "int", "uint"]
 ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"]
-MIN_DATE = np.datetime64('1969-06-01')
-MAX_DATE = np.datetime64('1970-06-01')
+# Make sure the start date is pre-epoch so that we can test pre-epoch dates. Not all C++ libraries handle pre-epoch well.
+MIN_DATE = np.datetime64('1960-01-01')
+MAX_DATE = np.datetime64('2025-01-01')
 
 pytestmark = pytest.mark.pipeline
 
+def dense_row_count_in_resampled_dataframe(df_list, rule):
+    """
+    The number of rows Arctic's resampling will produce after appending all dataframes in `df_list` and then resampling
+    with `rule`.  Assumes df_list is sorted by start date and the indexes are not overlapping.
+    """
+    return (df_list[-1].index[-1] - df_list[0].index[0]).value // pd.Timedelta(rule).value
 
 @st.composite
 def date(draw, min_date, max_date, unit="ns"):
@@ -98,14 +105,14 @@ def freq_fits_in_64_bits(count, unit):
     This is used to check if a frequency is usable by Arctic. ArcticDB converts the frequency to signed 64-bit integer.
     """
     billion = 1_000_000_000
-    mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion}
+    mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion, 'ms': billion // 1000, 'us' : 1000,'ns': 1}
     return (mult[unit] * count).bit_length() <= 63
 
 
 @st.composite
 def rule(draw):
     count = draw(st.integers(min_value=1, max_value=10_000))
-    unit = draw(st.sampled_from(['min', 'h', 's']))
+    unit = draw(st.sampled_from(['min', 'h', 's', 'ms', 'us', 'ns']))
     result = f"{count}{unit}"
     assume(freq_fits_in_64_bits(count=count, unit=unit))
     return result
@@ -113,7 +120,7 @@ def rule(draw):
 
 @st.composite
 def offset(draw):
-    unit = draw(st.sampled_from(['s', 'min', 'h', None]))
+    unit = draw(st.sampled_from(['s', 'min', 'h', 'ms', 'us', 'ns', None]))
     if unit is None:
         return None
     count = draw(st.integers(min_value=1, max_value=100))
@@ -150,6 +157,9 @@ def dynamic_schema_column_list(draw):
     offset=offset()
 )
 def test_resample(lmdb_version_store_v1, df, rule, origin, offset):
+    # The assumption below is to avoid OOM-ing the GitHub runners.
+    assume(dense_row_count_in_resampled_dataframe([df], rule) < 150000)
+
     lib = lmdb_version_store_v1
     sym = "sym"
     logger = get_logger()
@@ -198,6 +208,9 @@ def test_resample(lmdb_version_store_v1, df, rule, origin, offset):
 )
 @settings(deadline=None, suppress_health_check=[HealthCheck.data_too_large])
 def test_resample_dynamic_schema(lmdb_version_store_dynamic_schema_v1, df_list, rule, origin, offset):
+    # The assumption below is to avoid OOM-ing the GitHub runners.
+    assume(dense_row_count_in_resampled_dataframe(df_list, rule) < 150000)
+
     common_column_types = compute_common_type_for_columns_in_df_list(df_list)
     lib = lmdb_version_store_dynamic_schema_v1
     lib.version_store.clear()

From 42a63e8069f4e54c1872cb48128751014f4ff232 Mon Sep 17 00:00:00 2001
From: Vasil Pashov <vasil.pashov1@gmail.com>
Date: Tue, 23 Sep 2025 12:58:59 +0300
Subject: [PATCH 2/2] Apply code formatting rules

---
 python/tests/hypothesis/arcticdb/test_resample.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/python/tests/hypothesis/arcticdb/test_resample.py b/python/tests/hypothesis/arcticdb/test_resample.py
index 6a9609a3c9..7ee2f34caa 100644
--- a/python/tests/hypothesis/arcticdb/test_resample.py
+++ b/python/tests/hypothesis/arcticdb/test_resample.py
@@ -19,11 +19,12 @@
 COLUMN_DTYPE = ["float", "int", "uint"]
 ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"]
 # Make sure the start date is pre-epoch so that we can test pre-epoch dates. Not all C++ libraries handle pre-epoch well.
-MIN_DATE = np.datetime64('1960-01-01')
-MAX_DATE = np.datetime64('2025-01-01')
+MIN_DATE = np.datetime64("1960-01-01")
+MAX_DATE = np.datetime64("2025-01-01")
 
 pytestmark = pytest.mark.pipeline
 
+
 def dense_row_count_in_resampled_dataframe(df_list, rule):
     """
     The number of rows Arctic's resampling will produce after appending all dataframes in `df_list` and then resampling
@@ -31,6 +32,7 @@ def dense_row_count_in_resampled_dataframe(df_list, rule):
     """
     return (df_list[-1].index[-1] - df_list[0].index[0]).value // pd.Timedelta(rule).value
 
+
 @st.composite
 def date(draw, min_date, max_date, unit="ns"):
     """
@@ -109,14 +111,14 @@ def freq_fits_in_64_bits(count, unit):
     This is used to check if a frequency is usable by Arctic. ArcticDB converts the frequency to signed 64-bit integer.
     """
     billion = 1_000_000_000
-    mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion, 'ms': billion // 1000, 'us' : 1000,'ns': 1}
+    mult = {"h": 3600 * billion, "min": 60 * billion, "s": billion, "ms": billion // 1000, "us": 1000, "ns": 1}
     return (mult[unit] * count).bit_length() <= 63
 
 
 @st.composite
 def rule(draw):
     count = draw(st.integers(min_value=1, max_value=10_000))
-    unit = draw(st.sampled_from(['min', 'h', 's', 'ms', 'us', 'ns']))
+    unit = draw(st.sampled_from(["min", "h", "s", "ms", "us", "ns"]))
     result = f"{count}{unit}"
     assume(freq_fits_in_64_bits(count=count, unit=unit))
     return result
@@ -124,7 +126,7 @@ def rule(draw):
 
 @st.composite
 def offset(draw):
-    unit = draw(st.sampled_from(['s', 'min', 'h', 'ms', 'us', 'ns', None]))
+    unit = draw(st.sampled_from(["s", "min", "h", "ms", "us", "ns", None]))
     if unit is None:
         return None
     count = draw(st.integers(min_value=1, max_value=100))