@@ -18,12 +18,21 @@
 
 COLUMN_DTYPE = ["float", "int", "uint"]
 ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"]
-MIN_DATE = np.datetime64("1969-06-01")
-MAX_DATE = np.datetime64("1970-06-01")
+# Make sure the start date is pre-epoch so that we can test pre-epoch dates. Not all C++ libraries handle pre-epoch well.
+MIN_DATE = np.datetime64("1960-01-01")
+MAX_DATE = np.datetime64("2025-01-01")
 
 pytestmark = pytest.mark.pipeline
 
 
+def dense_row_count_in_resampled_dataframe(df_list, rule):
+    """
+    The number of rows Arctic's resampling will produce after appending all dataframes in `df_list` and then resampling
+    with `rule`. Assumes df_list is sorted by start date and the indexes are not overlapping.
+    """
+    return (df_list[-1].index[-1] - df_list[0].index[0]).value // pd.Timedelta(rule).value
+
+
 @st.composite
 def date(draw, min_date, max_date, unit="ns"):
     """
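For intuition on this hunk: pre-epoch timestamps are negative in epoch-nanoseconds, and the helper's estimate is plain integer division of the index span by the bucket width. A minimal standalone sketch (not part of the change itself, using only pandas/numpy calls):

    import numpy as np
    import pandas as pd

    # Pre-epoch dates have negative epoch-nanosecond values; this is the
    # case the new MIN_DATE is chosen to exercise.
    assert pd.Timestamp(np.datetime64("1960-01-01")).value < 0

    # Dense row count is (index span) // (bucket width): one day resampled
    # at a "1h" rule yields 24 buckets.
    span = pd.Timestamp("2020-01-02") - pd.Timestamp("2020-01-01")
    assert span.value // pd.Timedelta("1h").value == 24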
@@ -102,22 +111,22 @@ def freq_fits_in_64_bits(count, unit):
     This is used to check if a frequency is usable by Arctic. ArcticDB converts the frequency to a signed 64-bit integer.
     """
     billion = 1_000_000_000
-    mult = {"h": 3600 * billion, "min": 60 * billion, "s": billion}
+    mult = {"h": 3600 * billion, "min": 60 * billion, "s": billion, "ms": billion // 1000, "us": 1000, "ns": 1}
     return (mult[unit] * count).bit_length() <= 63
 
 
 @st.composite
 def rule(draw):
     count = draw(st.integers(min_value=1, max_value=10_000))
-    unit = draw(st.sampled_from(["min", "h", "s"]))
+    unit = draw(st.sampled_from(["min", "h", "s", "ms", "us", "ns"]))
     result = f"{count}{unit}"
     assume(freq_fits_in_64_bits(count=count, unit=unit))
     return result
 
 
 @st.composite
 def offset(draw):
-    unit = draw(st.sampled_from(["s", "min", "h", None]))
+    unit = draw(st.sampled_from(["s", "min", "h", "ms", "us", "ns", None]))
     if unit is None:
         return None
     count = draw(st.integers(min_value=1, max_value=100))
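The 63-bit check above can be sanity-checked by hand; a minimal sketch mirroring the multiplier table (the specific counts here are illustrative, not drawn from the tests):

    billion = 1_000_000_000
    ns_per_unit = {"h": 3600 * billion, "min": 60 * billion, "s": billion,
                   "ms": billion // 1000, "us": 1000, "ns": 1}

    # 10_000h is about 3.6e16 ns, comfortably inside a signed 64-bit range...
    assert (ns_per_unit["h"] * 10_000).bit_length() <= 63
    # ...whereas 3_000_000h is about 1.08e19 ns and would overflow, so
    # assume(freq_fits_in_64_bits(...)) discards such draws.
    assert (ns_per_unit["h"] * 3_000_000).bit_length() > 63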
@@ -173,6 +182,9 @@ def dynamic_schema_column_list(draw):
     offset=offset(),
 )
 def test_resample(lmdb_version_store_v1, df, rule, origin, offset):
+    # The assumption below is to avoid OOM-ing the GitHub runners.
+    assume(dense_row_count_in_resampled_dataframe([df], rule) < 150000)
+
     lib = lmdb_version_store_v1
     sym = "sym"
     logger = get_logger()
@@ -220,6 +232,9 @@ def test_resample(lmdb_version_store_v1, df, rule, origin, offset):
 @given(df_list=dynamic_schema_column_list(), rule=rule(), origin=origin(), offset=offset())
 @settings(deadline=None, suppress_health_check=[HealthCheck.data_too_large])
 def test_resample_dynamic_schema(lmdb_version_store_dynamic_schema_v1, df_list, rule, origin, offset):
+    # The assumption below is to avoid OOM-ing the GitHub runners.
+    assume(dense_row_count_in_resampled_dataframe(df_list, rule) < 150000)
+
     common_column_types = compute_common_type_for_columns_in_df_list(df_list)
     lib = lmdb_version_store_dynamic_schema_v1
     lib.version_store.clear()
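To see why the 150000 cap matters, a rough worked example using the module's own date range (the arithmetic is an illustration, not taken from the diff):

    import pandas as pd

    # MIN_DATE..MAX_DATE spans 65 years; at a "1h" rule that is roughly
    # 570,000 dense buckets, so such a draw fails the assume(...) above
    # and is discarded rather than resampled.
    span = pd.Timestamp("2025-01-01") - pd.Timestamp("1960-01-01")
    print(span.value // pd.Timedelta("1h").value > 150000)  # True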