18 | 18 |
19 | 19 | COLUMN_DTYPE = ["float", "int", "uint"]
20 | 20 | ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"]
21 | | -MIN_DATE = np.datetime64('1969-06-01')
22 | | -MAX_DATE = np.datetime64('1970-06-01')
| 21 | +# Make sure the start date is pre-epoch so that we can test pre-epoch dates. Not all C++ libraries handle pre-epoch dates well.
| 22 | +MIN_DATE = np.datetime64('1960-01-01')
| 23 | +MAX_DATE = np.datetime64('2025-01-01')
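A quick aside on why pre-epoch dates deserve coverage (an illustrative sketch, not part of the diff): in NumPy's `datetime64[ns]` representation, any date before 1970-01-01 becomes a negative nanosecond count, and it is this sign handling that some C++ date libraries get wrong.

```python
import numpy as np

# Pre-epoch timestamps are negative nanosecond counts relative to the epoch.
pre_epoch = np.datetime64('1960-01-01', 'ns')
print(pre_epoch.astype(np.int64))  # -315619200000000000
```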
23 | 24 |
24 | 25 | pytestmark = pytest.mark.pipeline
25 | 26 |
| 27 | +def dense_row_count_in_resampled_dataframe(df_list, rule):
| 28 | +    """
| 29 | +    The approximate number of rows ArcticDB's resampling will produce after appending all dataframes in `df_list`
| 30 | +    and then resampling with `rule`. Assumes `df_list` is sorted by start date and that the indexes do not overlap.
| 31 | +    """
| 32 | +    return (df_list[-1].index[-1] - df_list[0].index[0]).value // pd.Timedelta(rule).value
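To make the helper concrete, a small usage sketch (the index and rule here are illustrative): a dataframe whose index spans 240 hours, resampled with a `'1h'` rule, yields an estimate of about 240 dense rows, and the `assume` guards added further down compare this estimate against a 10,000-row cap.

```python
import numpy as np
import pandas as pd

# Illustrative: a 10-day hourly index resampled at '1h' spans ~240 buckets.
idx = pd.date_range('1960-01-01', periods=241, freq='h')
df = pd.DataFrame({'col': np.arange(241)}, index=idx)
estimate = (df.index[-1] - df.index[0]).value // pd.Timedelta('1h').value
print(estimate)  # 240
```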
26 | 33 |
27 | 34 | @st.composite
28 | 35 | def date(draw, min_date, max_date, unit="ns"):
@@ -98,22 +105,22 @@ def freq_fits_in_64_bits(count, unit):
98 | 105 |     This is used to check if a frequency is usable by ArcticDB, which converts the frequency to a signed 64-bit integer.
99 | 106 |     """
100 | 107 |     billion = 1_000_000_000
101 | | -    mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion}
| 108 | +    mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion, 'ms': billion // 1000, 'us': 1000, 'ns': 1}
102 | 109 |     return (mult[unit] * count).bit_length() <= 63
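A sanity check on the new sub-second entries (a sketch reusing the same `mult` table; the concrete counts are illustrative): the largest rule the strategy can draw, 10,000 hours, needs only 55 bits, while a signed 64-bit nanosecond count tops out at roughly 292 years.

```python
billion = 1_000_000_000
mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion,
        'ms': billion // 1000, 'us': 1000, 'ns': 1}

assert (mult['h'] * 10_000).bit_length() <= 63     # ~3.6e16 ns: fits easily
assert (mult['h'] * 2_600_000).bit_length() > 63   # ~9.4e18 ns: would overflow
```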
103 | 110 |
104 | 111 |
105 | 112 | @st.composite
106 | 113 | def rule(draw):
107 | 114 |     count = draw(st.integers(min_value=1, max_value=10_000))
108 | | -    unit = draw(st.sampled_from(['min', 'h', 's']))
| 115 | +    unit = draw(st.sampled_from(['min', 'h', 's', 'ms', 'us', 'ns']))
109 | 116 |     result = f"{count}{unit}"
110 | 117 |     assume(freq_fits_in_64_bits(count=count, unit=unit))
111 | 118 |     return result
112 | 119 |
113 | 120 |
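The generated strings are ordinary pandas frequency strings, so they can be checked independently of ArcticDB (the values shown are just examples of what the strategy can draw):

```python
import pandas as pd

# f"{count}{unit}" strings parse directly into pandas Timedeltas.
for rule_str in ['37min', '512ms', '9981ns']:
    print(rule_str, '->', pd.Timedelta(rule_str))
```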
114 | 121 | @st.composite
115 | 122 | def offset(draw):
116 | | -    unit = draw(st.sampled_from(['s', 'min', 'h', None]))
| 123 | +    unit = draw(st.sampled_from(['s', 'min', 'h', 'ms', 'us', 'ns', None]))
117 | 124 |     if unit is None:
118 | 125 |         return None
119 | 126 |     count = draw(st.integers(min_value=1, max_value=100))
@@ -150,6 +157,9 @@ def dynamic_schema_column_list(draw):
150 | 157 |     offset=offset()
151 | 158 | )
152 | 159 | def test_resample(lmdb_version_store_v1, df, rule, origin, offset):
| 160 | +    # The assumption below is to avoid OOM-ing the GitHub runners.
| 161 | +    assume(dense_row_count_in_resampled_dataframe([df], rule) < 10000)
| 162 | +
153 | 163 |     lib = lmdb_version_store_v1
154 | 164 |     sym = "sym"
155 | 165 |     logger = get_logger()
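The 10,000-row cap is there for a reason: without it, a worst-case draw such as a `'1ns'` rule over the full 1960..2025 date range would ask the resampler for on the order of 2e18 output buckets. A back-of-the-envelope check (illustrative):

```python
import numpy as np
import pandas as pd

# Span of the full generated date range, in nanoseconds.
span = pd.Timedelta(np.datetime64('2025-01-01') - np.datetime64('1960-01-01'))
buckets = span.value // pd.Timedelta('1ns').value
print(f"{buckets:.3e}")  # ~2.052e18 rows: far beyond any CI runner's memory
```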
@@ -198,6 +208,9 @@ def test_resample(lmdb_version_store_v1, df, rule, origin, offset):
198 | 208 | )
199 | 209 | @settings(deadline=None, suppress_health_check=[HealthCheck.data_too_large])
200 | 210 | def test_resample_dynamic_schema(lmdb_version_store_dynamic_schema_v1, df_list, rule, origin, offset):
| 211 | +    # The assumption below is to avoid OOM-ing the GitHub runners.
| 212 | +    assume(dense_row_count_in_resampled_dataframe(df_list, rule) < 10000)
| 213 | +
201 | 214 |     common_column_types = compute_common_type_for_columns_in_df_list(df_list)
202 | 215 |     lib = lmdb_version_store_dynamic_schema_v1
203 | 216 |     lib.version_store.clear()