Skip to content

Commit 5f1d9cb

Browse files
authored
Merge pull request freqtrade#12151 from mpagnoulle/feat/trades-timerange-filtering
Trades TimeRange filtering for FeatherDataHandler and _if_enabled_populate_trades
2 parents aad2119 + 9d5295f commit 5f1d9cb

File tree

4 files changed

+205
-8
lines changed

4 files changed

+205
-8
lines changed

freqtrade/data/dataprovider.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,12 @@ def ohlcv(
498498
return DataFrame()
499499

500500
def trades(
501-
self, pair: str, timeframe: str | None = None, copy: bool = True, candle_type: str = ""
501+
self,
502+
pair: str,
503+
timeframe: str | None = None,
504+
copy: bool = True,
505+
candle_type: str = "",
506+
timerange: TimeRange | None = None,
502507
) -> DataFrame:
503508
"""
504509
Get candle (TRADES) data for the given pair as DataFrame
@@ -526,7 +531,7 @@ def trades(
526531
self._config["datadir"], data_format=self._config["dataformat_trades"]
527532
)
528533
trades_df = data_handler.trades_load(
529-
pair, self._config.get("trading_mode", TradingMode.SPOT)
534+
pair, self._config.get("trading_mode", TradingMode.SPOT), timerange=timerange
530535
)
531536
return trades_df
532537

freqtrade/data/history/datahandlers/featherdatahandler.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22

33
from pandas import DataFrame, read_feather, to_datetime
4+
from pyarrow import dataset
45

56
from freqtrade.configuration import TimeRange
67
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS
@@ -111,22 +112,71 @@ def trades_append(self, pair: str, data: DataFrame):
111112
"""
112113
raise NotImplementedError()
113114

115+
def _build_arrow_time_filter(self, timerange: TimeRange | None):
116+
"""
117+
Build Arrow predicate filter for timerange filtering.
118+
Treats 0 as unbounded (no filter on that side).
119+
:param timerange: TimeRange object with start/stop timestamps
120+
:return: Arrow filter expression or None if fully unbounded
121+
"""
122+
if not timerange:
123+
return None
124+
125+
# Treat 0 as unbounded
126+
start_set = bool(timerange.startts and timerange.startts > 0)
127+
stop_set = bool(timerange.stopts and timerange.stopts > 0)
128+
129+
if not (start_set or stop_set):
130+
return None
131+
132+
ts_field = dataset.field("timestamp")
133+
exprs = []
134+
135+
if start_set:
136+
exprs.append(ts_field >= timerange.startts)
137+
if stop_set:
138+
exprs.append(ts_field <= timerange.stopts)
139+
140+
if len(exprs) == 1:
141+
return exprs[0]
142+
else:
143+
return exprs[0] & exprs[1]
144+
114145
def _trades_load(
115146
self, pair: str, trading_mode: TradingMode, timerange: TimeRange | None = None
116147
) -> DataFrame:
117148
"""
118149
Load a pair from file, either .json.gz or .json
119-
# TODO: respect timerange ...
120150
:param pair: Load trades for this pair
121151
:param trading_mode: Trading mode to use (used to determine the filename)
122-
:param timerange: Timerange to load trades for - currently not implemented
152+
:param timerange: Timerange to load trades for - filters data to this range if provided
123153
:return: Dataframe containing trades
124154
"""
125155
filename = self._pair_trades_filename(self._datadir, pair, trading_mode)
126156
if not filename.exists():
127157
return DataFrame(columns=DEFAULT_TRADES_COLUMNS)
128158

129-
tradesdata = read_feather(filename)
159+
# Use Arrow dataset with optional timerange filtering, fallback to read_feather
160+
try:
161+
dataset_reader = dataset.dataset(filename, format="feather")
162+
time_filter = self._build_arrow_time_filter(timerange)
163+
164+
if time_filter is not None and timerange is not None:
165+
tradesdata = dataset_reader.to_table(filter=time_filter).to_pandas()
166+
start_desc = timerange.startts if timerange.startts > 0 else "unbounded"
167+
stop_desc = timerange.stopts if timerange.stopts > 0 else "unbounded"
168+
logger.debug(
169+
f"Loaded {len(tradesdata)} trades for {pair} "
170+
f"(filtered start={start_desc}, stop={stop_desc})"
171+
)
172+
else:
173+
tradesdata = dataset_reader.to_table().to_pandas()
174+
logger.debug(f"Loaded {len(tradesdata)} trades for {pair} (unfiltered)")
175+
176+
except (ImportError, AttributeError, ValueError) as e:
177+
# Fallback: load entire file
178+
logger.warning(f"Unable to use Arrow filtering, loading entire trades file: {e}")
179+
tradesdata = read_feather(filename)
130180

131181
return tradesdata
132182

freqtrade/strategy/interface.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pandas import DataFrame
1212
from pydantic import ValidationError
1313

14+
from freqtrade.configuration import TimeRange
1415
from freqtrade.constants import CUSTOM_TAG_MAX_LENGTH, Config, IntOrInf, ListPairsWithTimeframes
1516
from freqtrade.data.converter import populate_dataframe_with_trades
1617
from freqtrade.data.converter.converter import reduce_dataframe_footprint
@@ -40,7 +41,7 @@
4041
)
4142
from freqtrade.strategy.strategy_validation import StrategyResultValidator
4243
from freqtrade.strategy.strategy_wrapper import strategy_safe_wrapper
43-
from freqtrade.util import dt_now
44+
from freqtrade.util import dt_now, dt_ts
4445
from freqtrade.wallets import Wallets
4546

4647

@@ -1767,9 +1768,16 @@ def _if_enabled_populate_trades(self, dataframe: DataFrame, metadata: dict) -> D
17671768
use_public_trades = self.config.get("exchange", {}).get("use_public_trades", False)
17681769
if use_public_trades:
17691770
pair = metadata["pair"]
1770-
trades = self.dp.trades(pair=pair, copy=False)
1771+
# Build timerange from dataframe date column
1772+
if not dataframe.empty:
1773+
start_ts = dt_ts(dataframe["date"].iloc[0])
1774+
end_ts = dt_ts(dataframe["date"].iloc[-1])
1775+
timerange = TimeRange("date", "date", startts=start_ts, stopts=end_ts)
1776+
else:
1777+
timerange = None
1778+
1779+
trades = self.dp.trades(pair=pair, copy=False, timerange=timerange)
17711780

1772-
# TODO: slice trades to size of dataframe for faster backtesting
17731781
cached_grouped_trades: DataFrame | None = self._cached_grouped_trades_per_pair.get(pair)
17741782
dataframe, cached_grouped_trades = populate_dataframe_with_trades(
17751783
cached_grouped_trades, self.config, dataframe, trades

tests/data/test_datahandler.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,3 +506,137 @@ def test_get_datahandler(testdatadir):
506506
assert isinstance(dh, JsonGzDataHandler)
507507
dh1 = get_datahandler(testdatadir, "jsongz", dh)
508508
assert id(dh1) == id(dh)
509+
510+
511+
@pytest.fixture
512+
def feather_dh(testdatadir):
513+
return FeatherDataHandler(testdatadir)
514+
515+
516+
@pytest.fixture
517+
def trades_full(feather_dh):
518+
df = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT)
519+
assert not df.empty
520+
return df
521+
522+
523+
@pytest.fixture
524+
def timerange_mid(trades_full):
525+
# Pick a mid-range window using actual timestamps
526+
mid_start = int(trades_full["timestamp"].iloc[len(trades_full) // 3])
527+
mid_end = int(trades_full["timestamp"].iloc[(2 * len(trades_full)) // 3])
528+
return TimeRange("date", "date", startts=mid_start, stopts=mid_end)
529+
530+
531+
def test_feather_trades_timerange_filter_fullspan(feather_dh, trades_full):
532+
timerange_full = TimeRange(
533+
"date",
534+
"date",
535+
startts=int(trades_full["timestamp"].min()),
536+
stopts=int(trades_full["timestamp"].max()),
537+
)
538+
# Full-span filter should equal unfiltered
539+
filtered = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=timerange_full)
540+
assert_frame_equal(
541+
trades_full.reset_index(drop=True), filtered.reset_index(drop=True), check_exact=True
542+
)
543+
544+
545+
def test_feather_trades_timerange_filter_subset(feather_dh, trades_full, timerange_mid):
546+
# Subset filter should be a subset of the full-span filter
547+
subset = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=timerange_mid)
548+
assert not subset.empty
549+
assert subset["timestamp"].min() >= timerange_mid.startts
550+
assert subset["timestamp"].max() <= timerange_mid.stopts
551+
assert len(subset) < len(trades_full)
552+
553+
554+
def test_feather_trades_timerange_pushdown_fallback(
555+
feather_dh, trades_full, timerange_mid, monkeypatch, caplog
556+
):
557+
# Pushdown filter should fail, so fallback should load the entire file
558+
import freqtrade.data.history.datahandlers.featherdatahandler as fdh
559+
560+
def raise_err(*args, **kwargs):
561+
raise ValueError("fail")
562+
563+
# Mock the dataset loading to raise an error
564+
monkeypatch.setattr(fdh.dataset, "dataset", raise_err)
565+
566+
with caplog.at_level("WARNING"):
567+
out = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=timerange_mid)
568+
569+
assert len(out) == len(trades_full)
570+
assert any(
571+
"Unable to use Arrow filtering, loading entire trades file" in r.message
572+
for r in caplog.records
573+
)
574+
575+
576+
def test_feather_trades_timerange_open_start(feather_dh, trades_full):
577+
# Open start: stop timestamp but no start (startts=0)
578+
stop_ts = int(trades_full["timestamp"].iloc[(2 * len(trades_full)) // 3])
579+
tr = TimeRange(None, "date", startts=0, stopts=stop_ts)
580+
581+
filtered = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=tr)
582+
assert 0 < len(filtered) < len(trades_full)
583+
assert filtered["timestamp"].max() <= stop_ts
584+
# First row should match full's first row
585+
assert filtered.iloc[0]["timestamp"] == trades_full.iloc[0]["timestamp"]
586+
587+
588+
def test_feather_trades_timerange_open_end(feather_dh, trades_full):
589+
# Open end: start timestamp but no stop (stopts=0)
590+
start_ts = int(trades_full["timestamp"].iloc[len(trades_full) // 3])
591+
tr = TimeRange("date", None, startts=start_ts, stopts=0)
592+
593+
filtered = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=tr)
594+
assert 0 < len(filtered) < len(trades_full)
595+
assert filtered["timestamp"].min() >= start_ts
596+
# Last row should match full's last row
597+
assert filtered.iloc[-1]["timestamp"] == trades_full.iloc[-1]["timestamp"]
598+
599+
600+
def test_feather_trades_timerange_fully_open(feather_dh, trades_full):
601+
# Fully open: no start or stop bounds (both 0)
602+
tr = TimeRange(None, None, startts=0, stopts=0)
603+
604+
filtered = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=tr)
605+
# Should equal unfiltered load
606+
assert_frame_equal(
607+
trades_full.reset_index(drop=True), filtered.reset_index(drop=True), check_exact=True
608+
)
609+
610+
611+
def test_feather_build_arrow_time_filter(feather_dh):
612+
# None timerange should return None
613+
assert feather_dh._build_arrow_time_filter(None) is None
614+
615+
# Fully open (both bounds 0) should return None
616+
tr_fully_open = TimeRange(None, None, startts=0, stopts=0)
617+
assert feather_dh._build_arrow_time_filter(tr_fully_open) is None
618+
619+
# Open start (startts=0) should return stop filter only
620+
tr_open_start = TimeRange(None, "date", startts=0, stopts=1000)
621+
filter_open_start = feather_dh._build_arrow_time_filter(tr_open_start)
622+
assert filter_open_start is not None
623+
# Should be a single expression (timestamp <= stopts)
624+
assert str(filter_open_start).count("<=") == 1
625+
assert str(filter_open_start).count(">=") == 0
626+
627+
# Open end (stopts=0) should return start filter only
628+
tr_open_end = TimeRange("date", None, startts=500, stopts=0)
629+
filter_open_end = feather_dh._build_arrow_time_filter(tr_open_end)
630+
assert filter_open_end is not None
631+
# Should be a single expression (timestamp >= startts)
632+
assert str(filter_open_end).count(">=") == 1
633+
assert str(filter_open_end).count("<=") == 0
634+
635+
# Closed range should return combined filter
636+
tr_closed = TimeRange("date", "date", startts=500, stopts=1000)
637+
filter_closed = feather_dh._build_arrow_time_filter(tr_closed)
638+
assert filter_closed is not None
639+
# Should contain both >= and <= (combined with &)
640+
filter_str = str(filter_closed)
641+
assert ">=" in filter_str
642+
assert "<=" in filter_str

0 commit comments

Comments
 (0)