Skip to content

Commit f21c5ea

Browse files
committed
feat: Remove redundant filtering, add tests for pyarrow trade filtering, use date utils for date to ts conversion
1 parent 82903cc commit f21c5ea

File tree

3 files changed

+71
-19
lines changed

3 files changed

+71
-19
lines changed

freqtrade/data/history/datahandlers/featherdatahandler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def _trades_load(
143143

144144
except (ImportError, AttributeError, ValueError) as e:
145145
# Fallback: load entire file
146-
logger.debug(f"Unable to use Arrow filtering, loading entire trades file: {e}")
146+
logger.warning(f"Unable to use Arrow filtering, loading entire trades file: {e}")
147147
tradesdata = read_feather(filename)
148148

149149
return tradesdata

freqtrade/strategy/interface.py

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
)
4242
from freqtrade.strategy.strategy_validation import StrategyResultValidator
4343
from freqtrade.strategy.strategy_wrapper import strategy_safe_wrapper
44-
from freqtrade.util import dt_now
44+
from freqtrade.util import dt_now, dt_ts
4545
from freqtrade.wallets import Wallets
4646

4747

@@ -1770,29 +1770,14 @@ def _if_enabled_populate_trades(self, dataframe: DataFrame, metadata: dict) -> D
17701770
pair = metadata["pair"]
17711771
# Build timerange from dataframe date column
17721772
if not dataframe.empty:
1773-
start_ts = int(dataframe["date"].iloc[0].timestamp() * 1000)
1774-
end_ts = int(dataframe["date"].iloc[-1].timestamp() * 1000)
1773+
start_ts = dt_ts(dataframe["date"].iloc[0])
1774+
end_ts = dt_ts(dataframe["date"].iloc[-1])
17751775
timerange = TimeRange("date", "date", startts=start_ts, stopts=end_ts)
17761776
else:
17771777
timerange = None
17781778

17791779
trades = self.dp.trades(pair=pair, copy=False, timerange=timerange)
17801780

1781-
# Apply additional filtering with buffer for faster backtesting
1782-
if not trades.empty and not dataframe.empty and "timestamp" in trades.columns:
1783-
# Add timeframe buffer to ensure complete candle coverage
1784-
timeframe_buffer = timeframe_to_seconds(self.config["timeframe"]) * 1000
1785-
1786-
# Create time bounds with buffer
1787-
time_start = start_ts - timeframe_buffer
1788-
time_end = end_ts + timeframe_buffer
1789-
1790-
# Filter trades within buffered timerange
1791-
trades_mask = (trades["timestamp"] >= time_start) & (
1792-
trades["timestamp"] <= time_end
1793-
)
1794-
trades = trades.loc[trades_mask].reset_index(drop=True)
1795-
17961781
cached_grouped_trades: DataFrame | None = self._cached_grouped_trades_per_pair.get(pair)
17971782
dataframe, cached_grouped_trades = populate_dataframe_with_trades(
17981783
cached_grouped_trades, self.config, dataframe, trades

tests/data/test_datahandler.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,3 +506,70 @@ def test_get_datahandler(testdatadir):
506506
assert isinstance(dh, JsonGzDataHandler)
507507
dh1 = get_datahandler(testdatadir, "jsongz", dh)
508508
assert id(dh1) == id(dh)
509+
510+
511+
@pytest.fixture
512+
def feather_dh(testdatadir):
513+
return FeatherDataHandler(testdatadir)
514+
515+
516+
@pytest.fixture
517+
def trades_full(feather_dh):
518+
df = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT)
519+
assert not df.empty
520+
return df
521+
522+
523+
@pytest.fixture
524+
def timerange_full(trades_full):
525+
# Pick a full-span window using actual timestamps
526+
startts = int(trades_full["timestamp"].min())
527+
stopts = int(trades_full["timestamp"].max())
528+
return TimeRange("date", "date", startts=startts, stopts=stopts)
529+
530+
531+
@pytest.fixture
532+
def timerange_mid(trades_full):
533+
# Pick a mid-range window using actual timestamps
534+
mid_start = int(trades_full["timestamp"].iloc[len(trades_full) // 3])
535+
mid_end = int(trades_full["timestamp"].iloc[(2 * len(trades_full)) // 3])
536+
return TimeRange("date", "date", startts=mid_start, stopts=mid_end)
537+
538+
539+
def test_feather_trades_timerange_filter_fullspan(feather_dh, trades_full, timerange_full):
540+
# Full-span filter should equal unfiltered
541+
filtered = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=timerange_full)
542+
assert_frame_equal(
543+
trades_full.reset_index(drop=True), filtered.reset_index(drop=True), check_exact=True
544+
)
545+
546+
547+
def test_feather_trades_timerange_filter_subset(feather_dh, trades_full, timerange_mid):
548+
# A mid-range timerange should return only trades inside the window, i.e. fewer rows than the full load
549+
subset = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=timerange_mid)
550+
assert not subset.empty
551+
assert subset["timestamp"].min() >= timerange_mid.startts
552+
assert subset["timestamp"].max() <= timerange_mid.stopts
553+
assert len(subset) < len(trades_full)
554+
555+
556+
def test_feather_trades_timerange_pushdown_fallback(
557+
feather_dh, trades_full, timerange_mid, monkeypatch, caplog
558+
):
559+
# Force the Arrow pushdown filter to fail; the fallback should then load the entire file
560+
import freqtrade.data.history.datahandlers.featherdatahandler as fdh
561+
562+
def raise_err(*args, **kwargs):
563+
raise ValueError("fail")
564+
565+
# Mock the dataset loading to raise an error
566+
monkeypatch.setattr(fdh.dataset, "dataset", raise_err)
567+
568+
with caplog.at_level("WARNING"):
569+
out = feather_dh.trades_load("XRP/ETH", TradingMode.SPOT, timerange=timerange_mid)
570+
571+
assert len(out) == len(trades_full)
572+
assert any(
573+
"Unable to use Arrow filtering, loading entire trades file" in r.message
574+
for r in caplog.records
575+
)

0 commit comments

Comments
 (0)