Skip to content

Commit d24c440

Browse files
authored
feat: implement metrics (#7)
1 parent 11faffb commit d24c440

File tree

11 files changed

+668
-0
lines changed

11 files changed

+668
-0
lines changed

qfeval_data/data.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,6 +1864,32 @@ def sum(self, axis: typing.Optional[Axis] = None) -> Data:
18641864
tensors[k] = functions.nansum(v, dim=ag.dim, keepdim=ag.keepdim)
18651865
return self.from_tensors(tensors, ag.timestamps, ag.symbols)
18661866

1867+
def min(self, axis: typing.Optional[Axis] = None) -> Data:
    """Returns the minimum over the given axis, skipping NaNs.

    Each dimension resolved from `axis` is reduced with
    `functions.nanmin`; reduced dimensions are kept as size 1 while
    reducing and dropped afterwards unless the aggregation asks to keep
    them.
    """
    ag = self.__aggregate(axis, "min")
    result: typing.Dict[str, torch.Tensor] = {}
    for name, tensor in ag.items:
        reduced = tensor
        # Reduce one dimension at a time with keepdim=True so the dim
        # indices stay valid across iterations.
        for dim in ag.dim:
            reduced = functions.nanmin(reduced, dim=dim, keepdim=True).values
        if not ag.keepdim:
            # Squeeze from the highest dim down so earlier squeezes do
            # not shift the indices of later ones.
            for dim in sorted(ag.dim, reverse=True):
                reduced = reduced.squeeze(dim)
        result[name] = reduced
    return self.from_tensors(result, ag.timestamps, ag.symbols)
1879+
1880+
def max(self, axis: typing.Optional[Axis] = None) -> Data:
    """Returns the maximum over the given axis, skipping NaNs.

    Each dimension resolved from `axis` is reduced with
    `functions.nanmax`; reduced dimensions are kept as size 1 while
    reducing and dropped afterwards unless the aggregation asks to keep
    them.
    """
    ag = self.__aggregate(axis, "max")
    result: typing.Dict[str, torch.Tensor] = {}
    for name, tensor in ag.items:
        reduced = tensor
        # Reduce one dimension at a time with keepdim=True so the dim
        # indices stay valid across iterations.
        for dim in ag.dim:
            reduced = functions.nanmax(reduced, dim=dim, keepdim=True).values
        if not ag.keepdim:
            # Squeeze from the highest dim down so earlier squeezes do
            # not shift the indices of later ones.
            for dim in sorted(ag.dim, reverse=True):
                reduced = reduced.squeeze(dim)
        result[name] = reduced
    return self.from_tensors(result, ag.timestamps, ag.symbols)
1892+
18671893
def mean(self, axis: typing.Optional[Axis] = None) -> Data:
18681894
ag = self.__aggregate(axis, "mean")
18691895
tensors = {}
@@ -1918,6 +1944,122 @@ def count(self, axis: typing.Optional[Axis] = None) -> Data:
19181944
)
19191945
return self.from_tensors(tensors, ag.timestamps, ag.symbols)
19201946

1947+
def first(
    self, axis: typing.Optional[Axis] = "timestamp", skipna: bool = True
) -> Data:
    """Returns the first value along the given axis.

    If `skipna=True`, returns the first non-NaN; otherwise, simply selects
    the first element (which may be NaN).
    """
    ag = self.__aggregate(axis, "first")
    result: typing.Dict[str, torch.Tensor] = {}
    for name, tensor in ag.items:
        sliced = tensor
        for dim in ag.dim:
            # bfill pulls the first non-NaN value forward to index 0, so
            # narrowing to the leading element yields the first valid one.
            if skipna:
                sliced = functions.bfill(sliced, dim)
            sliced = sliced.narrow(dim, 0, 1)
        # nansum over size-1 dims collapses them per keepdim.
        # NOTE(review): if the selected element is still NaN (e.g.
        # skipna=False on a leading NaN), the result depends on
        # `functions.nansum`'s all-NaN semantics -- confirm it propagates
        # NaN rather than returning 0.
        result[name] = functions.nansum(sliced, dim=ag.dim, keepdim=ag.keepdim)
    return self.from_tensors(result, ag.timestamps, ag.symbols)
1962+
1963+
def last(
    self, axis: typing.Optional[Axis] = "timestamp", skipna: bool = True
) -> Data:
    """Returns the last value along the given axis.

    If `skipna=True`, returns the last non-NaN; otherwise, simply selects
    the last element (which may be NaN).
    """
    ag = self.__aggregate(axis, "last")
    result: typing.Dict[str, torch.Tensor] = {}
    for name, tensor in ag.items:
        sliced = tensor
        for dim in ag.dim:
            # ffill carries the last non-NaN value to the final index, so
            # narrowing to the trailing element yields the last valid one.
            if skipna:
                sliced = functions.ffill(sliced, dim)
            sliced = sliced.narrow(dim, -1, 1)
        # nansum over size-1 dims collapses them per keepdim.
        # NOTE(review): if the selected element is still NaN (e.g.
        # skipna=False on a trailing NaN), the result depends on
        # `functions.nansum`'s all-NaN semantics -- confirm it propagates
        # NaN rather than returning 0.
        result[name] = functions.nansum(sliced, dim=ag.dim, keepdim=ag.keepdim)
    return self.from_tensors(result, ag.timestamps, ag.symbols)
1978+
1979+
############################################################################
1980+
# Metrics
1981+
############################################################################
1982+
1983+
def annualized_return(self) -> Data:
    """Returns the annualized total return per series.

    Formula reference: https://www.investopedia.com/terms/a/annualized-total-return.asp

    Takes the first and last non-NaN values along the timestamp axis and
    evaluates (last / first) ** (1 / years) - 1, where years is the
    elapsed time between the dataset's first and last timestamps measured
    in Gregorian years (365.2425 days).
    """
    if not self.has_timestamps():
        raise ValueError("annualized_return requires valid timestamps")

    head = self.first()
    tail = self.last()
    # Elapsed time between the first and last timestamps, in years,
    # computed from microsecond-resolution epoch integers.
    epoch_us = self.__timestamps.astype("datetime64[us]").astype("int64")
    years = (epoch_us[-1] - epoch_us[0]) / (365.2425 * 24 * 60 * 60 * 1e6)
    # NOTE(review): the span uses the global first/last timestamps even
    # for series whose first valid value appears later -- confirm this is
    # the intended convention (the unit tests pin this behavior).
    return (tail / head).apply(lambda r: torch.pow(r, 1.0 / years) - 1)

ar = annualized_return
2002+
2003+
def annualized_volatility(self) -> Data:
    """Returns the annualized volatility per series.

    Computes the population standard deviation (ddof=0) of period returns
    along the timestamp axis and scales it by sqrt(periods_per_year),
    where periods_per_year is the number of periods in the dataset divided
    by the elapsed years between the first and last timestamps.
    """
    if not self.has_timestamps():
        raise ValueError("annualized_volatility requires valid timestamps")

    # Elapsed time between the first and last timestamps, in Gregorian
    # years (365.2425 days), from microsecond-resolution epoch integers.
    epoch_us = self.__timestamps.astype("datetime64[us]").astype("int64")
    years = (epoch_us[-1] - epoch_us[0]) / (365.2425 * 24 * 60 * 60 * 1e6)
    periods_per_year = (len(self.timestamps) - 1) / years
    period_returns = self.pct_change(skipna=True)
    return period_returns.std(axis="timestamp", ddof=0) * np.sqrt(
        periods_per_year
    )

avol = annualized_volatility
2022+
2023+
def annualized_sharpe_ratio(self) -> Data:
    """Returns annualized Sharpe ratio per series.

    Defined as annualized_return / annualized_volatility (no risk-free
    rate is subtracted).
    """
    numerator = self.annualized_return()
    denominator = self.annualized_volatility()
    return numerator / denominator

asr = annualized_sharpe_ratio
2031+
2032+
def maximum_drawdown(self) -> Data:
    """Returns the maximum drawdown per series as a non-negative fraction.

    Builds the running peak along timestamps (ffill followed by cummax),
    forms the ratio current / peak, and returns 1 minus the smallest
    ratio over time -- i.e. the deepest peak-to-trough decline expressed
    as a positive number (0.25 means a 25% drop from the running peak).
    """
    # cummax cannot skip NaNs, so fill them first: ffill for interior
    # gaps, then bfill for leading NaNs.
    filled = self.fillna(method="ffill").fillna(method="bfill")
    running_peak = filled.apply(lambda x: torch.cummax(x, dim=0).values)
    trough_ratio = (filled / running_peak).min(axis="timestamp")
    return 1 - trough_ratio

mdd = maximum_drawdown
2045+
2046+
def metrics(self) -> Data:
    """Returns a Data object that contains common metrics per series.

    The returned Data object contains the following columns:
    - annualized_sharpe_ratio
    - annualized_return
    - annualized_volatility
    - maximum_drawdown
    """
    sharpe = self.annualized_sharpe_ratio().rename("annualized_sharpe_ratio")
    total_return = self.annualized_return().rename("annualized_return")
    volatility = self.annualized_volatility().rename("annualized_volatility")
    drawdown = self.maximum_drawdown().rename("maximum_drawdown")
    return sharpe.merge(total_return, volatility, drawdown)
2062+
19212063
############################################################################
19222064
# Private methods
19232065
############################################################################

tests/data/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from math import nan
2+
3+
import numpy as np
4+
import torch
5+
6+
from qfeval_data import Data
7+
8+
from .util import timestamps
9+
10+
11+
def test_annualized_return_basic() -> None:
    """annualized_return uses first/last values; a mid-series NaN is ignored."""
    ts = timestamps(3)
    prices = torch.tensor(
        [
            [100.0, 200.0],
            [100.5, nan],
            [100.2, 200.1],
        ],
        dtype=torch.float32,
    )
    data = Data.from_tensors({"price": prices}, ts, np.array(["A", "B"]))
    # (last / first) annualized over the 2-day span.
    expected = [
        (100.2 / 100.0) ** (365.25 / 2.0) - 1.0,
        (200.1 / 200.0) ** (365.25 / 2.0) - 1.0,
    ]
    np.testing.assert_allclose(
        data.annualized_return().price.array, expected, atol=1e-4
    )
28+
29+
30+
def test_annualized_return_with_nans() -> None:
    """annualized_return skips leading/interior NaNs to find first/last values."""
    ts = timestamps(3)
    prices = torch.tensor(
        [
            [nan, 200.0],
            [100.0, nan],
            [100.1, 200.1],
        ],
        dtype=torch.float32,
    )
    data = Data.from_tensors({"price": prices}, ts, np.array(["A", "B"]))
    # The year span still covers the full 2 days even though series A's
    # first valid value appears at the second timestamp.
    expected = [
        (100.1 / 100.0) ** (365.25 / 2.0) - 1.0,
        (200.1 / 200.0) ** (365.25 / 2.0) - 1.0,
    ]
    np.testing.assert_allclose(
        data.annualized_return().price.array, expected, atol=1e-4
    )
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import numpy as np
2+
import torch
3+
4+
from qfeval_data import Data
5+
6+
from .util import timestamps
7+
8+
9+
def test_annualized_sharpe_ratio_basic() -> None:
    """Sharpe ratio equals annualized_return / annualized_volatility."""
    ts = timestamps(4)
    prices = torch.tensor(
        [
            [101.0, 200.0],
            [102.0, 205.0],
            [100.0, 220.0],
            [101.0, 210.0],
        ],
        dtype=torch.float32,
    )
    data = Data.from_tensors({"price": prices}, ts, np.array(["A", "B"]))

    annual_return = data.annualized_return().price.array
    annual_volatility = data.annualized_volatility().price.array
    expected = annual_return / annual_volatility

    np.testing.assert_allclose(
        data.annualized_sharpe_ratio().price.array,
        expected,
        rtol=5e-4,
        atol=5e-5,
    )
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import numpy as np
2+
import torch
3+
4+
from qfeval_data import Data
5+
6+
from .util import timestamps
7+
8+
9+
def test_annualized_volatility_basic() -> None:
    """Volatility is the ddof=0 std of daily returns scaled to one year."""
    ts = timestamps(4)
    prices = torch.tensor(
        [
            [101.0, 200.0],
            [102.0, 205.0],
            [100.0, 220.0],
            [101.0, 210.0],
        ],
        dtype=torch.float64,
    )
    data = Data.from_tensors({"price": prices}, ts, np.array(["A", "B"]))
    period_returns = np.array(
        [
            [102.0 / 101.0 - 1.0, 205.0 / 200.0 - 1.0],
            [100.0 / 102.0 - 1.0, 220.0 / 205.0 - 1.0],
            [101.0 / 100.0 - 1.0, 210.0 / 220.0 - 1.0],
        ],
        dtype=np.float64,
    )
    expected = np.nanstd(period_returns, axis=0, ddof=0) * np.sqrt(365.25)
    np.testing.assert_allclose(
        data.annualized_volatility().price.array, expected, atol=1e-5
    )
35+
36+
37+
def test_annualized_volatility_with_nans() -> None:
    """NaNs are skipped when computing period returns for volatility."""
    ts = timestamps(4)
    prices = torch.tensor(
        [
            [float("nan"), 200.0],
            [100.0, float("nan")],
            [102.0, 210.0],
            [101.0, 205.0],
        ],
        dtype=torch.float64,
    )
    data = Data.from_tensors({"price": prices}, ts, np.array(["A", "B"]))
    # Returns are taken between consecutive non-NaN observations.
    period_returns = np.array(
        [
            [102.0 / 100.0 - 1.0, 210.0 / 200.0 - 1.0],
            [101.0 / 102.0 - 1.0, 205.0 / 210.0 - 1.0],
        ],
        dtype=np.float64,
    )
    expected = np.nanstd(period_returns, axis=0, ddof=0) * np.sqrt(365.25)
    np.testing.assert_allclose(
        data.annualized_volatility().price.array, expected, atol=1e-5
    )

0 commit comments

Comments
 (0)