Skip to content

Commit 09032e9

Browse files
author
Scott Sanderson
authored
Merge pull request #2597 from quantopian/h5-fx
ENH: HDF5-based FX Reader/Writer.
2 parents bbf9221 + 49fbc20 commit 09032e9

File tree

11 files changed

+747
-112
lines changed

11 files changed

+747
-112
lines changed

tests/data/test_fx.py

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
import itertools
2+
3+
import h5py
4+
import pandas as pd
5+
import numpy as np
6+
7+
from zipline.data.fx import DEFAULT_FX_RATE
8+
from zipline.data.fx.hdf5 import HDF5FXRateReader, HDF5FXRateWriter
9+
10+
from zipline.testing.predicates import assert_equal
11+
import zipline.testing.fixtures as zp_fixtures
12+
13+
14+
class _FXReaderTestCase(zp_fixtures.WithFXRates,
                        zp_fixtures.ZiplineTestCase):
    """
    Base class for testing FXRateReader implementations.

    To test a new FXRateReader implementation, subclass from this base class
    and implement the ``reader`` property, returning an FXRateReader that uses
    the data stored in ``cls.fx_rates``.
    """
    FX_RATES_START_DATE = pd.Timestamp('2014-01-01', tz='UTC')
    FX_RATES_END_DATE = pd.Timestamp('2014-01-31', tz='UTC')

    # Calendar to which exchange rates data is aligned.
    FX_RATES_CALENDAR = '24/5'

    # Currencies between which exchange rates can be calculated.
    FX_RATES_CURRENCIES = ["USD", "CAD", "GBP", "EUR"]

    # Fields for which exchange rate data is present.
    FX_RATES_RATE_NAMES = ["london_mid", "tokyo_mid"]

    # Field to be used on a lookup of `'default'`.
    FX_RATES_DEFAULT_RATE = 'london_mid'

    # Used by WithFXRates.
    @classmethod
    def make_fx_rates(cls, fields, currencies, sessions):
        """Build the synthetic FX rates served by the readers under test.

        Parameters
        ----------
        fields : unused
            Accepted for compatibility with the WithFXRates hook; the rate
            names produced here are fixed to 'london_mid' and 'tokyo_mid'.
        currencies : sequence of str
            Column order for the reference frame.
        sessions : sequence of dates
            Index of the reference frame; one row per session.

        Returns
        -------
        rates : dict
            Maps each rate name to the output of
            ``make_fx_rates_from_reference`` for that rate.
        """
        ndays = len(sessions)

        # Give each currency a timeseries of "true" values, and compute fx
        # rates as ratios between true values.
        reference = pd.DataFrame({
            'USD': np.linspace(1.0, 2.0, num=ndays),
            'CAD': np.linspace(2.0, 3.0, num=ndays),
            'GBP': np.linspace(3.0, 4.0, num=ndays),
            'EUR': np.linspace(4.0, 5.0, num=ndays),
        }, index=sessions, columns=currencies)

        cls.tokyo_mid_rates = cls.make_fx_rates_from_reference(reference)
        # Make london_mid different by adding +1 to reference values.
        cls.london_mid_rates = cls.make_fx_rates_from_reference(reference + 1)

        # This will be set as cls.fx_rates by WithFXRates.
        return {
            'london_mid': cls.london_mid_rates,
            'tokyo_mid': cls.tokyo_mid_rates,
        }

    @classmethod
    def get_expected_rate_scalar(cls, rate, quote, base, dt):
        """Get the expected FX rate for the given scalar coordinates.

        ``rate`` may be DEFAULT_FX_RATE, in which case it is resolved to
        ``cls.FX_RATES_DEFAULT_RATE``. The value returned is the most recent
        one at or before ``dt`` (forward-filled lookup).
        """
        if rate == DEFAULT_FX_RATE:
            rate = cls.FX_RATES_DEFAULT_RATE

        col = cls.fx_rates[rate][quote][base]
        # PERF: We call this function a lot in this suite, and get_loc is
        # surprisingly expensive, so optimizing it has a meaningful impact on
        # overall suite performance. See test_fast_get_loc_ffilled for
        # assurance that this behaves the same as get_loc.
        ix = fast_get_loc_ffilled(col.index.values, dt.asm8)
        return col.values[ix]

    @classmethod
    def get_expected_rates(cls, rate, quote, bases, dts):
        """Get an array of expected FX rates for the given indices.

        Returns a float64 array of shape ``(len(dts), len(bases))`` whose
        ``[i, j]`` entry is the expected scalar rate for ``dts[i]`` and
        ``bases[j]``.
        """
        out = np.empty((len(dts), len(bases)), dtype='float64')

        for i, dt in enumerate(dts):
            for j, base in enumerate(bases):
                out[i, j] = cls.get_expected_rate_scalar(rate, quote, base, dt)

        return out

    @property
    def reader(self):
        """The FXRateReader under test. Subclasses must override this."""
        raise NotImplementedError("Must be implemented by test suite.")

    def test_scalar_lookup(self):
        """Query one (date, base) cell at a time for every combination."""
        reader = self.reader

        # Include the default rate alias so that scalar lookups cover the
        # same rate names as the vectorized lookups below.
        rates = self.FX_RATES_RATE_NAMES + [DEFAULT_FX_RATE]
        currencies = self.FX_RATES_CURRENCIES
        dates = pd.date_range(self.FX_RATES_START_DATE, self.FX_RATES_END_DATE)

        cases = itertools.product(rates, currencies, currencies, dates)

        for rate, quote, base, dt in cases:
            dts = pd.DatetimeIndex([dt], tz='UTC')
            bases = np.array([base])

            result = reader.get_rates(rate, quote, bases, dts)
            assert_equal(result.shape, (1, 1))

            result_scalar = result[0, 0]
            if quote == base:
                # Converting a currency to itself must be the identity.
                assert_equal(result_scalar, 1.0)

            expected = self.get_expected_rate_scalar(rate, quote, base, dt)
            assert_equal(result_scalar, expected)

    def test_vectorized_lookup(self):
        """Query random blocks of dates and currencies in a single call."""
        # Fixed seed so the randomized cases are reproducible.
        rand = np.random.RandomState(42)

        dates = pd.date_range(self.FX_RATES_START_DATE, self.FX_RATES_END_DATE)
        rates = self.FX_RATES_RATE_NAMES + [DEFAULT_FX_RATE]
        currencies = self.FX_RATES_CURRENCIES

        # For every combination of rate name and quote currency...
        for rate, quote in itertools.product(rates, currencies):

            # Choose N random distinct days...
            for ndays in 1, 2, 7, 20:
                dts_raw = rand.choice(dates, ndays, replace=False)
                dts = pd.DatetimeIndex(dts_raw, tz='utc').sort_values()

                # Choose M random possibly-non-distinct currencies...
                for nbases in 1, 2, 10, 200:
                    bases = rand.choice(currencies, nbases, replace=True)

                    # ...And check that we get the expected result when
                    # querying for those dates/currencies. The check lives
                    # inside this loop so that every size of ``bases`` is
                    # verified, not just the last one generated.
                    result = self.reader.get_rates(rate, quote, bases, dts)
                    expected = self.get_expected_rates(
                        rate, quote, bases, dts,
                    )

                    assert_equal(result, expected)

    def test_load_everything(self):
        """Reading back the exact input indices returns the exact input."""
        # Sanity check for the randomized tests above: check that we get
        # exactly the rates we set up in make_fx_rates if we query for their
        # indices.
        for currency in self.FX_RATES_CURRENCIES:
            tokyo_rates = self.tokyo_mid_rates[currency]
            tokyo_result = self.reader.get_rates(
                'tokyo_mid',
                currency,
                tokyo_rates.columns,
                tokyo_rates.index,
            )
            assert_equal(tokyo_result, tokyo_rates.values)

            london_rates = self.london_mid_rates[currency]
            london_result = self.reader.get_rates(
                'london_mid',
                currency,
                london_rates.columns,
                london_rates.index,
            )
            default_result = self.reader.get_rates(
                DEFAULT_FX_RATE,
                currency,
                london_rates.columns,
                london_rates.index,
            )
            # The default rate is configured to be london_mid, so both
            # spellings must produce identical output.
            assert_equal(london_result, default_result)
            assert_equal(london_result, london_rates.values)

    def _check_out_of_range_dates_raise(self, bad_dates):
        """Assert that reading any of ``bad_dates`` raises ValueError."""
        for bad_date in bad_dates:
            for rate in self.FX_RATES_RATE_NAMES:
                quote = 'USD'
                bases = np.array(['CAD'], dtype='S3')
                dts = pd.DatetimeIndex([bad_date])
                with self.assertRaises(ValueError):
                    self.reader.get_rates(rate, quote, bases, dts)

    def test_read_before_start_date(self):
        """Dates earlier than the first session are rejected."""
        self._check_out_of_range_dates_raise((
            self.FX_RATES_START_DATE - pd.Timedelta('1 day'),
            self.FX_RATES_START_DATE - pd.Timedelta('1000 days'),
        ))

    def test_read_after_end_date(self):
        """Dates later than the last session are rejected."""
        self._check_out_of_range_dates_raise((
            self.FX_RATES_END_DATE + pd.Timedelta('1 day'),
            self.FX_RATES_END_DATE + pd.Timedelta('1000 days'),
        ))
193+
194+
195+
class InMemoryFXReaderTestCase(_FXReaderTestCase):
    """Run the shared FX reader tests against the in-memory reader."""

    @property
    def reader(self):
        """The in-memory reader exposed on this test case by its fixtures."""
        return self.in_memory_fx_rate_reader
200+
201+
202+
class HDF5FXReaderTestCase(zp_fixtures.WithTmpDir,
                           _FXReaderTestCase):
    """
    Run the shared FX reader tests against an HDF5FXRateReader.

    The rates in ``cls.fx_rates`` are written to an HDF5 file in the class's
    temporary directory, which is then re-opened read-only for the reader.
    """

    @classmethod
    def init_class_fixtures(cls):
        """Write the FX rates to disk and open the reader under test."""
        super(HDF5FXReaderTestCase, cls).init_class_fixtures()

        h5_path = cls.tmpdir.getpath('fx_rates.h5')

        # Set by WithFXRates.
        session_index = cls.fx_rates_sessions

        def iter_rate_arrays():
            # Lazily yield one (rate, quote, values) triple per frame.
            for rate_name, frames_by_quote in cls.fx_rates.items():
                for quote_currency, frame in frames_by_quote.items():
                    yield rate_name, quote_currency, frame.values

        # Write in-memory data to h5 file.
        with h5py.File(h5_path, 'w') as write_handle:
            HDF5FXRateWriter(write_handle).write(
                dts=session_index.values,
                currencies=np.array(cls.FX_RATES_CURRENCIES, dtype='S3'),
                data=iter_rate_arrays(),
            )

        # Registering the read handle as a class context ties its lifetime
        # to the test class rather than to this method.
        read_handle = cls.enter_class_context(h5py.File(h5_path, 'r'))
        cls.h5_fx_reader = HDF5FXRateReader(
            read_handle,
            default_rate=cls.FX_RATES_DEFAULT_RATE,
        )

    @property
    def reader(self):
        """The HDF5-backed reader created in ``init_class_fixtures``."""
        return self.h5_fx_reader
236+
237+
238+
def fast_get_loc_ffilled(dts, dt):
    """
    Equivalent to dts.get_loc(dt, method='ffill'), but with reasonable
    microperformance.

    Parameters
    ----------
    dts : sorted array-like with a ``searchsorted`` method
        Values to search.
    dt : scalar comparable with the entries of ``dts``
        Value to locate.

    Returns
    -------
    ix : int
        Position of the last entry of ``dts`` that is less than or equal to
        ``dt``.

    Raises
    ------
    KeyError
        If ``dt`` sorts before every entry of ``dts``.
    """
    # With side='right' this is the number of entries <= dt.
    n_at_or_before = dts.searchsorted(dt, side='right')
    if n_at_or_before == 0:
        raise KeyError(dt)
    return n_at_or_before - 1
247+
248+
249+
class FastGetLocTestCase(zp_fixtures.ZiplineTestCase):
    """Check fast_get_loc_ffilled against pandas' forward-filled get_loc."""

    def test_fast_get_loc_ffilled(self):
        """Both lookups agree on hits, gaps, overshoot, and the error case."""
        dts = pd.to_datetime([
            '2014-01-02',
            '2014-01-03',
            # Skip 2014-01-04
            '2014-01-05',
            '2014-01-06',
        ])

        # Covers exact matches, the skipped day, and dates past the end.
        for dt in pd.date_range('2014-01-02', '2014-01-08'):
            result = fast_get_loc_ffilled(dts.values, dt.asm8)
            expected = dts.get_loc(dt, method='ffill')
            assert_equal(result, expected)

        too_early = pd.Timestamp('2014-01-01')

        with self.assertRaises(KeyError):
            dts.get_loc(too_early, method='ffill')

        # Use the same input types as the loop above (raw values plus a
        # datetime64 scalar) so the error path is tested the way the
        # function is actually called.
        with self.assertRaises(KeyError):
            fast_get_loc_ffilled(dts.values, too_early.asm8)

tests/pipeline/test_international_markets.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -320,16 +320,16 @@ def test_currency_convert_prices(self, name, domain, calendar_name):
320320
# (dates, sids) dataframe giving the exchange rate from each
321321
# asset's currency to the target currency.
322322
expected_rates = fx_reader.get_rates(
323-
field='mid',
323+
rate='mid',
324324
quote=target,
325-
bases=np.array(currency_codes, dtype='S3'),
325+
bases=np.array(currency_codes, dtype=object),
326326
# Exchange rates used for pipeline output with label N should
327327
# be from day N - 1, so shift back from `execution_sessions` by
328328
# a day.
329-
dates=sessions[-18:-10],
329+
dts=sessions[-18:-10],
330330
)
331331

332-
expected_result_2d = closes_2d * expected_rates.values
332+
expected_result_2d = closes_2d * expected_rates
333333

334334
assert_equal(result_2d, expected_result_2d)
335335

zipline/data/fx.py

Lines changed: 0 additions & 75 deletions
This file was deleted.

zipline/data/fx/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from .base import FXRateReader, DEFAULT_FX_RATE
2+
from .in_memory import InMemoryFXRateReader
3+
from .exploding import ExplodingFXRateReader
4+
from .hdf5 import HDF5FXRateReader, HDF5FXRateWriter
5+
6+
__all__ = [
7+
'DEFAULT_FX_RATE',
8+
'ExplodingFXRateReader',
9+
'FXRateReader',
10+
'HDF5FXRateReader',
11+
'HDF5FXRateWriter',
12+
'InMemoryFXRateReader',
13+
]

0 commit comments

Comments
 (0)