Skip to content

Commit 006b7b9

Browse files
author
Scott Sanderson
committed
BUG: Handle currency conversions for unknown sids.
Don't crash on queries for currency codes of possibly-unknown sids on calls to `SessionBarReader.currency_codes`. When a currency code is requested for an unknown sid, we return zipline.currency.MISSING_CURRENCY_CODE for that sid.
1 parent bc89946 commit 006b7b9

File tree

6 files changed

+71
-61
lines changed

6 files changed

+71
-61
lines changed

tests/data/test_daily_bars.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from toolz import merge
3737
from trading_calendars import get_calendar
3838

39+
from zipline.currency import MISSING_CURRENCY_CODE
3940
from zipline.data.bar_reader import (
4041
NoDataAfterDate,
4142
NoDataBeforeDate,
@@ -541,6 +542,27 @@ def test_listing_currency(self):
541542

542543
assert_equal(results, expected)
543544

545+
def test_listing_currency_for_nonexistent_asset(self):
546+
reader = self.daily_bar_reader
547+
548+
valid_sid = max(self.assets)
549+
valid_currency = reader.currency_codes(np.array([valid_sid]))[0]
550+
invalid_sids = [-1, -2]
551+
552+
# XXX: We currently require at least one valid sid here, because the
553+
# MultiCountryDailyBarReader needs one valid sid to be able to dispatch
554+
# to a child reader. We could probably make that work, but there are no
555+
# real-world cases where we expect to get all-invalid currency queries,
556+
# so it's unclear whether we should do work to explicitly support such
557+
# queries.
558+
mixed = np.array(invalid_sids + [valid_sid])
559+
result = self.daily_bar_reader.currency_codes(mixed)
560+
expected = np.array(
561+
[MISSING_CURRENCY_CODE] * 2 + [valid_currency],
562+
dtype='S3'
563+
)
564+
assert_equal(result, expected)
565+
544566

545567
class BcolzDailyBarTestCase(WithBcolzEquityDailyBarReader, _DailyBarsTestCase):
546568
EQUITY_DAILY_BAR_COUNTRY_CODES = ['US']

zipline/currency.py

Lines changed: 18 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,11 @@
1-
from functools import partial, total_ordering
2-
1+
from functools import total_ordering
32
from iso4217 import Currency as ISO4217Currency
43

5-
import numpy as np
6-
74
_ALL_CURRENCIES = {}
85

96

10-
def strs_to_sids(strs, category_num):
11-
"""TODO: Improve this.
12-
"""
13-
out = np.full(len(strs), category_num << 50, dtype='i8')
14-
casted_buffer = np.ndarray(
15-
shape=out.shape,
16-
dtype='S6',
17-
buffer=out,
18-
strides=out.strides,
19-
)
20-
casted_buffer[:] = np.array(strs, dtype='S6')
21-
return out
22-
23-
24-
def str_to_sid(str_, category_num):
25-
return strs_to_sids([str_], category_num)[0]
26-
27-
28-
iso_currency_to_sid = partial(str_to_sid, category_num=3)
7+
# Special sentinel used to represent unknown or missing currencies.
8+
MISSING_CURRENCY_CODE = 'XXX'
299

3010

3111
@total_ordering
@@ -48,15 +28,20 @@ def __new__(cls, code):
4828
try:
4929
return _ALL_CURRENCIES[code]
5030
except KeyError:
51-
try:
52-
iso_currency = ISO4217Currency(code)
53-
except ValueError:
54-
raise ValueError(
55-
"{!r} is not a valid currency code.".format(code)
56-
)
31+
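# This isn't a real currency; it's the sentinel for unknown or missing currencies, so give it a placeholder name instead of looking it up.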
32+
if code == MISSING_CURRENCY_CODE:
33+
name = "NO CURRENCY"
34+
else:
35+
try:
36+
name = ISO4217Currency(code).currency_name
37+
except ValueError:
38+
raise ValueError(
39+
"{!r} is not a valid currency code.".format(code)
40+
)
41+
5742
obj = _ALL_CURRENCIES[code] = super(Currency, cls).__new__(cls)
58-
obj._currency = iso_currency
59-
obj._sid = iso_currency_to_sid(iso_currency.value)
43+
obj._code = code
44+
obj._name = name
6045
return obj
6146

6247
@property
@@ -67,7 +52,7 @@ def code(self):
6752
-------
6853
code : str
6954
"""
70-
return self._currency.value
55+
return self._code
7156

7257
@property
7358
def name(self):
@@ -77,13 +62,7 @@ def name(self):
7762
-------
7863
name : str
7964
"""
80-
return self._currency.currency_name
81-
82-
@property
83-
def sid(self):
84-
"""Unique integer identifier for this currency.
85-
"""
86-
return self._sid
65+
return self._name
8766

8867
def __eq__(self, other):
8968
if type(self) != type(other):

zipline/data/bcolz_daily_bars.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from toolz import compose
3535
from trading_calendars import get_calendar
3636

37+
from zipline.currency import MISSING_CURRENCY_CODE
3738
from zipline.data.session_bars import CurrencyAwareSessionBarReader
3839
from zipline.data.bar_reader import (
3940
NoDataAfterDate,
@@ -706,5 +707,14 @@ def get_value(self, sid, dt, field):
706707
return price
707708

708709
def currency_codes(self, sids):
709-
# TODO: Better handling for this.
710-
return np.full(len(sids), b'USD', dtype='S3')
710+
# XXX: This is pretty inefficient. This reader doesn't really support
711+
# country codes, so we always return USD for sids we know about, and
712+
# MISSING_CURRENCY_CODE for sids we don't know about at all.
713+
first_rows = self._first_rows
714+
out = []
715+
for sid in sids:
716+
if sid in first_rows:
717+
out.append('USD')
718+
else:
719+
out.append(MISSING_CURRENCY_CODE)
720+
return np.array(out, dtype='S3')

zipline/data/fx/hdf5.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
from interface import implements
9898
import h5py
9999
from logbook import Logger
100+
import numpy as np
100101
import pandas as pd
101102
import six
102103

@@ -176,17 +177,9 @@ def get_rates(self, rate, quote, bases, dts):
176177

177178
self._check_dts(self.dts, dts)
178179

179-
date_ixs = self.dts.searchsorted(dts, side='right') - 1
180-
currency_ixs = self.currencies.get_indexer(bases)
180+
row_ixs = self.dts.searchsorted(dts, side='right') - 1
181+
col_ixs = self.currencies.get_indexer(bases)
181182

182-
return self._read_rate_block(
183-
rate,
184-
quote,
185-
row_ixs=date_ixs,
186-
col_ixs=currency_ixs,
187-
)
188-
189-
def _read_rate_block(self, rate, quote, row_ixs, col_ixs):
190183
try:
191184
dataset = self._group[DATA][rate][quote][RATES]
192185
except KeyError:
@@ -209,7 +202,12 @@ def _read_rate_block(self, rate, quote, row_ixs, col_ixs):
209202
max_row = row_ixs[-1]
210203
rows = dataset[min_row:max_row + 1] # +1 to be inclusive of end
211204

212-
return rows[row_ixs - min_row][:, col_ixs]
205+
out = rows[row_ixs - min_row][:, col_ixs]
206+
207+
# get_indexer returns -1 for failed lookups. Fill these in with NaN.
208+
out[:, col_ixs == -1] = np.nan
209+
210+
return out
213211

214212
def _check_dts(self, stored, requested):
215213
"""Validate that requested dates are in bounds for what we have stored.

zipline/data/hdf5_daily_bars.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@
107107
from six import iteritems, raise_from, viewkeys
108108
from six.moves import reduce
109109

110+
from zipline.currency import MISSING_CURRENCY_CODE
110111
from zipline.data.bar_reader import (
111112
NoDataAfterDate,
112113
NoDataBeforeDate,
@@ -713,18 +714,15 @@ def currency_codes(self, sids):
713714
# Find the index of requested sids in our stored sids.
714715
ixs = self.sids.searchsorted(sids, side='left')
715716

717+
result = self._currency_codes[ixs]
718+
716719
# searchsorted returns the insertion index (the index of the next highest sid) if the lookup
717-
# fails. Check for this case and raise an error.
720+
# fails. Fill these sids with the special "missing" sentinel.
718721
not_found = (self.sids[ixs] != sids)
719722

720-
if not_found.any():
721-
# TODO: Should we return an unknown sentinel here?
722-
missing = sids[not_found]
723-
raise ValueError(
724-
"Unable to find currency codes for sids:\n{}".format(missing)
725-
)
723+
result[not_found] = MISSING_CURRENCY_CODE
726724

727-
return self._currency_codes[ixs]
725+
return result
728726

729727
@property
730728
def last_available_dt(self):

zipline/data/session_bars.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,8 @@ def currency_codes(self, sids):
5151
Returns
5252
-------
5353
currency_codes : np.array[S3]
54-
Array of currency codes for listing currencies of ``sids``.
54+
Array of currency codes for listing currencies of
55+
``sids``. Implementations should return
56+
zipline.currency.MISSING_CURRENCY_CODE for sids whose currency is
57+
unknown.
5558
"""

0 commit comments

Comments
 (0)