Skip to content

Commit 13777f9

Browse files
author
Scott Sanderson
committed
MAINT: Use object arrays with None for currency codes.
Rather than trying to use fixed-width `S3` byte-string arrays everywhere, which is annoying in Python 3 and makes it harder to represent missing data, just use object arrays with None as the missing value. This is the representation we want anyway for loading currency data in pipelines; the main downsides are performance (which doesn't appear to be meaningfully affected) and difficulty with sorting, which we don't need to do (at least right now).
1 parent 393ae91 commit 13777f9

File tree

9 files changed: 37 additions and 48 deletions.

tests/data/test_daily_bars.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
from toolz import merge
3737
from trading_calendars import get_calendar
3838

39-
from zipline.currency import MISSING_CURRENCY_CODE
4039
from zipline.data.bar_reader import (
4140
NoDataAfterDate,
4241
NoDataBeforeDate,
@@ -188,7 +187,7 @@ def make_equity_daily_bar_currency_codes(cls, country_code, sids):
188187
# Evenly distribute choices among ``sids``.
189188
choices = cls.DAILY_BARS_TEST_CURRENCIES[country_code]
190189
codes = list(islice(cycle(choices), len(sids)))
191-
return Series(index=sids, data=np.array(codes, dtype='S3'))
190+
return Series(index=sids, data=np.array(codes, dtype=object))
192191

193192
@classproperty
194193
def holes(cls):
@@ -531,6 +530,10 @@ def test_listing_currency(self):
531530
).values
532531
assert_equal(all_results, all_expected)
533532

533+
self.assertEqual(all_results.dtype, np.dtype(object))
534+
for code in all_results:
535+
self.assertIsInstance(code, str)
536+
534537
# Check all possible subsets of assets.
535538
for indices in map(list, powerset(range(len(all_assets)))):
536539
# Empty queries aren't currently supported.
@@ -557,10 +560,7 @@ def test_listing_currency_for_nonexistent_asset(self):
557560
# queries.
558561
mixed = np.array(invalid_sids + [valid_sid])
559562
result = self.daily_bar_reader.currency_codes(mixed)
560-
expected = np.array(
561-
[MISSING_CURRENCY_CODE] * 2 + [valid_currency],
562-
dtype='S3'
563-
)
563+
expected = np.array([None] * 2 + [valid_currency])
564564
assert_equal(result, expected)
565565

566566

tests/data/test_fx.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def test_read_before_start_date(self):
146146

147147
for rate in self.FX_RATES_RATE_NAMES:
148148
quote = 'USD'
149-
bases = np.array(['CAD'], dtype='S3')
149+
bases = np.array(['CAD'], dtype=object)
150150
dts = pd.DatetimeIndex([bad_date])
151151
with self.assertRaises(ValueError):
152152
self.reader.get_rates(rate, quote, bases, dts)
@@ -157,7 +157,7 @@ def test_read_after_end_date(self):
157157

158158
for rate in self.FX_RATES_RATE_NAMES:
159159
quote = 'USD'
160-
bases = np.array(['CAD'], dtype='S3')
160+
bases = np.array(['CAD'], dtype=object)
161161
dts = pd.DatetimeIndex([bad_date])
162162
with self.assertRaises(ValueError):
163163
self.reader.get_rates(rate, quote, bases, dts)

zipline/currency.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,6 @@
44
_ALL_CURRENCIES = {}
55

66

7-
# Special sentinel used to represent unknown or missing currencies.
8-
MISSING_CURRENCY_CODE = 'XXX'
9-
10-
117
@total_ordering
128
class Currency(object):
139
"""A currency identifier, as defined by ISO-4217.
@@ -28,8 +24,7 @@ def __new__(cls, code):
2824
try:
2925
return _ALL_CURRENCIES[code]
3026
except KeyError:
31-
# This isn't a real
32-
if code == MISSING_CURRENCY_CODE:
27+
if code is None:
3328
name = "NO CURRENCY"
3429
else:
3530
try:

zipline/data/bcolz_daily_bars.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
from toolz import compose
3535
from trading_calendars import get_calendar
3636

37-
from zipline.currency import MISSING_CURRENCY_CODE
3837
from zipline.data.session_bars import CurrencyAwareSessionBarReader
3938
from zipline.data.bar_reader import (
4039
NoDataAfterDate,
@@ -708,13 +707,13 @@ def get_value(self, sid, dt, field):
708707

709708
def currency_codes(self, sids):
710709
# XXX: This is pretty inefficient. This reader doesn't really support
711-
# country codes, so we always either return USD or
712-
# MISSING_CURRENCY_CODE if we don't know about the sid at all.
710+
# country codes, so we always either return USD or None if we don't
711+
# know about the sid at all.
713712
first_rows = self._first_rows
714713
out = []
715714
for sid in sids:
716715
if sid in first_rows:
717716
out.append('USD')
718717
else:
719-
out.append(MISSING_CURRENCY_CODE)
720-
return np.array(out, dtype='S3')
718+
out.append(None)
719+
return np.array(out, dtype=object)

zipline/data/fx/hdf5.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@
9999
from logbook import Logger
100100
import numpy as np
101101
import pandas as pd
102-
import six
103102

104103
from zipline.utils.memoize import lazyval
104+
from zipline.utils.numpy_utils import bytes_array_to_native_str_object_array
105105

106106
from .base import FXRateReader, DEFAULT_FX_RATE
107107

@@ -177,13 +177,9 @@ def currencies(self):
177177
"""
178178
# Currencies are stored as fixed-length bytes in the file, but we want
179179
# `str` objects in memory.
180-
byte_strings = self._group[INDEX][CURRENCIES][:]
181-
if six.PY3:
182-
values = [c.decode('ascii') for c in byte_strings]
183-
else:
184-
values = byte_strings.astype(object)
185-
186-
return pd.Index(values)
180+
bytes_array = self._group[INDEX][CURRENCIES][:]
181+
objects = bytes_array_to_native_str_object_array(bytes_array)
182+
return pd.Index(objects)
187183

188184
def get_rates(self, rate, quote, bases, dts):
189185
"""Get rates to convert ``bases`` into ``quote``.
@@ -281,6 +277,13 @@ def _write_metadata(self):
281277
def _write_index_group(self, dts, currencies):
282278
"""Write content of /index.
283279
"""
280+
if not is_sorted_ascending(dts):
281+
raise ValueError("dts is not sorted")
282+
283+
for c in currencies:
284+
if not isinstance(c, str) or len(c) != 3:
285+
raise ValueError("Invalid currency: {!r}".format(c))
286+
284287
index_group = self._group.create_group(INDEX)
285288

286289
self._log_writing(INDEX, DTS)

zipline/data/hdf5_daily_bars.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@
107107
from six import iteritems, raise_from, viewkeys
108108
from six.moves import reduce
109109

110-
from zipline.currency import MISSING_CURRENCY_CODE
111110
from zipline.data.bar_reader import (
112111
NoDataAfterDate,
113112
NoDataBeforeDate,
@@ -116,6 +115,7 @@
116115
)
117116
from zipline.data.session_bars import CurrencyAwareSessionBarReader
118117
from zipline.utils.memoize import lazyval
118+
from zipline.utils.numpy_utils import bytes_array_to_native_str_object_array
119119
from zipline.utils.pandas_utils import check_indexes_all_same
120120

121121

@@ -696,7 +696,8 @@ def asset_end_dates(self):
696696

697697
@lazyval
698698
def _currency_codes(self):
699-
return self._country_group[CURRENCY][CODE][:]
699+
bytes_array = self._country_group[CURRENCY][CODE][:]
700+
return bytes_array_to_native_str_object_array(bytes_array)
700701

701702
def currency_codes(self, sids):
702703
"""Get currencies in which prices are quoted for the requested sids.
@@ -708,7 +709,7 @@ def currency_codes(self, sids):
708709
709710
Returns
710711
-------
711-
currency_codes : np.array[S3]
712+
currency_codes : np.array[object]
712713
Array of currency codes for listing currencies of ``sids``.
713714
"""
714715
# Find the index of requested sids in our stored sids.
@@ -720,7 +721,7 @@ def currency_codes(self, sids):
720721
# fails. Fill these sids with the special "missing" sentinel.
721722
not_found = (self.sids[ixs] != sids)
722723

723-
result[not_found] = MISSING_CURRENCY_CODE
724+
result[not_found] = None
724725

725726
return result
726727

zipline/data/session_bars.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ class CurrencyAwareSessionBarReader(SessionBarReader):
3939

4040
@abstractmethod
4141
def currency_codes(self, sids):
42-
"""Get currencies in which prices are quoted for the requested sids.
42+
"""
43+
Get currencies in which prices are quoted for the requested sids.
4344
4445
Assumes that a sid's prices are always quoted in a single currency.
4546
@@ -50,9 +51,8 @@ def currency_codes(self, sids):
5051
5152
Returns
5253
-------
53-
currency_codes : np.array[S3]
54+
currency_codes : np.array[object]
5455
Array of currency codes for listing currencies of
55-
``sids``. Implementations should return
56-
zipline.currency.MISSING_CURRENCY_CODE for sids whose currency is
57-
unknown.
56+
``sids``. Implementations should return None for sids whose
57+
currency is unknown.
5858
"""

zipline/pipeline/loaders/equity_pricing_loader.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,9 @@
1616
from interface import implements
1717
from numpy import iinfo, uint32, multiply
1818

19-
from zipline.currency import MISSING_CURRENCY_CODE
2019
from zipline.data.fx import ExplodingFXRateReader
2120
from zipline.lib.adjusted_array import AdjustedArray
22-
from zipline.utils.numpy_utils import (
23-
repeat_first_axis,
24-
bytes_array_to_native_str_object_array,
25-
)
21+
from zipline.utils.numpy_utils import repeat_first_axis
2622

2723
from .base import PipelineLoader
2824
from .utils import shift_dates
@@ -123,12 +119,7 @@ def load_adjusted_array(self, domain, columns, dates, sids, mask):
123119
)
124120

125121
for c in currency_cols:
126-
codes_1d = bytes_array_to_native_str_object_array(
127-
self.raw_price_reader.currency_codes(sids)
128-
)
129-
# XXX: Should this just be the contract of `currency_codes`?
130-
codes_1d[codes_1d == MISSING_CURRENCY_CODE] = None
131-
122+
codes_1d = self.raw_price_reader.currency_codes(sids)
132123
codes = repeat_first_axis(codes_1d, len(dates))
133124
out[c] = AdjustedArray(
134125
codes,

zipline/testing/fixtures.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2182,7 +2182,7 @@ def write_h5_fx_rates(cls, path):
21822182

21832183
writer.write(
21842184
dts=sessions.values,
2185-
currencies=np.array(cls.FX_RATES_CURRENCIES, dtype='S3'),
2185+
currencies=np.array(cls.FX_RATES_CURRENCIES, dtype=object),
21862186
data=fx_data,
21872187
)
21882188

0 commit comments

Comments
 (0)