Skip to content

Commit 98221e8

Browse files
Joe Jevnik (llllllllll)
authored and committed
ENH: add MultiDimensionalDataSet type
1 parent c3ec965 commit 98221e8

File tree

3 files changed

+576
-3
lines changed

3 files changed

+576
-3
lines changed
Lines changed: 287 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,287 @@
1+
from collections import OrderedDict
2+
import itertools
3+
from textwrap import dedent
4+
5+
from nose_parameterized import parameterized
6+
import numpy as np
7+
8+
from zipline.pipeline.data import (
9+
Column,
10+
MultiDimensionalDataSet,
11+
MultiDimensionalDataSetSlice,
12+
)
13+
from zipline.testing import ZiplineTestCase
14+
from zipline.testing.predicates import (
15+
assert_equal,
16+
assert_is,
17+
assert_is_not,
18+
assert_is_not_subclass,
19+
assert_is_subclass,
20+
assert_raises_str,
21+
)
22+
23+
24+
class TestMultiDimensionalDataSet(ZiplineTestCase):
    """Tests for declaring, slicing and inheriting MultiDimensionalDataSet.

    Comments are used instead of method docstrings so that the names shown by
    the test runner are not replaced by docstring text.
    """

    def test_cache(self):
        # Two separately declared datasets with identical extra dimensions
        # must produce distinct slice types, even for equal coordinates.
        class MD1(MultiDimensionalDataSet):
            extra_dims = [('dim_0', ['a', 'b', 'c'])]

        class MD2(MultiDimensionalDataSet):
            extra_dims = [('dim_0', ['a', 'b', 'c'])]

        first_slice = MD1.slice(dim_0='a')
        second_slice = MD2.slice(dim_0='a')

        assert_equal(first_slice.extra_coords, second_slice.extra_coords)
        assert_is_not(first_slice, second_slice)

    def test_empty_extra_dims(self):
        # The error is expected while the class body below is being turned
        # into a class, i.e. at declaration time.
        empty_dims_msg = (
            'MultiDimensionalDataSet must be defined with non-empty extra_dims'
        )
        with assert_raises_str(ValueError, empty_dims_msg):
            class MD(MultiDimensionalDataSet):
                extra_dims = []

    def spec(*dim_pairs):
        # Bundle the (name, values) pairs into a 1-tuple so that each
        # parameterized case passes them as a single ``dims_spec`` argument.
        return (dim_pairs,)

    @parameterized.expand([
        spec(
            ('dim_0', range(10)),
        ),
        spec(
            ('dim_0', range(10)),
            ('dim_1', range(10, 15)),
        ),
        spec(
            ('dim_0', range(10)),
            ('dim_1', range(10, 15)),
            ('dim_2', range(5, 15)),
        ),
        spec(
            ('dim_0', range(6)),
            ('dim_1', {'a', 'b', 'c'}),
            ('dim_2', range(5, 15)),
            ('dim_3', {'b', 'c', 'e'}),
        ),
    ])
    def test_valid_slice(self, dims_spec):
        # For every valid coordinate combination, check memoization, the
        # recorded coordinates, the parent link and the slice's columns.
        class MD(MultiDimensionalDataSet):
            extra_dims = dims_spec

            f8 = Column('f8')
            i8 = Column('i8', missing_value=0)
            ob = Column('O')
            M8 = Column('M8[ns]')
            boolean = Column('?')

        expected_dims = OrderedDict(
            (dim_name, frozenset(dim_values))
            for dim_name, dim_values in dims_spec
        )
        assert_equal(MD.extra_dims, expected_dims)

        dim_names = list(expected_dims)
        # (column attribute name, dtype string) for each column declared on MD
        column_dtypes = [
            ('f8', 'f8'),
            ('i8', 'i8'),
            ('ob', 'O'),
            ('M8', 'M8[ns]'),
            ('boolean', '?'),
        ]

        for coords in itertools.product(*expected_dims.values()):
            Slice = MD.slice(*coords)

            named_coords = list(zip(dim_names, coords))
            split = len(coords) // 2
            alternates = (
                # all positional
                MD.slice(*coords),
                # all keyword
                MD.slice(**dict(named_coords)),
                # mix keyword/positional
                MD.slice(*coords[:split], **dict(named_coords[split:])),
            )
            for other in alternates:
                assert_is(
                    Slice,
                    other,
                    msg='Slices are not properly memoized',
                )

            assert_equal(Slice.extra_coords, OrderedDict(named_coords))

            assert_is(Slice.parent_multidimensional_dataset, MD)

            assert_is_subclass(Slice, MultiDimensionalDataSetSlice)

            expected_columns = {
                (attr, np.dtype(code), Slice) for attr, code in column_dtypes
            }
            actual_columns = {
                (col.name, col.dtype, col.dataset) for col in Slice.columns
            }
            assert_equal(actual_columns, expected_columns)

    # ``spec`` only exists to build the parameter list above; drop it from
    # the class namespace once the decorator has consumed it.
    del spec

    def test_slice_unknown_dims(self):
        # Each entry is (positional args, keyword args, expected message);
        # every call must raise TypeError.
        class MD(MultiDimensionalDataSet):
            extra_dims = [
                ('dim_0', {'a', 'b', 'c'}),
                ('dim_1', {'c', 'd', 'e'}),
            ]

        no_dim_2 = 'MD does not have the following dimension: dim_2'
        failing_calls = [
            # insufficient positional
            (
                (),
                {},
                'no coordinate provided for the following dimensions:'
                ' dim_0, dim_1',
            ),
            (
                ('a',),
                {},
                'no coordinate provided for the following dimension: dim_1',
            ),
            # too many positional
            (
                ('a', 'b', 'c'),
                {},
                'MD has 2 extra dimensions but 3 were given',
            ),
            # mismatched keys
            ((), dict(dim_2='??'), no_dim_2),
            ((), dict(dim_1='??', dim_2='??'), no_dim_2),
            ((), dict(dim_0='??', dim_1='??', dim_2='??'), no_dim_2),
            # the extra keyword dims should be sorted
            (
                (),
                dict(dim_3='??', dim_2='??'),
                'MD does not have the following dimensions: dim_2, dim_3',
            ),
        ]

        for args, kwargs, message in failing_calls:
            with assert_raises_str(TypeError, message):
                MD.slice(*args, **kwargs)

    def test_slice_unknown_dim_label(self):
        # Coordinates that are not members of a dimension's label set must
        # raise ValueError, whether passed positionally or by keyword.
        class MD(MultiDimensionalDataSet):
            extra_dims = [
                ('dim_0', {'a', 'b', 'c'}),
                ('dim_1', {'c', 'd', 'e'}),
            ]

        bad_dim_0 = "'not-in-0' is not a value along the dim_0 dimension"
        bad_dim_1 = "'not-in-1' is not a value along the dim_1 dimension"
        failing_calls = [
            (('not-in-0', 'c'), {}, bad_dim_0),
            ((), dict(dim_0='not-in-0', dim_1='c'), bad_dim_0),
            (('a', 'not-in-1'), {}, bad_dim_1),
            ((), dict(dim_0='a', dim_1='not-in-1'), bad_dim_1),
        ]

        for args, kwargs, message in failing_calls:
            with assert_raises_str(ValueError, message):
                MD.slice(*args, **kwargs)

    def test_inheritence(self):
        # A subclass shares its parent's extra dimensions and its slices
        # expose both inherited and newly declared columns, but a child
        # slice is not a subclass of the matching parent slice.
        class Parent(MultiDimensionalDataSet):
            extra_dims = [
                ('dim_0', {'a', 'b', 'c'}),
                ('dim_1', {'d', 'e', 'f'}),
            ]

            column_0 = Column('f8')
            column_1 = Column('?')

        class Child(Parent):
            column_2 = Column('O')
            column_3 = Column('i8', -1)

        assert_is_subclass(Child, Parent)
        assert_equal(Child.extra_dims, Parent.extra_dims)

        parent_slice = Parent.slice(dim_0='a', dim_1='d')
        child_slice = Child.slice(dim_0='a', dim_1='d')

        assert_is_not_subclass(child_slice, parent_slice)

        all_column_names = ('column_0', 'column_1', 'column_2', 'column_3')
        assert_equal(
            child_slice.columns,
            frozenset(getattr(child_slice, name) for name in all_column_names),
        )

    def test_column_access_without_slice(self):
        # Reading a column directly from an unsliced dataset must raise an
        # AttributeError that tells the user to slice first.
        class Parent(MultiDimensionalDataSet):
            extra_dims = [
                ('dim_0', {'a', 'b', 'c'}),
                ('dim_1', {'d', 'e', 'f'}),
            ]

            column_0 = Column('f8')
            column_1 = Column('?')

        class Child(Parent):
            column_2 = Column('O')
            column_3 = Column('i8', -1)

        # NOTE(review): the listing this was reconstructed from had lost its
        # leading whitespace. The example line is assumed to be indented four
        # spaces relative to the surrounding text -- confirm against the
        # message actually raised by MultiDimensionalDataSet.
        template = dedent(
            """\
            Attempted to access column from a MultiDimensionalDataSet.
            You must first slice the dataset along the extra dimensions like:

                %s.slice(...).%s
            """,
        )

        parent_columns = ('column_0', 'column_1')
        child_columns = parent_columns + ('column_2', 'column_3')
        cases = [
            (Parent, 'Parent', parent_columns),
            (Child, 'Child', child_columns),
        ]

        for dataset, dataset_name, column_names in cases:
            for column_name in column_names:
                expected_msg = template % (dataset_name, column_name)
                with assert_raises_str(AttributeError, expected_msg):
                    getattr(dataset, column_name)

zipline/pipeline/data/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,18 @@
11
from .equity_pricing import EquityPricing, USEquityPricing
2-
from .dataset import DataSet, Column, BoundColumn
2+
from .dataset import (
3+
BoundColumn,
4+
Column,
5+
DataSet,
6+
MultiDimensionalDataSet,
7+
MultiDimensionalDataSetSlice,
8+
)
39

410
__all__ = [
511
'BoundColumn',
612
'Column',
713
'DataSet',
814
'EquityPricing',
15+
'MultiDimensionalDataSet',
16+
'MultiDimensionalDataSetSlice',
917
'USEquityPricing',
1018
]

0 commit comments

Comments
 (0)