Skip to content

Commit 5114ce1

Browse files
committed
rebuild and retest
1 parent ae86323 commit 5114ce1

File tree

16 files changed

+11789
-11475
lines changed

16 files changed

+11789
-11475
lines changed

build/lib/data_algebra/arrow.py

Lines changed: 33 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
import data_algebra.data_ops
44
import data_algebra.flow_text
5+
from data_algebra.shift_pipe_action import ShiftPipeAction
56

67

7-
class Arrow(abc.ABC):
8+
class Arrow(ShiftPipeAction):
89
"""
910
Arrow from category theory: see Steve Awodey,
1011
"Category Theory, 2nd Edition", Oxford Univ. Press, 2010 pg. 4.
@@ -13,7 +14,7 @@ class Arrow(abc.ABC):
1314
"""
1415

1516
def __init__(self):
16-
pass
17+
ShiftPipeAction.__init__(self)
1718

1819
@abc.abstractmethod
1920
def dom(self):
@@ -23,28 +24,16 @@ def dom(self):
2324
def cod(self):
2425
"""return co-domain, object at head of arrow"""
2526

26-
@abc.abstractmethod
27-
def apply_to(self, b):
28-
"""apply_to b, compose arrows (right to left)"""
29-
3027
# noinspection PyPep8Naming
3128
@abc.abstractmethod
32-
def act_on(self, X):
33-
"""act on X, must associate with composition"""
29+
def act_on(self, b):
30+
"""act on b, must associate with composition"""
3431

3532
# noinspection PyPep8Naming
3633
def transform(self, X):
3734
"""transform X, may or may not associate with composition"""
3835
return self.act_on(X)
3936

40-
def __rshift__(self, other): # override self >> other
41-
return other.apply_to(self)
42-
43-
def __rrshift__(self, other): # override other >> self
44-
if isinstance(other, Arrow):
45-
return self.apply_to(other)
46-
return self.act_on(other)
47-
4837

4938
class DataOpArrow(Arrow):
5039
"""
@@ -79,39 +68,44 @@ def get_feature_names(self):
7968
cp = self.outgoing_columns.copy()
8069
return cp
8170

82-
def apply_to(self, b):
83-
"""replace self input table with b"""
71+
def act_on(self, b, *, correct_ordered_first_call: bool = False):
72+
"""
73+
Apply self onto b.
74+
75+
:param b: item to act on, or item that has been sent to self.
76+
:param correct_ordered_first_call: if True indicates this call is from __rshift__ or __rrshift__ and not the fallback paths.
77+
"""
78+
assert isinstance(correct_ordered_first_call, bool)
8479
if isinstance(b, data_algebra.data_ops.ViewRepresentation):
8580
b = DataOpArrow(b)
86-
assert isinstance(b, DataOpArrow)
87-
# check categorical arrow composition conditions
88-
missing = set(self.incoming_columns) - set(b.outgoing_columns)
89-
if len(missing) > 0:
90-
raise ValueError("missing required columns: " + str(missing))
91-
excess = set(b.outgoing_columns) - set(self.incoming_columns)
92-
if len(excess) > 0:
93-
raise ValueError("extra incoming columns: " + str(excess))
94-
new_pipeline = self.pipeline.replace_leaves({self.free_table_key: b.pipeline})
95-
new_pipeline.get_tables() # check tables are compatible
96-
res = DataOpArrow(
97-
pipeline=new_pipeline,
98-
free_table_key=b.free_table_key,
99-
)
100-
return res
101-
102-
# noinspection PyPep8Naming
103-
def act_on(self, X):
81+
if isinstance(b, DataOpArrow):
82+
# check categorical arrow composition conditions
83+
missing = set(self.incoming_columns) - set(b.outgoing_columns)
84+
if len(missing) > 0:
85+
raise ValueError("missing required columns: " + str(missing))
86+
excess = set(b.outgoing_columns) - set(self.incoming_columns)
87+
if len(excess) > 0:
88+
raise ValueError("extra incoming columns: " + str(excess))
89+
new_pipeline = self.pipeline.replace_leaves({self.free_table_key: b.pipeline})
90+
new_pipeline.get_tables() # check tables are compatible
91+
res = DataOpArrow(
92+
pipeline=new_pipeline,
93+
free_table_key=b.free_table_key,
94+
)
95+
return res
96+
if correct_ordered_first_call and isinstance(b, ShiftPipeAction):
97+
return b.act_on(self, correct_ordered_first_call=False) # fall back
10498
# assume a pandas.DataFrame compatible object
10599
# noinspection PyUnresolvedReferences
106-
cols = set(X.columns)
100+
cols = set(b.columns)
107101
missing = set(self.incoming_columns) - cols
108102
if len(missing) > 0:
109103
raise ValueError("missing required columns: " + str(missing))
110104
excess = cols - set(self.incoming_columns)
111105
assert len(excess) == 0
112106
if len(excess) > 0:
113-
X = X[self.incoming_columns]
114-
return self.pipeline.act_on(X)
107+
b = b[self.incoming_columns]
108+
return self.pipeline.act_on(b)
115109

116110
def dom(self):
117111
return DataOpArrow(

build/lib/data_algebra/cdata.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import data_algebra.data_model
1111
import data_algebra.util
1212
import data_algebra.data_ops
13+
from data_algebra.shift_pipe_action import ShiftPipeAction
1314

1415

1516
def _str_list_to_html(lst: Iterable[str]) -> str:
@@ -257,7 +258,7 @@ def map_from_rows(self):
257258
return RecordMap(blocks_out=self, strict=self.strict)
258259

259260

260-
class RecordMap:
261+
class RecordMap(ShiftPipeAction):
261262
"""
262263
Class for specifying general record to record transforms.
263264
"""
@@ -280,6 +281,7 @@ def __init__(
280281
:param blocks_out: outgoing record specification, None for row-records.
281282
:param strict: if True insist block be strict, and in and out blocks agree on row-form columns.
282283
"""
284+
ShiftPipeAction.__init__(self)
283285
assert isinstance(strict, bool)
284286
self.strict = strict
285287
if blocks_in is not None:
@@ -426,10 +428,6 @@ def transform(
426428
blocks_out=self.blocks_out,
427429
)
428430
return X
429-
430-
def act_on(self, X):
431-
assert isinstance(X, data_algebra.data_ops.ViewRepresentation)
432-
return X.convert_records(self)
433431

434432
def compose(self, other):
435433
"""
@@ -534,12 +532,17 @@ def inverse(self):
534532
"""
535533
assert self.strict
536534
return RecordMap(blocks_in=self.blocks_out, blocks_out=self.blocks_in, strict=True)
537-
538-
def __rshift__(self, other): # override self >> other
539-
return self.act_on(other)
540-
541-
def __rrshift__(self, other): # override other >> self
542-
return self.transform(other)
535+
536+
def act_on(self, b, *, correct_ordered_first_call: bool = False):
    """
    Apply self onto b (the meaning of ``b >> self``).

    :param b: item to act on, or item that has been sent to self.
    :param correct_ordered_first_call: if True indicates this call is from
           __rshift__ or __rrshift__ and not the fallback paths.
    :return: self.compose(b) when b is a RecordMap; b.convert_records(self)
             when b is a ViewRepresentation; the peer's act_on result on the
             first (non-fallback) call when b is another ShiftPipeAction;
             otherwise self.transform(b).
    """
    assert isinstance(correct_ordered_first_call, bool)
    if isinstance(b, RecordMap):
        # Return the composed map. Without the return the composition is
        # computed and discarded, and control falls through to the branches
        # below, which are not meant for RecordMap arguments.
        return self.compose(b)
    if isinstance(b, data_algebra.data_ops.ViewRepresentation):
        return b.convert_records(self)
    if correct_ordered_first_call and isinstance(b, ShiftPipeAction):
        # fall back to peer's action; the flag is cleared so the peer
        # does not bounce the call back to us
        return b.act_on(self, correct_ordered_first_call=False)
    # assume table like
    return self.transform(b)
543546

544547
def fmt(self) -> str:
545548
"""Format for informal presentation."""

build/lib/data_algebra/data_ops.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
ordered_union,
2525
)
2626
import data_algebra.util
27+
from data_algebra.shift_pipe_action import ShiftPipeAction
2728

2829

2930
_have_black = False
@@ -411,36 +412,38 @@ def __ne__(self, other):
411412

412413
# composition
413414

414-
# noinspection PyPep8Naming
415-
def act_on(self, X, *, data_model=None):
415+
def act_on(self, b, *, correct_ordered_first_call: bool = False):
416416
"""
417-
apply self to X, must associate with composition
417+
apply self to b, must associate with composition
418418
Operator is strict about column names.
419419
420-
:param X: input data frame
421-
:param data_model implementation to use
422-
:return: transformed result
420+
:param b: input data frame
421+
:param correct_ordered_first_call: indicate not on fallback path
422+
:return: transformed or composed result
423423
"""
424+
assert isinstance(correct_ordered_first_call, bool)
424425
tables = self.get_tables()
425-
if isinstance(X, ViewRepresentation):
426+
if isinstance(b, ViewRepresentation):
426427
# insert to only table or if more than one, table with matching key
427428
if len(tables) == 1:
428429
key = list(tables.keys())[0]
429430
else:
430-
key = X.key
431+
key = b.key
431432
assert isinstance(key, str)
432433
old = tables[key]
433-
assert set(X.column_names) == set(old.column_names) # this is defending associativity of composition against table narrowing
434-
return self.replace_leaves({key: X})
434+
assert set(b.column_names) == set(old.column_names) # this is defending associativity of composition against table narrowing
435+
return self.replace_leaves({key: b})
436+
# see if b is ShiftPipeAction, so it can handle the mapping (using fact data is not a ShiftPipeAction instance)
437+
if correct_ordered_first_call and isinstance(b, ShiftPipeAction):
438+
return b.act_on(self, correct_ordered_first_call=False)
435439
# assume a table
436440
assert len(tables) == 1
437441
key = list(tables.keys())[0]
438442
assert isinstance(key, str)
439443
old = tables[key]
440-
assert set(X.columns) == set(old.column_names)
444+
assert set(b.columns) == set(old.column_names)
441445
return self.transform(
442-
X=X,
443-
data_model=data_model,
446+
b,
444447
strict=True,
445448
)
446449

build/lib/data_algebra/data_ops_types.py

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import data_algebra.expr_rep
99
import data_algebra.cdata
1010
import data_algebra.OrderedSet
11+
from data_algebra.shift_pipe_action import ShiftPipeAction
1112

1213

1314
class MethodUse(NamedTuple):
@@ -19,12 +20,13 @@ class MethodUse(NamedTuple):
1920
is_ordered: bool = False
2021

2122

22-
class OperatorPlatform(abc.ABC):
23+
class OperatorPlatform(ShiftPipeAction):
2324
"""Abstract class representing ability to apply data_algebra operations."""
2425

2526
node_name: str
2627

2728
def __init__(self, *, node_name: str):
29+
ShiftPipeAction.__init__(self)
2830
assert isinstance(node_name, str)
2931
self.node_name = node_name
3032

@@ -73,24 +75,6 @@ def replace_leaves(self, replacement_map: Dict[str, Any]):
7375
:return: new operator DAG
7476
"""
7577

76-
# noinspection PyPep8Naming
77-
@abc.abstractmethod
78-
def act_on(self, X, *, data_model=None):
79-
"""
80-
apply self to X, must associate with composition
81-
Operator is strict about column names.
82-
83-
:param X: input data frame
84-
:param data_model implementation to use
85-
:return: transformed result
86-
"""
87-
88-
def __rshift__(self, other): # override self >> other
89-
return other.act_on(self)
90-
91-
def __rrshift__(self, other): # override other >> self
92-
return self.act_on(other)
93-
9478
# imitate a method
9579
def use(self, user_function, *args, **kwargs):
9680
"""
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
2+
import abc
3+
4+
5+
class ShiftPipeAction(abc.ABC):
6+
"""
7+
Class representing mapping a >> b to b.act_on(a).
8+
This is read as "sending a to b".
9+
"""
10+
def __init__(self) -> None:
11+
pass
12+
13+
@abc.abstractmethod
14+
def act_on(self, b, *, correct_ordered_first_call: bool = False):
15+
"""
16+
Apply self onto b.
17+
18+
:param b: item to act on, or item that has been sent to self.
19+
:param correct_ordered_first_call: if True indicates this call is from __rshift__ or __rrshift__ and not the fallback paths.
20+
"""
21+
22+
def __rshift__(self, b): # override self >> b
23+
"""
24+
Delegate self >> b to b.act_on(self) if b is a ShiftPipeAction instance, else call self.act_on(b)
25+
This is read as "sending self to b".
26+
"""
27+
if isinstance(b, ShiftPipeAction):
28+
# this is the expected path
29+
return b.act_on(self, correct_ordered_first_call=True)
30+
# fall back to our action
31+
return self.act_on(b, correct_ordered_first_call=False)
32+
33+
def __rrshift__(self, b): # override b >> self
34+
"""
35+
Delegate b >> self to self.act_on(b).
36+
This is read as sending b to self.
37+
"""
38+
return self.act_on(b, correct_ordered_first_call=True)

0 commit comments

Comments
 (0)