Skip to content

Commit f26cb5c

Browse files
author
Youcef Sebiat
authored
Merge pull request #24 from X-DataInitiative/CNAM-414-Detailed-Flowchart
Cnam 414 detailed flowchart.
2 parents 4053cce + afad2dc commit f26cb5c

File tree

4 files changed

+348
-17
lines changed

4 files changed

+348
-17
lines changed

src/exploration/core/flowchart.py

Lines changed: 55 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
2+
import warnings
23
from copy import copy
3-
from typing import List
4+
from typing import Dict, List
45

56
from .cohort import Cohort
67
from .metadata import Metadata
@@ -10,7 +11,7 @@ def metadata_from_flowchart(metadata: Metadata, flowchart_json: str) -> Metadata
1011
flowchart_description = json.loads(flowchart_json)
1112
intermediate = flowchart_description[
1213
"intermediate_operations"
13-
] # type: Dict[str, Dict] # noqa:F821
14+
] # type: Dict[str, Dict]
1415
updated_metadata = copy(metadata)
1516
for (_, description) in intermediate.items():
1617
new_cohort = metadata.get_from_description(description)
@@ -33,22 +34,62 @@ class Flowchart:
3334
n-1.
3435
"""
3536

36-
def __init__(self, steps: List[Cohort]):
37-
self.steps = steps # type: List[Cohort]
37+
def __init__(self, cohorts: List[Cohort]):
38+
self._ordered_cohorts = None
39+
self.ordered_cohorts = cohorts
40+
self._compute_steps()
41+
42+
def __iter__(self):
43+
return iter(self.steps)
44+
45+
def __len__(self):
46+
return len(self.ordered_cohorts)
47+
48+
@property
def ordered_cohorts(self):
    """Cohorts the Flowchart was built from, in their given order."""
    return self._ordered_cohorts


@ordered_cohorts.setter
def ordered_cohorts(self, value: List[Cohort]):
    # Only the backing attribute is replaced here; self.steps is not touched.
    self._ordered_cohorts = value
55+
56+
def _compute_steps(self):
57+
steps_length = self.__len__()
58+
if steps_length == 0:
59+
warnings.warn("You are initiating a en Empty Flowchart.")
60+
self.steps = []
61+
elif steps_length == 1:
62+
self.steps = self.ordered_cohorts
63+
elif steps_length == 2:
64+
self.steps = [
65+
self.ordered_cohorts[0],
66+
self.ordered_cohorts[0].intersection(self.ordered_cohorts[1]),
67+
]
68+
else:
69+
new_steps = [self.ordered_cohorts[0]]
70+
for step in self.ordered_cohorts[1:]:
71+
new_steps.append(new_steps[-1].intersection(step))
72+
self.steps = new_steps
73+
74+
def prepend_cohort(self, input: Cohort) -> "Flowchart":
75+
"""
76+
Create a new Flowchart where input is pre-appended to the existing Flowchart.
77+
Parameters
78+
----------
79+
input : Cohort to be pre-appended.
80+
81+
Returns
82+
-------
83+
A new Flowchart object where the new first step is the input Cohort and the
84+
subsequent steps are the current steps.
85+
"""
86+
new_steps = [input]
87+
new_steps.extend(self.ordered_cohorts)
88+
return Flowchart(new_steps)
3889

3990
@staticmethod
def from_json(metadata: Metadata, flowchart_json: str) -> "Flowchart":
    """
    Build a Flowchart from its JSON description.

    Parameters
    ----------
    metadata : Metadata that is merged with the metadata derived from the JSON.
    flowchart_json : JSON string describing the flowchart.

    Returns
    -------
    A Flowchart made of the cohorts fetched from the merged metadata, one per
    step name returned by get_steps, in that order.
    """
    step_names = get_steps(flowchart_json)  # type: List[str]
    flowchart_metadata = metadata_from_flowchart(metadata, flowchart_json)
    merged = metadata.union(flowchart_metadata)  # type: Metadata
    cohorts = [merged.get(name) for name in step_names]
    return Flowchart(cohorts)
45-
46-
def create_flowchart(self, input: Cohort) -> "Flowchart":
47-
"""Create a flowchart for the input."""
48-
new_steps = [input.intersection(self.steps[0])] # type: List[Cohort]
49-
for step in self.steps[1:]:
50-
new_steps.append(new_steps[-1].intersection(step))
51-
return Flowchart(new_steps)
52-
53-
def __iter__(self):
54-
return iter(self.steps)

src/exploration/core/util.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ def fold_right(f: Callable, cohorts: Iterable):
1313

1414

1515
def data_frame_equality(df1: DataFrame, df2: Optional[DataFrame]) -> bool:
    """
    Tell whether two DataFrames contain the same rows.

    Parameters
    ----------
    df1 : First DataFrame.
    df2 : Second DataFrame, may be None.

    Returns
    -------
    True when both arguments are DataFrames and neither has a row that is
    missing from the other; False otherwise, including when either argument is
    not a DataFrame.
    """
    # Guard first: nothing may be dereferenced on df1 or df2 before they are
    # known to be DataFrames, otherwise a None input raises instead of
    # yielding False.
    if not (isinstance(df1, DataFrame) and isinstance(df2, DataFrame)):
        return False
    # Each side is re-selected in the other's column order before subtracting,
    # presumably because subtract matches columns by position.
    # NOTE(review): subtract is a set difference, so the number of duplicate
    # rows is not compared - confirm this is acceptable to callers.
    return (df1.subtract(df2.select(df1.schema.fieldNames())).count() == 0) and (
        df2.subtract(df1.select(df2.schema.fieldNames())).count() == 0
    )
2023

0 commit comments

Comments
 (0)