2121
2222from pm4py .algo .discovery .batches .utils import detection
2323from pm4py .util import exec_utils , constants , xes_constants
24+ import numpy as np
2425
2526
2627class Parameters (Enum ):
@@ -29,6 +30,7 @@ class Parameters(Enum):
2930 START_TIMESTAMP_KEY = constants .PARAMETER_CONSTANT_START_TIMESTAMP_KEY
3031 TIMESTAMP_KEY = constants .PARAMETER_CONSTANT_TIMESTAMP_KEY
3132 CASE_ID_KEY = constants .PARAMETER_CONSTANT_CASEID_KEY
33+ EVENT_ID_KEY = "event_id_key"
3234 MERGE_DISTANCE = "merge_distance"
3335 MIN_BATCH_SIZE = "min_batch_size"
3436
@@ -84,22 +86,38 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A
8486 start_timestamp_key = exec_utils .get_param_value (Parameters .START_TIMESTAMP_KEY , parameters ,
8587 timestamp_key )
8688 case_id_key = exec_utils .get_param_value (Parameters .CASE_ID_KEY , parameters , constants .CASE_CONCEPT_NAME )
89+ event_id_key = exec_utils .get_param_value (Parameters .EVENT_ID_KEY , parameters , constants .DEFAULT_INDEX_KEY )
8790
88- log = log [list ({activity_key , resource_key , start_timestamp_key , timestamp_key , case_id_key })]
89- events = log .to_dict ('records' )
91+ attributes_to_consider = {activity_key , resource_key , start_timestamp_key , timestamp_key , case_id_key }
92+ log_contains_evidkey = event_id_key in log
93+ if log_contains_evidkey :
94+ attributes_to_consider .add (event_id_key )
9095
91- actres_grouping = {}
92-
93- for ev in events :
94- case = ev [case_id_key ]
95- activity = ev [activity_key ]
96- resource = ev [resource_key ]
97- st = ev [start_timestamp_key ].timestamp ()
98- et = ev [timestamp_key ].timestamp ()
96+ log = log [list (attributes_to_consider )]
97+ log [timestamp_key ] = log [timestamp_key ].values .astype (np .int64 ) // 10 ** 9
98+ if start_timestamp_key != timestamp_key :
99+ log [start_timestamp_key ] = log [start_timestamp_key ].values .astype (np .int64 ) // 10 ** 9
99100
100- if (activity , resource ) not in actres_grouping :
101- actres_grouping [(activity , resource )] = []
101+ actres_grouping0 = log .groupby ([activity_key , resource_key ]).agg (list ).to_dict ()
102+ start_timestamps = actres_grouping0 [start_timestamp_key ]
103+ complete_timestamps = actres_grouping0 [timestamp_key ]
104+ cases = actres_grouping0 [case_id_key ]
105+ if log_contains_evidkey :
106+ events_ids = actres_grouping0 [event_id_key ]
102107
103- actres_grouping [(activity , resource )].append ((st , et , case ))
108+ actres_grouping = {}
109+ for k in start_timestamps :
110+ st = start_timestamps [k ]
111+ et = complete_timestamps [k ]
112+ c = cases [k ]
113+ if log_contains_evidkey :
114+ eid = events_ids [k ]
115+ actres_grouping_k = []
116+ for i in range (len (st )):
117+ if log_contains_evidkey :
118+ actres_grouping_k .append ((st [i ], et [i ], c [i ], eid [i ]))
119+ else :
120+ actres_grouping_k .append ((st [i ], et [i ], c [i ]))
121+ actres_grouping [k ] = actres_grouping_k
104122
105123 return detection .detect (actres_grouping , parameters = parameters )
0 commit comments