Skip to content

Commit 235c092

Browse files
feat(pm4py): preparing for 2.2.27
2 parents 87cd139 + 0c0b8dd commit 235c092

File tree

11 files changed

+114
-33
lines changed

11 files changed

+114
-33
lines changed

CHANGELOG.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,29 @@
11
# Changelog of pm4py
22

3+
## pm4py 2.2.27 (2022.08.XX)
4+
5+
### Added
6+
* 58e266610e82cfcc41868313f7b9ccfd9975d49c
7+
* discover_objects_graph utility for OCELs.
8+
9+
### Changed
10+
* 1cbd37ac4b54a4c0e943b506ed685435f003640b
11+
* performance improvement of batch detection on Pandas dataframes.
12+
* 94dd96e0095f7cb1ef8d1eb48bd3da0a3cd85793
13+
* minor changes to DFG variants simulation.
14+
15+
### Deprecated
16+
17+
### Fixed
18+
* 98fd3c740d8b6ae2dfde4d7a018f181030f22175
19+
* fixed reflexivity in EventLog eventually_follows filter.
20+
* 9423897cdf0ea293ff1b032a0d4fa49ba746709c
21+
* fixed chunk_regex XES importer.
22+
23+
### Removed
24+
25+
### Other
26+
327
## pm4py 2.2.26 (2022.08.05)
428

529
### Added
@@ -8,6 +32,7 @@
832

933
### Changed
1034
* 5c06d520182317d140bd1b82d9d986c3edc81cf7
35+
* 6a2eb404ba240b2c04eb91e7cf1407f72c5ae3e5
1136
* minor fixes to DFG simulation
1237
* fe1aa9c5efa7dc274e728a769625a784d7f87c6f
1338
* added default option for background color setup

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# The short X.Y version
2727
version = '2.2'
2828
# The full version, including alpha/beta/rc tags
29-
release = '2.2.26'
29+
release = '2.2.27'
3030

3131
# -- General configuration ---------------------------------------------------
3232

pm4py/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,6 @@
6161
from pm4py.write import write_xes, write_petri_net, write_process_tree, write_dfg, write_bpmn, write_pnml, write_ptml, write_ocel
6262
from pm4py.org import discover_handover_of_work_network, discover_activity_based_resource_similarity, discover_subcontracting_network, discover_working_together_network, discover_organizational_roles, discover_network_analysis
6363
from pm4py.ml import split_train_test, get_prefixes_from_log
64-
from pm4py.ocel import ocel_get_object_types, ocel_get_attribute_names, ocel_flattening, ocel_object_type_activities, ocel_objects_ot_count, ocel_temporal_summary, ocel_objects_summary
64+
from pm4py.ocel import ocel_get_object_types, ocel_get_attribute_names, ocel_flattening, ocel_object_type_activities, ocel_objects_ot_count, ocel_temporal_summary, ocel_objects_summary, discover_objects_graph
6565

6666
time.clock = time.process_time

pm4py/algo/discovery/batches/utils/detection.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
from typing import Tuple, List, Any, Set, Optional, Dict, Union
1919

2020
from pm4py.util import exec_utils
21+
import heapq
22+
from copy import copy
2123

2224

2325
class Parameters(Enum):
@@ -78,18 +80,15 @@ def __merge_near_intervals(intervals: List[Tuple[float, float, Set[Any]]], max_a
7880
# decide to merge interval i and i+1
7981
new_interval = (min(intervals[i][0], intervals[i + 1][0]), max(intervals[i][1], intervals[i + 1][1]),
8082
intervals[i][2].union(intervals[i + 1][2]))
81-
# add the new interval to the list
82-
intervals.append(new_interval)
8383
# remove the i+1 interval
8484
del intervals[i + 1]
8585
# remove the i interval
8686
del intervals[i]
87-
# sort the intervals
88-
intervals.sort()
87+
# add the new interval to the list
88+
heapq.heappush(intervals, new_interval)
8989
# set the variable continue_cycle to True
9090
continue_cycle = True
91-
# interrupt the current iteration on the intervals
92-
break
91+
i = i - 1
9392
i = i + 1
9493
return intervals
9594

@@ -152,9 +151,9 @@ def __detect_single(events: List[Tuple[float, float, str]], parameters: Optional
152151
merge_distance = exec_utils.get_param_value(Parameters.MERGE_DISTANCE, parameters, 15 * 60)
153152
min_batch_size = exec_utils.get_param_value(Parameters.MIN_BATCH_SIZE, parameters, 2)
154153

155-
intervals = [(e[0], e[1], {(e[0], e[1], e[2])}) for e in
154+
intervals = [(e[0], e[1], {copy(e)}) for e in
156155
events]
157-
intervals.sort()
156+
heapq.heapify(intervals)
158157
intervals = __merge_overlapping_intervals(intervals)
159158
intervals = __merge_near_intervals(intervals, merge_distance)
160159
batches = [x for x in intervals if len(x[2]) >= min_batch_size]

pm4py/algo/discovery/batches/variants/pandas.py

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from pm4py.algo.discovery.batches.utils import detection
2323
from pm4py.util import exec_utils, constants, xes_constants
24+
import numpy as np
2425

2526

2627
class Parameters(Enum):
@@ -29,6 +30,7 @@ class Parameters(Enum):
2930
START_TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_START_TIMESTAMP_KEY
3031
TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
3132
CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY
33+
EVENT_ID_KEY = "event_id_key"
3234
MERGE_DISTANCE = "merge_distance"
3335
MIN_BATCH_SIZE = "min_batch_size"
3436

@@ -84,22 +86,38 @@ def apply(log: pd.DataFrame, parameters: Optional[Dict[Union[str, Parameters], A
8486
start_timestamp_key = exec_utils.get_param_value(Parameters.START_TIMESTAMP_KEY, parameters,
8587
timestamp_key)
8688
case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
89+
event_id_key = exec_utils.get_param_value(Parameters.EVENT_ID_KEY, parameters, constants.DEFAULT_INDEX_KEY)
8790

88-
log = log[list({activity_key, resource_key, start_timestamp_key, timestamp_key, case_id_key})]
89-
events = log.to_dict('records')
91+
attributes_to_consider = {activity_key, resource_key, start_timestamp_key, timestamp_key, case_id_key}
92+
log_contains_evidkey = event_id_key in log
93+
if log_contains_evidkey:
94+
attributes_to_consider.add(event_id_key)
9095

91-
actres_grouping = {}
92-
93-
for ev in events:
94-
case = ev[case_id_key]
95-
activity = ev[activity_key]
96-
resource = ev[resource_key]
97-
st = ev[start_timestamp_key].timestamp()
98-
et = ev[timestamp_key].timestamp()
96+
log = log[list(attributes_to_consider)]
97+
log[timestamp_key] = log[timestamp_key].values.astype(np.int64) // 10**9
98+
if start_timestamp_key != timestamp_key:
99+
log[start_timestamp_key] = log[start_timestamp_key].values.astype(np.int64) // 10**9
99100

100-
if (activity, resource) not in actres_grouping:
101-
actres_grouping[(activity, resource)] = []
101+
actres_grouping0 = log.groupby([activity_key, resource_key]).agg(list).to_dict()
102+
start_timestamps = actres_grouping0[start_timestamp_key]
103+
complete_timestamps = actres_grouping0[timestamp_key]
104+
cases = actres_grouping0[case_id_key]
105+
if log_contains_evidkey:
106+
events_ids = actres_grouping0[event_id_key]
102107

103-
actres_grouping[(activity, resource)].append((st, et, case))
108+
actres_grouping = {}
109+
for k in start_timestamps:
110+
st = start_timestamps[k]
111+
et = complete_timestamps[k]
112+
c = cases[k]
113+
if log_contains_evidkey:
114+
eid = events_ids[k]
115+
actres_grouping_k = []
116+
for i in range(len(st)):
117+
if log_contains_evidkey:
118+
actres_grouping_k.append((st[i], et[i], c[i], eid[i]))
119+
else:
120+
actres_grouping_k.append((st[i], et[i], c[i]))
121+
actres_grouping[k] = actres_grouping_k
104122

105123
return detection.detect(actres_grouping, parameters=parameters)

pm4py/algo/filtering/log/ltl/ltl_checker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ def eventually_follows(log: EventLog, attribute_values: List[str], parameters: O
326326
for c in itertools.product(*occurrences):
327327
ok = True
328328
for i in range(len(c)-1):
329-
if c[i] > c[i+1]:
329+
if c[i] >= c[i+1]:
330330
ok = False
331331
break
332332
if ok:

pm4py/algo/simulation/playout/dfg/variants/classic.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,11 +206,17 @@ def apply(dfg: Dict[Tuple[str, str], int], start_activities: Dict[str, int], end
206206
overall_probability = 0.0
207207

208208
final_traces = []
209+
max_occ = 0
209210

210211
start_time = time.time()
211212
for tr, p in get_traces(dfg, start_activities, end_activities, parameters=parameters):
212-
if (interrupt_simulation_when_dfg_complete and interrupt_break_condition) or not (
213-
len(final_traces) < max_no_variants and overall_probability <= min_weighted_probability):
213+
if interrupt_simulation_when_dfg_complete and interrupt_break_condition:
214+
break
215+
if len(final_traces) >= max_no_variants:
216+
interrupted = True
217+
break
218+
if overall_probability > min_weighted_probability:
219+
interrupted = True
214220
break
215221
current_time = time.time()
216222
if (current_time - start_time) > max_execution_time:
@@ -236,6 +242,10 @@ def apply(dfg: Dict[Tuple[str, str], int], start_activities: Dict[str, int], end
236242
diff_original_dfg = set(dfg).difference(simulated_traces_dfg)
237243
interrupt_break_condition = len(diff_original_sa) == 0 and len(diff_original_ea) == 0 and len(
238244
diff_original_dfg) == 0
245+
var_occ = math.ceil(p * max_no_variants)
246+
max_occ = max(max_occ, var_occ)
247+
if var_occ < min_variant_occ <= max_occ:
248+
break
239249
final_traces.append((-p, tr))
240250
if interrupt_simulation_when_dfg_complete and interrupt_break_condition:
241251
break
@@ -248,12 +258,8 @@ def apply(dfg: Dict[Tuple[str, str], int], start_activities: Dict[str, int], end
248258
if return_variants:
249259
# returns the variants instead of the log
250260
variants = {}
251-
max_occ = 0
252261
for p, tr in final_traces:
253262
var_occ = math.ceil(-p * max_no_variants)
254-
max_occ = max(max_occ, var_occ)
255-
if max_occ >= min_variant_occ and var_occ < min_variant_occ:
256-
break
257263
variants[tr] = var_occ
258264

259265
if not (interrupted and return_only_if_complete):

pm4py/meta.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
1616
'''
1717
__name__ = 'pm4py'
18-
VERSION = '2.2.26'
18+
VERSION = '2.2.27'
1919
__version__ = VERSION
2020
__doc__ = 'Process Mining for Python (PM4Py)'
2121
__author__ = 'Fraunhofer Institute for Applied Technology'

pm4py/objects/log/importer/xes/variants/chunk_regex.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def import_log_from_file_object(F, encoding, file_size=sys.maxsize, parameters=N
9393
if idx > -1:
9494
tag = el[:idx]
9595
el = el.split('\"')
96+
el[-1] = el[-1].strip()
9697
if tag == "string":
9798
curr_els_attrs[-1][el[1]] = el[3]
9899
if el[-1] != '/':

pm4py/ocel.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
1616
'''
1717

18-
from typing import List, Dict, Collection
18+
from typing import List, Dict, Collection, Set, Tuple
1919

2020
import pandas as pd
2121

@@ -180,3 +180,35 @@ def ocel_objects_summary(ocel: OCEL) -> pd.DataFrame:
180180
graph[o1].add(o2)
181181
objects_summary["interacting_objects"] = objects_summary[ocel.object_id_column].map(graph)
182182
return objects_summary
183+
184+
185+
def discover_objects_graph(ocel: OCEL, graph_type: str = "object_interaction") -> Set[Tuple[str, str]]:
186+
"""
187+
Discovers an object graph from the provided object-centric event log
188+
189+
:param ocel: object-centric event log
190+
:param graph_type: type of graph to consider (object_interaction, object_descendants, object_inheritance, object_cobirth, object_codeath)
191+
:rtype: ``Set[Tuple[str, str]]``
192+
193+
.. code-block:: python3
194+
195+
import pm4py
196+
197+
ocel = pm4py.read_ocel('trial.ocel')
198+
obj_graph = pm4py.discover_objects_graph(ocel, graph_type='object_interaction')
199+
"""
200+
if graph_type == "object_interaction":
201+
from pm4py.algo.transformation.ocel.graphs import object_interaction_graph
202+
return object_interaction_graph.apply(ocel)
203+
elif graph_type == "object_descendants":
204+
from pm4py.algo.transformation.ocel.graphs import object_descendants_graph
205+
return object_descendants_graph.apply(ocel)
206+
elif graph_type == "object_inheritance":
207+
from pm4py.algo.transformation.ocel.graphs import object_inheritance_graph
208+
return object_inheritance_graph.apply(ocel)
209+
elif graph_type == "object_cobirth":
210+
from pm4py.algo.transformation.ocel.graphs import object_cobirth_graph
211+
return object_cobirth_graph.apply(ocel)
212+
elif graph_type == "object_codeath":
213+
from pm4py.algo.transformation.ocel.graphs import object_codeath_graph
214+
return object_codeath_graph.apply(ocel)

0 commit comments

Comments
 (0)