Skip to content

Commit 9c51ae4

Browse files
Merge branch 'release' into release-github
# Conflicts: # pm4py/algo/simulation/playout/dfg/algorithm.py # pm4py/algo/simulation/playout/dfg/variants/__init__.py # pm4py/meta.py # pm4py/streaming/__init__.py
2 parents c1adc23 + 9bbaa1b commit 9c51ae4

File tree

40 files changed

+1051
-227
lines changed

40 files changed

+1051
-227
lines changed

CHANGELOG.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,36 @@
11
# PM4Py Changelog
22

3+
## PM4Py 2.2.13 (2021.09.03)
4+
5+
### Fixed
6+
7+
### Removed
8+
9+
### Deprecated
10+
11+
### Changed
12+
* 5723df7b
13+
* xes exporter now reports on xes features and xmlns
14+
* 3b632548
15+
* graphviz based visualizations now expose background color as a parameter
16+
17+
### Added
18+
* 0592157b
19+
* new dfg playout including performance specification
20+
* 85739ba0
21+
* allow pandas df to be used as an iterable for streaming simulation
22+
* 2fa9993f
23+
* path filter that filters the cases of an event log where there is at least one occurrence of the provided path
24+
occurring in a given time range.
25+
* a7ee73a8
26+
* added filter based on rework detection
27+
* c03b6188
28+
* add petri net, reset/inhibitor net and data petri net semantics
29+
### Other
30+
31+
32+
---
33+
334
## PM4Py 2.2.12 (2021.08.19)
435

536
### Fixed

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# The short X.Y version
2727
version = '2.2'
2828
# The full version, including alpha/beta/rc tags
29-
release = '2.2.12'
29+
release = '2.2.13'
3030

3131
# -- General configuration ---------------------------------------------------
3232

examples/pandas_iterable.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import pandas as pd
2+
import pm4py
3+
import os
4+
from pm4py.streaming.conversion import from_pandas
5+
6+
7+
def execute_script():
    """
    Load the receipt CSV log, convert it into the iterable returned by
    ``from_pandas.apply`` and print every yielded item with its position.
    """
    csv_path = os.path.join("..", "tests", "input_data", "receipt.csv")
    dataframe = pm4py.format_dataframe(pd.read_csv(csv_path))
    # enumerate supplies the running index that is printed next to each item
    for position, trace in enumerate(from_pandas.apply(dataframe)):
        print(position, trace)
15+
16+
17+
if __name__ == "__main__":
18+
execute_script()
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import pandas as pd
2+
import pm4py
3+
import os
4+
from pm4py.streaming.conversion import from_pandas
5+
from pm4py.streaming.stream.live_trace_stream import LiveTraceStream
6+
from pm4py.streaming.util import trace_stream_printer
7+
8+
9+
def execute_script():
    """
    Load the receipt CSV log and push it, through the iterable returned by
    ``from_pandas.apply``, into a live trace stream that has a
    ``TraceStreamPrinter`` registered as its observer.
    """
    csv_path = os.path.join("..", "tests", "input_data", "receipt.csv")
    dataframe = pm4py.format_dataframe(pd.read_csv(csv_path))
    traces_iterable = from_pandas.apply(dataframe)
    observer = trace_stream_printer.TraceStreamPrinter()
    live_stream = LiveTraceStream()
    # the observer is registered before the stream is started, as in the
    # original call sequence
    live_stream.register(observer)
    live_stream.start()
    traces_iterable.to_trace_stream(live_stream)
    live_stream.stop()
19+
20+
21+
if __name__ == "__main__":
22+
execute_script()
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import os
2+
3+
import pm4py
4+
from pm4py.algo.simulation.playout.dfg import algorithm as dfg_simulator
5+
6+
7+
def execute_script():
    """
    Discover the frequency and performance DFGs of the receipt XES log and
    print the log produced by the PERFORMANCE variant of the DFG playout.
    """
    xes_path = os.path.join("..", "tests", "input_data", "receipt.xes")
    log = pm4py.read_xes(xes_path)
    frequency_dfg, start_activities, end_activities = pm4py.discover_dfg(log)
    # the start/end activities returned by the second discovery are the ones
    # handed to the simulator (they overwrite those of the first call)
    performance_dfg, start_activities, end_activities = pm4py.discover_performance_dfg(log)
    playout_parameters = {"performance_dfg": performance_dfg}
    simulated_log = dfg_simulator.apply(
        frequency_dfg,
        start_activities,
        end_activities,
        variant=dfg_simulator.Variants.PERFORMANCE,
        parameters=playout_parameters,
    )
    print(simulated_log)
14+
15+
16+
if __name__ == "__main__":
17+
execute_script()

pm4py/algo/filtering/log/paths/paths_filter.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
from pm4py.objects.log.obj import EventLog, Trace
2222
from pm4py.util import exec_utils
2323
from pm4py.util import xes_constants as xes
24-
from pm4py.util.constants import PARAMETER_CONSTANT_ATTRIBUTE_KEY
24+
from pm4py.util.constants import PARAMETER_CONSTANT_ATTRIBUTE_KEY, PARAMETER_CONSTANT_TIMESTAMP_KEY
2525
import deprecation
26+
import sys
2627

2728
from typing import Optional, Dict, Any, Union, Tuple, List
2829
from pm4py.objects.log.obj import EventLog, EventStream, Trace
@@ -32,6 +33,9 @@ class Parameters(Enum):
3233
ATTRIBUTE_KEY = PARAMETER_CONSTANT_ATTRIBUTE_KEY
3334
DECREASING_FACTOR = "decreasingFactor"
3435
POSITIVE = "positive"
36+
TIMESTAMP_KEY = PARAMETER_CONSTANT_TIMESTAMP_KEY
37+
MIN_PERFORMANCE = "min_performance"
38+
MAX_PERFORMANCE = "max_performance"
3539

3640

3741
def apply(log: EventLog, paths: List[Tuple[str, str]], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
@@ -72,6 +76,53 @@ def apply(log: EventLog, paths: List[Tuple[str, str]], parameters: Optional[Dict
7276
return filtered_log
7377

7478

79+
def apply_performance(log: EventLog, provided_path: Tuple[str, str], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
    """
    Selects the cases of an event log according to whether the provided path
    occurs at least once with a duration inside the configured range.

    Parameters
    ----------------
    log
        Event log
    provided_path
        Path between two directly-following activities (expressed as tuple)
    parameters
        Parameters of the filter, including:
            Parameters.ATTRIBUTE_KEY -> Attribute identifying the activity in the log
            Parameters.TIMESTAMP_KEY -> Attribute identifying the timestamp in the log
            Parameters.POSITIVE -> If true (default) the cases containing such an occurrence are kept,
            otherwise the remaining cases are kept
            Parameters.MIN_PERFORMANCE -> Minimal allowed performance of the provided path (default: 0)
            Parameters.MAX_PERFORMANCE -> Maximal allowed performance of the provided path (default: sys.maxsize)

    Returns
    ----------------
    filtered_log
        Filtered event log
    """
    params = {} if parameters is None else parameters
    act_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, params, xes.DEFAULT_NAME_KEY)
    ts_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, params, xes.DEFAULT_TIMESTAMP_KEY)
    lower_bound = exec_utils.get_param_value(Parameters.MIN_PERFORMANCE, params, 0)
    upper_bound = exec_utils.get_param_value(Parameters.MAX_PERFORMANCE, params, sys.maxsize)
    keep_matching = exec_utils.get_param_value(Parameters.POSITIVE, params, True)

    def occurs_within_bounds(trace):
        # any() stops at the first occurrence of the path whose duration
        # (difference of the two .timestamp() values) lies in the range;
        # the duration is only computed for pairs that match the path
        return any(
            lower_bound <= trace[idx + 1][ts_key].timestamp() - trace[idx][ts_key].timestamp() <= upper_bound
            for idx in range(len(trace) - 1)
            if (trace[idx][act_key], trace[idx + 1][act_key]) == provided_path
        )

    result = EventLog(list(), attributes=log.attributes, extensions=log.extensions, classifiers=log.classifiers,
                      omni_present=log.omni_present, properties=log.properties)
    for trace in log:
        # a case is kept when "matches" agrees with the requested polarity
        if occurs_within_bounds(trace) == bool(keep_matching):
            result.append(trace)
    return result
124+
125+
75126
def get_paths_from_log(log, attribute_key="concept:name"):
76127
"""
77128
Get the paths of the log along with their count
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
'''
2+
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
3+
4+
PM4Py is free software: you can redistribute it and/or modify
5+
it under the terms of the GNU General Public License as published by
6+
the Free Software Foundation, either version 3 of the License, or
7+
(at your option) any later version.
8+
9+
PM4Py is distributed in the hope that it will be useful,
10+
but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License
15+
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
16+
'''
17+
from pm4py.algo.filtering.log.rework import rework_filter
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
'''
2+
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
3+
4+
PM4Py is free software: you can redistribute it and/or modify
5+
it under the terms of the GNU General Public License as published by
6+
the Free Software Foundation, either version 3 of the License, or
7+
(at your option) any later version.
8+
9+
PM4Py is distributed in the hope that it will be useful,
10+
but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License
15+
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
16+
'''
17+
from enum import Enum
18+
from pm4py.util import constants, xes_constants, exec_utils
19+
from pm4py.objects.log.obj import EventLog
20+
from collections import Counter
21+
from typing import Optional, Dict, Any
22+
23+
24+
class Parameters(Enum):
    """
    Keys recognised in the ``parameters`` dictionary of the rework filter.
    """
    # attribute of the events that is used as activity
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # minimum number of occurrences of the activity inside a case
    MIN_OCCURRENCES = "min_occurrences"
    # selects whether the cases with the rework, or the other cases, are returned
    POSITIVE = "positive"
28+
29+
30+
def apply(log: EventLog, activity: str, parameters: Optional[Dict[Any, Any]] = None) -> EventLog:
    """
    Applies the rework filter on the provided event log and activity.
    The filter keeps the cases in which the given activity occurs at least
    Parameters.MIN_OCCURRENCES (default: 2) times.

    Setting Parameters.POSITIVE to False inverts the selection: the cases in which the activity
    does not occur, or occurs fewer than Parameters.MIN_OCCURRENCES times, are returned instead.

    Parameters
    -------------------
    log
        Event log
    activity
        Activity of which the rework shall be filtered
    parameters
        Parameters of the filter, including:
        - Parameters.ACTIVITY_KEY => the attribute to use as activity
        - Parameters.MIN_OCCURRENCES => the minimum number of occurrences for the activity
        - Parameters.POSITIVE => if True, keeps the cases having at least MIN_OCCURRENCES occurrences.
                                 if False, keeps the cases where such behavior does not occur.

    Returns
    -----------------
    filtered_log
        Filtered event log
    """
    params = parameters if parameters is not None else {}

    act_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, params, xes_constants.DEFAULT_NAME_KEY)
    threshold = exec_utils.get_param_value(Parameters.MIN_OCCURRENCES, params, 2)
    keep_reworked = exec_utils.get_param_value(Parameters.POSITIVE, params, True)

    result = EventLog(list(), attributes=log.attributes, extensions=log.extensions, classifiers=log.classifiers,
                      omni_present=log.omni_present, properties=log.properties)

    for trace in log:
        occurrences = Counter(event[act_key] for event in trace)
        # the activity has to be present in the case AND reach the threshold
        reworked = activity in occurrences and occurrences[activity] >= threshold
        if bool(reworked) == bool(keep_reworked):
            result.append(trace)

    return result

pm4py/algo/filtering/pandas/paths/paths_filter.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import deprecation
2929
from typing import Optional, Dict, Any, Union, Tuple, List
3030
import pandas as pd
31+
import sys
3132

3233

3334
class Parameters(Enum):
@@ -36,6 +37,8 @@ class Parameters(Enum):
3637
TIMESTAMP_KEY = PARAMETER_CONSTANT_TIMESTAMP_KEY
3738
DECREASING_FACTOR = "decreasingFactor"
3839
POSITIVE = "positive"
40+
MIN_PERFORMANCE = "min_performance"
41+
MAX_PERFORMANCE = "max_performance"
3942

4043

4144
def apply(df: pd.DataFrame, paths: List[Tuple[str, str]], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
@@ -61,11 +64,11 @@ def apply(df: pd.DataFrame, paths: List[Tuple[str, str]], parameters: Optional[D
6164
"""
6265
if parameters is None:
6366
parameters = {}
64-
paths = [path[0] + "," + path[1] for path in paths]
6567
case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
6668
attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
6769
timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
6870
positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
71+
paths = [path[0] + "," + path[1] for path in paths]
6972
df = df.sort_values([case_id_glue, timestamp_key])
7073
filt_df = df[[case_id_glue, attribute_key]]
7174
filt_dif_shifted = filt_df.shift(-1)
@@ -84,6 +87,62 @@ def apply(df: pd.DataFrame, paths: List[Tuple[str, str]], parameters: Optional[D
8487
return ret
8588

8689

90+
def apply_performance(df: pd.DataFrame, provided_path: Tuple[str, str], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
    """
    Filters the cases of a dataframe where there is at least one occurrence of the provided path
    occurring in the defined timedelta range.

    Only pairs of consecutive events belonging to the same case are considered, and the duration
    of an occurrence is measured in (possibly fractional) seconds.

    Parameters
    ----------
    df
        Dataframe
    provided_path
        Path between two activities (expressed as tuple)
    parameters
        Possible parameters of the algorithm, including:
            Parameters.CASE_ID_KEY -> Case ID column in the dataframe
            Parameters.ATTRIBUTE_KEY -> Attribute we want to filter
            Parameters.TIMESTAMP_KEY -> Attribute identifying the timestamp in the log
            Parameters.POSITIVE -> Specifies if the filter should be applied including traces (positive=True)
            or excluding traces (positive=False)
            Parameters.MIN_PERFORMANCE -> Minimal allowed performance of the provided path (default: 0)
            Parameters.MAX_PERFORMANCE -> Maximal allowed performance of the provided path (default: sys.maxsize)

    Returns
    ----------
    df
        Filtered dataframe (sorted by case ID and timestamp)
    """
    if parameters is None:
        parameters = {}
    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    min_performance = exec_utils.get_param_value(Parameters.MIN_PERFORMANCE, parameters, 0)
    max_performance = exec_utils.get_param_value(Parameters.MAX_PERFORMANCE, parameters, sys.maxsize)
    source_activity, target_activity = provided_path[0], provided_path[1]

    df = df.sort_values([case_id_glue, timestamp_key])
    filt_df = df[[case_id_glue, attribute_key, timestamp_key]]
    # row i of the shifted frame holds the values of row i + 1 of filt_df
    filt_df_shifted = filt_df.shift(-1)

    # the shift also pairs the last event of a case with the first event of
    # the following case: such pairs are not paths and must be discarded
    same_case = filt_df[case_id_glue] == filt_df_shifted[case_id_glue]
    # the two activities are compared separately, so that activity names
    # containing the "," separator cannot produce ambiguous matches
    on_path = (filt_df[attribute_key] == source_activity) & (filt_df_shifted[attribute_key] == target_activity)
    # total_seconds() yields float seconds on every pandas version, whereas
    # astype('timedelta64[s]') returns a timedelta dtype on pandas >= 2.0
    elapsed = (filt_df_shifted[timestamp_key] - filt_df[timestamp_key]).dt.total_seconds()
    in_range = (elapsed >= min_performance) & (elapsed <= max_performance)

    matching_cases = filt_df.loc[same_case & on_path & in_range, case_id_glue]
    keep = df[case_id_glue].isin(matching_cases).values
    if positive:
        ret = df[keep]
    else:
        ret = df[~keep]

    ret.attrs = copy(df.attrs) if hasattr(df, 'attrs') else {}
    return ret
144+
145+
87146
@deprecation.deprecated("2.2.11", "3.0.0", details="Removed")
88147
def apply_auto_filter(df, parameters=None):
89148
del df
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
'''
2+
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
3+
4+
PM4Py is free software: you can redistribute it and/or modify
5+
it under the terms of the GNU General Public License as published by
6+
the Free Software Foundation, either version 3 of the License, or
7+
(at your option) any later version.
8+
9+
PM4Py is distributed in the hope that it will be useful,
10+
but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License
15+
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
16+
'''
17+
from pm4py.algo.filtering.pandas.rework import rework_filter

0 commit comments

Comments
 (0)