Skip to content

Commit 393bf46

Browse files
Merge branch 'release' into release-github
2 parents 8bea3b6 + e9ca84b commit 393bf46

File tree

19 files changed

+152
-177
lines changed

19 files changed

+152
-177
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
/venv/
44
*.mps
55
*.sol
6-
debug.log
6+
*debug.log*

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ RUN apt-get -y install libtool flex bison pkg-config g++ libssl-dev automake
1313
RUN apt-get -y install libjemalloc-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev python3-dev autoconf flex bison cmake
1414
RUN apt-get -y install libxml2-dev libxslt-dev libfreetype6-dev libsuitesparse-dev
1515
RUN pip install -U wheel six pytest
16-
RUN pip install MarkupSafe==1.1.1 backcall==0.2.0 certifi==2020.11.8 colorama==0.4.3 decorator==4.4.2 ipython-genutils==0.2.0 joblib==0.17.0 more-itertools==8.6.0 mpmath==1.1.0 numpy==1.19.4 parso==0.8.0 pickleshare==0.7.5 Pillow==8.0.1 Pygments==2.7.2 pyparsing==2.4.7 pytz==2020.4 setuptools==50.3.2 six==1.15.0 sortedcontainers==2.3.0 threadpoolctl==2.1.0 wcwidth==0.2.5 cycler==0.10.0 jedi==0.17.2 jinja2==2.11.2 kiwisolver==1.3.1 networkx==2.5 packaging==20.4 prompt-toolkit==3.0.7 python-dateutil==2.8.1 scipy==1.5.4 traitlets==5.0.5 zipp==3.4.0 importlib-metadata==2.0.0 ipython==7.19.0 jsonpickle==1.4.1 deprecation==2.1.0 graphviz==0.14.2 intervaltree==3.1.0 lxml==4.6.1 matplotlib==3.3.2 pandas==1.1.4 pulp==2.1 pydotplus==2.0.2 pyvis==0.1.8.2 scikit-learn==0.23.2 StringDist==1.0.9 sympy==1.6.2 cython==0.29.21 tqdm==4.51.0
16+
RUN pip install MarkupSafe==1.1.1 backcall==0.2.0 certifi==2020.11.8 colorama==0.4.3 decorator==4.4.2 ipython-genutils==0.2.0 joblib==0.17.0 more-itertools==8.6.0 mpmath==1.1.0 numpy==1.19.4 parso==0.8.0 pickleshare==0.7.5 Pillow==8.0.1 Pygments==2.7.2 pyparsing==2.4.7 pytz==2020.4 setuptools==50.3.2 six==1.15.0 sortedcontainers==2.3.0 threadpoolctl==2.1.0 wcwidth==0.2.5 cycler==0.10.0 jedi==0.17.2 jinja2==2.11.2 kiwisolver==1.3.1 networkx==2.5 packaging==20.4 prompt-toolkit==3.0.7 python-dateutil==2.8.1 scipy==1.5.4 traitlets==5.0.5 zipp==3.4.0 importlib-metadata==2.0.0 ipython==7.19.0 jsonpickle==1.4.1 deprecation==2.1.0 graphviz==0.14.2 intervaltree==3.1.0 lxml==4.6.1 matplotlib==3.3.3 pandas==1.1.4 pulp==2.1 pydotplus==2.0.2 pyvis==0.1.8.2 scikit-learn==0.23.2 StringDist==1.0.9 sympy==1.6.2 cython==0.29.21 tqdm==4.51.0
1717

1818
COPY . /app
1919
RUN cd /app && cp tests/test_dockers/setups/setup_master.py setup.py && python setup.py install

README.THIRD_PARTY.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ to change as libraries are added or removed.
2626
| kiwisolver | https://pypi.org/project/kiwisolver | BSD | 1.3.1 | X | X |
2727
| lxml | https://pypi.org/project/lxml | BSD | 4.6.1 | X | X |
2828
| MarkupSafe | https://pypi.org/project/MarkupSafe | BSD | 1.1.1 | | X |
29-
| matplotlib | https://pypi.org/project/matplotlib | PSF | 3.3.2 | X | X |
29+
| matplotlib | https://pypi.org/project/matplotlib | PSF | 3.3.3 | X | X |
3030
| mpmath | https://pypi.org/project/mpmath | BSD | 1.1.0 | | |
3131
| networkx | https://pypi.org/project/networkx | BSD | 2.5 | X | |
3232
| numpy | https://pypi.org/project/numpy | BSD | 1.19.4 | | X |

RELEASE_NOTES

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
===== PM4Py 2.0.1.2 =====
2-
This is a minor release, fixing the compatibility of the streaming package with Python 3.5.x - 3.7.x
3-
41
===== PM4Py 2.0.1 =====
52
This is a minor release, consisting of the following changes:
63
1. commit 81579e19e

examples/debug.log

Lines changed: 0 additions & 1 deletion
This file was deleted.

pm4py/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,16 @@
5555
else:
5656
logging.error("intervaltree is not available. This can lead some features of PM4Py to not work correctly!")
5757

58-
__version__ = '2.0.1.2'
58+
VERSION = '2.0.1.3'
59+
60+
__version__ = VERSION
5961
__doc__ = "Process Mining for Python (PM4Py)"
6062
__author__ = 'Fraunhofer Institute for Applied Technology'
6163
__author_email__ = 'pm4py@fit.fraunhofer.de'
6264
__maintainer__ = 'Fraunhofer Institute for Applied Technology'
6365
__maintainer_email__ = "pm4py@fit.fraunhofer.de"
6466

65-
from pm4py.read import read_xes, read_csv, read_petri_net, read_process_tree, read_dfg, format_dataframe, \
67+
from pm4py.read import read_xes, read_csv, read_petri_net, read_process_tree, read_dfg, \
6668
convert_to_event_log, convert_to_event_stream, convert_to_dataframe
6769
from pm4py.write import write_xes, write_csv, write_petri_net, write_process_tree, write_dfg
6870
from pm4py.discovery import discover_petri_net_alpha, discover_petri_net_alpha_plus, discover_petri_net_heuristics, \
@@ -77,6 +79,7 @@
7779
filter_variants_percentage, filter_paths, filter_timestamp, filter_trace_attribute
7880
from pm4py.stats import get_start_activities, get_end_activities, get_attributes, get_attribute_values, get_variants, \
7981
get_trace_attributes
82+
from pm4py.utils import format_dataframe
8083

8184
# this package is available only for Python >= 3.5
8285
if sys.version_info >= (3, 5):

pm4py/read.py

Lines changed: 7 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import logging
22

3-
from pm4py.util import constants, xes_constants, pandas_utils
3+
import deprecation
4+
5+
from pm4py import VERSION
46

57
INDEX_COLUMN = "@@index"
68

@@ -24,6 +26,9 @@ def read_xes(file_path):
2426
return log
2527

2628

29+
@deprecation.deprecated(deprecated_in="2.0.1.3", removed_in="3.0",
30+
current_version=VERSION,
31+
details="Use pandas to import CSV files")
2732
def read_csv(file_path, sep=",", quotechar=None, encoding='utf-8', nrows=10000000, timest_format=None):
2833
"""
2934
Reads an event log in the CSV format (Pandas adapter)
@@ -59,52 +64,13 @@ def read_csv(file_path, sep=",", quotechar=None, encoding='utf-8', nrows=1000000
5964
logging.error(
6065
"Less than three columns were imported from the CSV file. Please check the specification of the separation and the quote character!")
6166
else:
62-
#logging.warning(
67+
# logging.warning(
6368
# "Please specify the format of the dataframe: df = pm4py.format_dataframe(df, case_id='<name of the case ID column>', activity_key='<name of the activity column>', timestamp_key='<name of the timestamp column>')")
6469
pass
6570

6671
return df
6772

6873

69-
def format_dataframe(df, case_id=constants.CASE_CONCEPT_NAME, activity_key=xes_constants.DEFAULT_NAME_KEY,
70-
timestamp_key=xes_constants.DEFAULT_TIMESTAMP_KEY):
71-
"""
72-
Give the appropriate format on the dataframe, for process mining purposes
73-
74-
Parameters
75-
--------------
76-
df
77-
Dataframe
78-
case_id
79-
Case identifier column
80-
activity_key
81-
Activity column
82-
timestamp_key
83-
Timestamp column
84-
85-
Returns
86-
--------------
87-
df
88-
Dataframe
89-
"""
90-
if case_id not in df.columns:
91-
raise Exception(case_id + " column (case ID) is not in the dataframe!")
92-
if activity_key not in df.columns:
93-
raise Exception(activity_key + " column (activity) is not in the dataframe!")
94-
if timestamp_key not in df.columns:
95-
raise Exception(timestamp_key + " column (timestamp) is not in the dataframe!")
96-
df = df.rename(columns={case_id: constants.CASE_CONCEPT_NAME, activity_key: xes_constants.DEFAULT_NAME_KEY,
97-
timestamp_key: xes_constants.DEFAULT_TIMESTAMP_KEY})
98-
df[constants.CASE_CONCEPT_NAME] = df[constants.CASE_CONCEPT_NAME].astype(str)
99-
# set an index column
100-
df = pandas_utils.insert_index(df, INDEX_COLUMN)
101-
# sorts the dataframe
102-
df = df.sort_values([constants.CASE_CONCEPT_NAME, xes_constants.DEFAULT_TIMESTAMP_KEY, INDEX_COLUMN])
103-
# logging.warning(
104-
# "please convert the dataframe for advanced process mining applications. log = pm4py.convert_to_event_log(df)")
105-
return df
106-
107-
10874
def convert_to_event_log(obj):
10975
"""
11076
Converts a log object to an event log

pm4py/streaming/algo/discovery/dfg/variants/frequency.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,17 +113,17 @@ def _process(self, event):
113113
activity = self.encode_str(event[self.activity_key])
114114
if case not in self.case_dict:
115115
if activity not in self.start_activities:
116-
self.start_activities[activity] = 0
116+
self.start_activities[activity] = 1
117117
else:
118118
self.start_activities[activity] = self.start_activities[activity] + 1
119119
else:
120120
df = self.encode_tuple((self.case_dict[case], activity))
121121
if df not in self.dfg:
122-
self.dfg[df] = 0
122+
self.dfg[df] = 1
123123
else:
124124
self.dfg[df] = self.dfg[df] + 1
125125
if activity not in self.activities:
126-
self.activities[activity] = 0
126+
self.activities[activity] = 1
127127
else:
128128
self.activities[activity] = self.activities[activity] + 1
129129
self.case_dict[case] = activity

pm4py/util/vis_utils.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
import base64
2+
import os
3+
import subprocess
4+
import sys
25

36
MAX_EDGE_PENWIDTH_GRAPHVIZ = 2.6
47
MIN_EDGE_PENWIDTH_GRAPHVIZ = 1.0
@@ -58,7 +61,7 @@ def get_arc_penwidth(arc_measure, min_arc_measure, max_arc_measure):
5861
Current arc width in the graph
5962
"""
6063
return MIN_EDGE_PENWIDTH_GRAPHVIZ + (MAX_EDGE_PENWIDTH_GRAPHVIZ - MIN_EDGE_PENWIDTH_GRAPHVIZ) * (
61-
arc_measure - min_arc_measure) / (max_arc_measure - min_arc_measure + 0.00001)
64+
arc_measure - min_arc_measure) / (max_arc_measure - min_arc_measure + 0.00001)
6265

6366

6467
def get_trans_freq_color(trans_count, min_trans_count, max_trans_count):
@@ -119,3 +122,50 @@ def get_base64_from_file(temp_file):
119122
"""
120123
with open(temp_file, "rb") as f:
121124
return base64.b64encode(f.read())
125+
126+
127+
def check_visualization_inside_jupyter():
    """
    Checks if the visualization of the model is performed
    inside a Jupyter notebook

    Returns
    -------------
    boolean
        True if the class name of the object returned by get_ipython()
        is "ZMQInteractiveShell", False otherwise (including the case
        where get_ipython is not defined at all)
    """
    try:
        # get_ipython is not imported here: if the name cannot be resolved,
        # the lookup raises NameError, which is handled below
        shell = get_ipython().__class__.__name__
    except NameError:
        return False
    return shell == "ZMQInteractiveShell"
140+
141+
142+
def view_image_in_jupyter(file_name):
    """
    Shows a picture in the output of a Jupyter notebook

    Parameters
    -------------
    file_name
        Name of the file

    Returns
    -------------
    result
        The value returned by IPython.display.display for the image
    """
    # IPython is imported lazily, so it is only needed when this function is called
    from IPython.display import Image, display

    return display(Image(file_name))
155+
156+
157+
def open_opsystem_image_viewer(file_name):
    """
    Opens a picture with the image viewer of the operating system

    Nothing is done when the platform matches none of the cases below.

    Parameters
    -------------
    file_name
        Name of the file
    """
    # macOS is checked first: it also reports os.name == 'posix'
    if sys.platform.startswith('darwin'):
        subprocess.call(('open', file_name))
        return

    # Windows
    if os.name == 'nt':
        os.startfile(file_name)
        return

    # remaining POSIX systems (e.g. Linux)
    if os.name == 'posix':
        subprocess.call(('xdg-open', file_name))

pm4py/utils.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from pm4py.util import constants, xes_constants, pandas_utils
2+
3+
INDEX_COLUMN = "@@index"
4+
5+
6+
def format_dataframe(df, case_id=constants.CASE_CONCEPT_NAME, activity_key=xes_constants.DEFAULT_NAME_KEY,
                     timestamp_key=xes_constants.DEFAULT_TIMESTAMP_KEY, timest_format=None):
    """
    Brings a dataframe into the column layout expected for process mining

    The case ID, activity and timestamp columns are renamed to the standard
    names, the case ID is cast to string, the timestamp is parsed with Pandas,
    an index column is inserted and the rows are sorted.

    Parameters
    --------------
    df
        Dataframe
    case_id
        Case identifier column
    activity_key
        Activity column
    timestamp_key
        Timestamp column
    timest_format
        Timestamp format that is provided to Pandas

    Returns
    --------------
    df
        Dataframe

    Raises
    --------------
    Exception
        If one of the three specified columns is not in the dataframe
    """
    import pandas as pd

    # standard column names used after the renaming
    case_column = constants.CASE_CONCEPT_NAME
    activity_column = xes_constants.DEFAULT_NAME_KEY
    timestamp_column = xes_constants.DEFAULT_TIMESTAMP_KEY

    # every source column has to exist before anything is renamed
    if case_id not in df.columns:
        raise Exception(case_id + " column (case ID) is not in the dataframe!")
    if activity_key not in df.columns:
        raise Exception(activity_key + " column (activity) is not in the dataframe!")
    if timestamp_key not in df.columns:
        raise Exception(timestamp_key + " column (timestamp) is not in the dataframe!")

    renaming = {case_id: case_column, activity_key: activity_column, timestamp_key: timestamp_column}
    df = df.rename(columns=renaming)

    # the case identifier is always handled as a string
    df[case_column] = df[case_column].astype(str)
    # the timestamp column is parsed by Pandas, with the provided format (if any)
    df[timestamp_column] = pd.to_datetime(df[timestamp_column], format=timest_format)

    # an index column is inserted before sorting and used as the last sorting key
    df = pandas_utils.insert_index(df, INDEX_COLUMN)
    sorting_columns = [case_column, timestamp_column, INDEX_COLUMN]
    return df.sort_values(sorting_columns)

0 commit comments

Comments
 (0)