process-intelligence-solutions
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Dockerfile‎
Lines changed: 1 addition & 1 deletion b/‎Dockerfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.THIRD_PARTY.md‎
Lines changed: 1 addition & 1 deletion b/‎README.THIRD_PARTY.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎RELEASE_NOTES‎
Lines changed: 0 additions & 3 deletions b/‎RELEASE_NOTES‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎examples/debug.log‎
Lines changed: 0 additions & 1 deletion b/‎examples/debug.log‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎pm4py/__init__.py‎
Lines changed: 5 additions & 2 deletions b/‎pm4py/__init__.py‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎pm4py/read.py‎
Lines changed: 7 additions & 41 deletions b/‎pm4py/read.py‎
Lines changed: 7 additions & 41 deletions
diff --git a/‎pm4py/streaming/algo/discovery/dfg/variants/frequency.py‎
Lines changed: 3 additions & 3 deletions b/‎pm4py/streaming/algo/discovery/dfg/variants/frequency.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎pm4py/util/vis_utils.py‎
Lines changed: 51 additions & 1 deletion b/‎pm4py/util/vis_utils.py‎
Lines changed: 51 additions & 1 deletion
diff --git a/‎pm4py/utils.py‎
Lines changed: 48 additions & 0 deletions b/‎pm4py/utils.py‎
Lines changed: 48 additions & 0 deletions
@@ -3,4 +3,4 @@
 /venv/
 *.mps
 *.sol
-debug.log
+*debug.log*
@@ -13,7 +13,7 @@ RUN apt-get -y install libtool flex bison pkg-config g++ libssl-dev automake
 RUN apt-get -y install libjemalloc-dev libboost-dev libboost-filesystem-dev libboost-system-dev libboost-regex-dev python3-dev autoconf flex bison cmake
 RUN apt-get -y install libxml2-dev libxslt-dev libfreetype6-dev libsuitesparse-dev
 RUN pip install -U wheel six pytest
-RUN pip install MarkupSafe==1.1.1 backcall==0.2.0 certifi==2020.11.8 colorama==0.4.3 decorator==4.4.2 ipython-genutils==0.2.0 joblib==0.17.0 more-itertools==8.6.0 mpmath==1.1.0 numpy==1.19.4 parso==0.8.0 pickleshare==0.7.5 Pillow==8.0.1 Pygments==2.7.2 pyparsing==2.4.7 pytz==2020.4 setuptools==50.3.2 six==1.15.0 sortedcontainers==2.3.0 threadpoolctl==2.1.0 wcwidth==0.2.5 cycler==0.10.0 jedi==0.17.2 jinja2==2.11.2 kiwisolver==1.3.1 networkx==2.5 packaging==20.4 prompt-toolkit==3.0.7 python-dateutil==2.8.1 scipy==1.5.4 traitlets==5.0.5 zipp==3.4.0 importlib-metadata==2.0.0 ipython==7.19.0 jsonpickle==1.4.1 deprecation==2.1.0 graphviz==0.14.2 intervaltree==3.1.0 lxml==4.6.1 matplotlib==3.3.2 pandas==1.1.4 pulp==2.1 pydotplus==2.0.2 pyvis==0.1.8.2 scikit-learn==0.23.2 StringDist==1.0.9 sympy==1.6.2 cython==0.29.21 tqdm==4.51.0
+RUN pip install MarkupSafe==1.1.1 backcall==0.2.0 certifi==2020.11.8 colorama==0.4.3 decorator==4.4.2 ipython-genutils==0.2.0 joblib==0.17.0 more-itertools==8.6.0 mpmath==1.1.0 numpy==1.19.4 parso==0.8.0 pickleshare==0.7.5 Pillow==8.0.1 Pygments==2.7.2 pyparsing==2.4.7 pytz==2020.4 setuptools==50.3.2 six==1.15.0 sortedcontainers==2.3.0 threadpoolctl==2.1.0 wcwidth==0.2.5 cycler==0.10.0 jedi==0.17.2 jinja2==2.11.2 kiwisolver==1.3.1 networkx==2.5 packaging==20.4 prompt-toolkit==3.0.7 python-dateutil==2.8.1 scipy==1.5.4 traitlets==5.0.5 zipp==3.4.0 importlib-metadata==2.0.0 ipython==7.19.0 jsonpickle==1.4.1 deprecation==2.1.0 graphviz==0.14.2 intervaltree==3.1.0 lxml==4.6.1 matplotlib==3.3.3 pandas==1.1.4 pulp==2.1 pydotplus==2.0.2 pyvis==0.1.8.2 scikit-learn==0.23.2 StringDist==1.0.9 sympy==1.6.2 cython==0.29.21 tqdm==4.51.0
 
 COPY . /app
 RUN cd /app && cp tests/test_dockers/setups/setup_master.py setup.py && python setup.py install
@@ -26,7 +26,7 @@ to change as libraries are added or removed.
 | kiwisolver | https://pypi.org/project/kiwisolver | BSD | 1.3.1 | X | X |
 | lxml | https://pypi.org/project/lxml | BSD | 4.6.1 | X | X |
 | MarkupSafe | https://pypi.org/project/MarkupSafe | BSD | 1.1.1 | | X |
-| matplotlib | https://pypi.org/project/matplotlib | PSF | 3.3.2 | X | X |
+| matplotlib | https://pypi.org/project/matplotlib | PSF | 3.3.3 | X | X |
 | mpmath | https://pypi.org/project/mpmath | BSD | 1.1.0 | | |
 | networkx | https://pypi.org/project/networkx | BSD | 2.5 | X | |
 | numpy | https://pypi.org/project/numpy | BSD | 1.19.4 | | X |
 
@@ -1,6 +1,3 @@
-===== PM4Py 2.0.1.2 =====
-This is a minor release, fixing the compatibility of the streaming package with Python 3.5.x - 3.7.x
-
 ===== PM4Py 2.0.1 =====
 This is a minor release, consisting of the following changes:
 1. commit 81579e19e
 
@@ -55,14 +55,16 @@
 else:
     logging.error("intervaltree is not available. This can lead some features of PM4Py to not work correctly!")
 
-__version__ = '2.0.1.2'
+VERSION = '2.0.1.3'
+
+__version__ = VERSION
 __doc__ = "Process Mining for Python (PM4Py)"
 __author__ = 'Fraunhofer Institute for Applied Technology'
 __author_email__ = 'pm4py@fit.fraunhofer.de'
 __maintainer__ = 'Fraunhofer Institute for Applied Technology'
 __maintainer_email__ = "pm4py@fit.fraunhofer.de"
 
-from pm4py.read import read_xes, read_csv, read_petri_net, read_process_tree, read_dfg, format_dataframe, \
+from pm4py.read import read_xes, read_csv, read_petri_net, read_process_tree, read_dfg, \
     convert_to_event_log, convert_to_event_stream, convert_to_dataframe
 from pm4py.write import write_xes, write_csv, write_petri_net, write_process_tree, write_dfg
 from pm4py.discovery import discover_petri_net_alpha, discover_petri_net_alpha_plus, discover_petri_net_heuristics, \
@@ -77,6 +79,7 @@
     filter_variants_percentage, filter_paths, filter_timestamp, filter_trace_attribute
 from pm4py.stats import get_start_activities, get_end_activities, get_attributes, get_attribute_values, get_variants, \
     get_trace_attributes
+from pm4py.utils import format_dataframe
 
 # this package is available only for Python >= 3.5
 if sys.version_info >= (3, 5):
 
@@ -1,6 +1,8 @@
 import logging
 
-from pm4py.util import constants, xes_constants, pandas_utils
+import deprecation
+
+from pm4py import VERSION
 
 INDEX_COLUMN = "@@index"
 
@@ -24,6 +26,9 @@ def read_xes(file_path):
     return log
 
 
+@deprecation.deprecated(deprecated_in="2.0.1.3", removed_in="3.0",
+                        current_version=VERSION,
+                        details="Use pandas to import CSV files")
 def read_csv(file_path, sep=",", quotechar=None, encoding='utf-8', nrows=10000000, timest_format=None):
     """
     Reads an event log in the CSV format (Pandas adapter)
@@ -59,52 +64,13 @@ def read_csv(file_path, sep=",", quotechar=None, encoding='utf-8', nrows=1000000
         logging.error(
             "Less than three columns were imported from the CSV file. Please check the specification of the separation and the quote character!")
     else:
-        #logging.warning(
+        # logging.warning(
         #    "Please specify the format of the dataframe: df = pm4py.format_dataframe(df, case_id='<name of the case ID column>', activity_key='<name of the activity column>', timestamp_key='<name of the timestamp column>')")
         pass
 
     return df
 
 
-def format_dataframe(df, case_id=constants.CASE_CONCEPT_NAME, activity_key=xes_constants.DEFAULT_NAME_KEY,
-                     timestamp_key=xes_constants.DEFAULT_TIMESTAMP_KEY):
-    """
-    Give the appropriate format on the dataframe, for process mining purposes
-
-    Parameters
-    --------------
-    df
-        Dataframe
-    case_id
-        Case identifier column
-    activity_key
-        Activity column
-    timestamp_key
-        Timestamp column
-
-    Returns
-    --------------
-    df
-        Dataframe
-    """
-    if case_id not in df.columns:
-        raise Exception(case_id + " column (case ID) is not in the dataframe!")
-    if activity_key not in df.columns:
-        raise Exception(activity_key + " column (activity) is not in the dataframe!")
-    if timestamp_key not in df.columns:
-        raise Exception(timestamp_key + " column (timestamp) is not in the dataframe!")
-    df = df.rename(columns={case_id: constants.CASE_CONCEPT_NAME, activity_key: xes_constants.DEFAULT_NAME_KEY,
-                            timestamp_key: xes_constants.DEFAULT_TIMESTAMP_KEY})
-    df[constants.CASE_CONCEPT_NAME] = df[constants.CASE_CONCEPT_NAME].astype(str)
-    # set an index column
-    df = pandas_utils.insert_index(df, INDEX_COLUMN)
-    # sorts the dataframe
-    df = df.sort_values([constants.CASE_CONCEPT_NAME, xes_constants.DEFAULT_TIMESTAMP_KEY, INDEX_COLUMN])
-    # logging.warning(
-    #    "please convert the dataframe for advanced process mining applications. log = pm4py.convert_to_event_log(df)")
-    return df
-
-
 def convert_to_event_log(obj):
     """
     Converts a log object to an event log
 
@@ -113,17 +113,17 @@ def _process(self, event):
             activity = self.encode_str(event[self.activity_key])
             if case not in self.case_dict:
                 if activity not in self.start_activities:
-                    self.start_activities[activity] = 0
+                    self.start_activities[activity] = 1
                 else:
                     self.start_activities[activity] = self.start_activities[activity] + 1
             else:
                 df = self.encode_tuple((self.case_dict[case], activity))
                 if df not in self.dfg:
-                    self.dfg[df] = 0
+                    self.dfg[df] = 1
                 else:
                     self.dfg[df] = self.dfg[df] + 1
             if activity not in self.activities:
-                self.activities[activity] = 0
+                self.activities[activity] = 1
             else:
                 self.activities[activity] = self.activities[activity] + 1
             self.case_dict[case] = activity
 
@@ -1,4 +1,7 @@
 import base64
+import os
+import subprocess
+import sys
 
 MAX_EDGE_PENWIDTH_GRAPHVIZ = 2.6
 MIN_EDGE_PENWIDTH_GRAPHVIZ = 1.0
@@ -58,7 +61,7 @@ def get_arc_penwidth(arc_measure, min_arc_measure, max_arc_measure):
         Current arc width in the graph
     """
     return MIN_EDGE_PENWIDTH_GRAPHVIZ + (MAX_EDGE_PENWIDTH_GRAPHVIZ - MIN_EDGE_PENWIDTH_GRAPHVIZ) * (
-                arc_measure - min_arc_measure) / (max_arc_measure - min_arc_measure + 0.00001)
+            arc_measure - min_arc_measure) / (max_arc_measure - min_arc_measure + 0.00001)
 
 
 def get_trans_freq_color(trans_count, min_trans_count, max_trans_count):
@@ -119,3 +122,50 @@ def get_base64_from_file(temp_file):
     """
     with open(temp_file, "rb") as f:
         return base64.b64encode(f.read())
+
+
+def check_visualization_inside_jupyter():
+    """
+    Checks if the visualization of the model is performed
+    inside a Jupyter notebook, i.e. if ``get_ipython`` is
+    defined and returns a "ZMQInteractiveShell" object.
+    Returns False in any other case.
+    """
+    try:
+        # get_ipython is not defined outside IPython (NameError)
+        shell_name = get_ipython().__class__.__name__
+    except NameError:
+        return False
+    return shell_name == "ZMQInteractiveShell"
+
+
+def view_image_in_jupyter(file_name):
+    """
+    Displays a picture inside a Jupyter notebook
+
+    Parameters
+    -------------
+    file_name
+        Name (path) of the image file
+    """
+    # IPython is imported here, so it is required only when this is called
+    from IPython.display import Image, display
+
+    return display(Image(file_name))
+
+
+def open_opsystem_image_viewer(file_name):
+    """
+    Opens a picture with the image viewer of the operating system
+
+    Parameters
+    -------------
+    file_name
+        Name of the file
+    """
+    on_macos = sys.platform.startswith('darwin')
+    if os.name == 'nt' and not on_macos:  # Windows: file association
+        os.startfile(file_name)
+    elif on_macos or os.name == 'posix':
+        # "open" on macOS, "xdg-open" elsewhere; other platforms: no-op
+        subprocess.call(('open' if on_macos else 'xdg-open', file_name))
@@ -0,0 +1,48 @@
+from pm4py.util import constants, xes_constants, pandas_utils
+
+INDEX_COLUMN = "@@index"
+
+
+def format_dataframe(df, case_id=constants.CASE_CONCEPT_NAME, activity_key=xes_constants.DEFAULT_NAME_KEY,
+                     timestamp_key=xes_constants.DEFAULT_TIMESTAMP_KEY, timest_format=None):
+    """
+    Prepares a dataframe for process mining: the case ID, activity and
+    timestamp columns are renamed to the standard names, then an index
+    column is added and the rows are sorted
+
+    Parameters
+    --------------
+    df
+        Dataframe
+    case_id
+        Case identifier column
+    activity_key
+        Activity column
+    timestamp_key
+        Timestamp column
+    timest_format
+        Timestamp format that is provided to Pandas
+
+    Returns
+    --------------
+    df
+        Formatted dataframe
+    """
+    import pandas as pd
+    case_col, act_col = constants.CASE_CONCEPT_NAME, xes_constants.DEFAULT_NAME_KEY
+    time_col = xes_constants.DEFAULT_TIMESTAMP_KEY
+    # the three columns must exist; they are checked in a fixed order
+    for column, problem in ((case_id, " column (case ID) is not in the dataframe!"),
+                            (activity_key, " column (activity) is not in the dataframe!"),
+                            (timestamp_key, " column (timestamp) is not in the dataframe!")):
+        if column not in df.columns:
+            raise Exception(column + problem)
+    df = df.rename(columns={case_id: case_col, activity_key: act_col, timestamp_key: time_col})
+    # case identifiers are always handled as strings
+    df[case_col] = df[case_col].astype(str)
+    # makes sure that the timestamp column is of timestamp type
+    df[time_col] = pd.to_datetime(df[time_col], format=timest_format)
+    # set an index column, used as the last sorting criterion
+    df = pandas_utils.insert_index(df, INDEX_COLUMN)
+    # sorts the dataframe by case, then timestamp, then index column
+    return df.sort_values([case_col, time_col, INDEX_COLUMN])