2828import deprecation
2929from typing import Optional , Dict , Any , Union , Tuple , List
3030import pandas as pd
31+ import sys
3132
3233
3334class Parameters (Enum ):
@@ -36,6 +37,8 @@ class Parameters(Enum):
3637 TIMESTAMP_KEY = PARAMETER_CONSTANT_TIMESTAMP_KEY
3738 DECREASING_FACTOR = "decreasingFactor"
3839 POSITIVE = "positive"
40+ MIN_PERFORMANCE = "min_performance"
41+ MAX_PERFORMANCE = "max_performance"
3942
4043
4144def apply (df : pd .DataFrame , paths : List [Tuple [str , str ]], parameters : Optional [Dict [Union [str , Parameters ], Any ]] = None ) -> pd .DataFrame :
@@ -61,11 +64,11 @@ def apply(df: pd.DataFrame, paths: List[Tuple[str, str]], parameters: Optional[D
6164 """
6265 if parameters is None :
6366 parameters = {}
64- paths = [path [0 ] + "," + path [1 ] for path in paths ]
6567 case_id_glue = exec_utils .get_param_value (Parameters .CASE_ID_KEY , parameters , CASE_CONCEPT_NAME )
6668 attribute_key = exec_utils .get_param_value (Parameters .ATTRIBUTE_KEY , parameters , DEFAULT_NAME_KEY )
6769 timestamp_key = exec_utils .get_param_value (Parameters .TIMESTAMP_KEY , parameters , DEFAULT_TIMESTAMP_KEY )
6870 positive = exec_utils .get_param_value (Parameters .POSITIVE , parameters , True )
71+ paths = [path [0 ] + "," + path [1 ] for path in paths ]
6972 df = df .sort_values ([case_id_glue , timestamp_key ])
7073 filt_df = df [[case_id_glue , attribute_key ]]
7174 filt_dif_shifted = filt_df .shift (- 1 )
@@ -84,6 +87,62 @@ def apply(df: pd.DataFrame, paths: List[Tuple[str, str]], parameters: Optional[D
8487 return ret
8588
8689
def apply_performance(df: pd.DataFrame, provided_path: Tuple[str, str], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> pd.DataFrame:
    """
    Filters the cases of a dataframe where there is at least one occurrence of the provided path
    occurring in the defined timedelta range.

    Parameters
    ----------
    df
        Dataframe
    provided_path
        Path to filter on, given as a (source, target) tuple of attribute values
    parameters
        Possible parameters of the algorithm, including:
            Parameters.CASE_ID_KEY -> Case ID column in the dataframe
            Parameters.ATTRIBUTE_KEY -> Attribute we want to filter
            Parameters.TIMESTAMP_KEY -> Attribute identifying the timestamp in the log
            Parameters.POSITIVE -> Specifies if the filter should be applied including traces (positive=True)
            or excluding traces (positive=False)
            Parameters.MIN_PERFORMANCE -> Minimal allowed performance of the provided path
            (in seconds, inclusive; default 0)
            Parameters.MAX_PERFORMANCE -> Maximal allowed performance of the provided path
            (in seconds, inclusive; default sys.maxsize)

    Returns
    ----------
    df
        Filtered dataframe (sorted by case ID and timestamp)
    """
    if parameters is None:
        parameters = {}
    case_id_glue = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
    attribute_key = exec_utils.get_param_value(Parameters.ATTRIBUTE_KEY, parameters, DEFAULT_NAME_KEY)
    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, DEFAULT_TIMESTAMP_KEY)
    positive = exec_utils.get_param_value(Parameters.POSITIVE, parameters, True)
    min_performance = exec_utils.get_param_value(Parameters.MIN_PERFORMANCE, parameters, 0)
    max_performance = exec_utils.get_param_value(Parameters.MAX_PERFORMANCE, parameters, sys.maxsize)
    provided_path = provided_path[0] + "," + provided_path[1]

    # Sorting puts the events of each case next to each other in time order, so
    # shifting by one row pairs every event with the row that follows it.
    df = df.sort_values([case_id_glue, timestamp_key])
    filt_df = df[[case_id_glue, attribute_key, timestamp_key]]
    filt_dif_shifted = filt_df.shift(-1)
    filt_dif_shifted.columns = [str(col) + '_2' for col in filt_dif_shifted.columns]
    stacked_df = pd.concat([filt_df, filt_dif_shifted], axis=1)

    # The shift is applied to the whole frame, so the last event of a case is
    # paired with the first event of the next one. Drop those pairs: a path
    # only exists between two events of the same case.
    stacked_df = stacked_df[stacked_df[case_id_glue] == stacked_df[case_id_glue + "_2"]]

    # Boolean masks are used instead of writing helper columns onto a filtered
    # slice, which would trigger pandas' SettingWithCopyWarning.
    path_labels = stacked_df[attribute_key] + "," + stacked_df[attribute_key + "_2"]
    stacked_df = stacked_df[path_labels == provided_path]

    # total_seconds() yields plain floats on every pandas version; astype('timedelta64[s]')
    # keeps a timedelta dtype on pandas >= 2.0, which cannot be compared with a number.
    # NOTE(review): assumes the timestamp column is datetime-like - confirm for callers
    # that pass other dtypes.
    elapsed_seconds = (stacked_df[timestamp_key + "_2"] - stacked_df[timestamp_key]).dt.total_seconds()
    stacked_df = stacked_df[(elapsed_seconds >= min_performance) & (elapsed_seconds <= max_performance)]

    # Select (or exclude) every event of the cases that have a matching path occurrence.
    i1 = df.set_index(case_id_glue).index
    i2 = stacked_df.set_index(case_id_glue).index
    if positive:
        ret = df[i1.isin(i2)]
    else:
        ret = df[~i1.isin(i2)]

    ret.attrs = copy(df.attrs) if hasattr(df, 'attrs') else {}
    return ret
144+
145+
87146@deprecation .deprecated ("2.2.11" , "3.0.0" , details = "Removed" )
88147def apply_auto_filter (df , parameters = None ):
89148 del df
0 commit comments