2626from pm4py .objects .log .util import xes
2727from pm4py .util import exec_utils
2828from pm4py .util import variants_util , pandas_utils
29- from pm4py .util .constants import PARAMETER_CONSTANT_ACTIVITY_KEY , PARAMETER_CONSTANT_CASEID_KEY , CASE_CONCEPT_NAME
29+ from pm4py .util .constants import (
30+ PARAMETER_CONSTANT_ACTIVITY_KEY ,
31+ PARAMETER_CONSTANT_CASEID_KEY ,
32+ CASE_CONCEPT_NAME ,
33+ )
3034from typing import Optional , Dict , Any , Union
3135from pm4py .objects .log .obj import EventLog
3236import pandas as pd
class Parameters(Enum):
    """Parameter keys accepted by the log skeleton discovery / conformance code."""

    # noise threshold in [0, 1] used to relax the discovered constraints
    NOISE_THRESHOLD = "noise_threshold"
    # considered constraints in conformance checking among: equivalence,
    # always_after, always_before, never_together, directly_follows,
    # activ_freq
    CONSIDERED_CONSTRAINTS = "considered_constraints"
    # default choice for conformance checking: consider every constraint type
    DEFAULT_CONSIDERED_CONSTRAINTS = [
        "equivalence",
        "always_after",
        "always_before",
        "never_together",
        "directly_follows",
        "activ_freq",
    ]
    # key of the case identifier column (dataframe input)
    CASE_ID_KEY = PARAMETER_CONSTANT_CASEID_KEY
    # key of the attribute holding the activity name
    ACTIVITY_KEY = PARAMETER_CONSTANT_ACTIVITY_KEY
    # delimiter used when variants are expressed as delimited strings
    PARAMETER_VARIANT_DELIMITER = "variant_delimiter"
@@ -85,7 +97,11 @@ def equivalence(logs_traces, all_activs, noise_threshold=0):
8597 for k in rs :
8698 rs [k ] = rs [k ] * logs_traces [trace ]
8799 ret0 += rs
88- ret = set (x for x , y in ret0 .items () if y >= all_activs [x [0 ]] * (1.0 - noise_threshold ))
100+ ret = set (
101+ x
102+ for x , y in ret0 .items ()
103+ if y >= all_activs [x [0 ]] * (1.0 - noise_threshold )
104+ )
89105 return ret
90106
91107
def always_after(logs_traces, all_activs, noise_threshold=0):
    """
    Discover the "always after" relations of the log skeleton.

    Parameters
    --------------
    logs_traces
        Counter mapping each trace variant (tuple of activities) to its
        frequency in the log
    all_activs
        Counter of the activities of the log (kept for interface
        compatibility; this implementation derives its own supports)
    noise_threshold
        Noise threshold in [0, 1]

    Returns
    --------------
    rel
        Set of pairs (A, B) such that B eventually follows A in at least a
        (1 - noise_threshold) fraction of the traces containing A
    """
    # support of each activity: number of traces where it occurs at least once
    act_support = Counter()
    for variant, weight in logs_traces.items():
        for activity in set(variant):
            act_support[activity] += weight

    # support of each pair (A, B): number of traces where B occurs after A
    # at least once (trace_skel.after yields the "eventually follows" pairs
    # of a single variant; the set() keeps one occurrence per variant)
    pair_support = Counter()
    for variant, weight in logs_traces.items():
        for pair in set(trace_skel.after(list(variant))):
            pair_support[pair] += weight

    # keep the pairs whose support is high enough relative to the support
    # of the pair's first activity
    return {
        pair
        for pair, weight in pair_support.items()
        if weight >= act_support[pair[0]] * (1 - noise_threshold)
    }
121150
122151
def always_before(logs_traces, all_activs, noise_threshold=0):
    """
    Discover the "always before" relations of the log skeleton.

    Parameters
    --------------
    logs_traces
        Counter mapping each trace variant (tuple of activities) to its
        frequency in the log
    all_activs
        Counter of the activities of the log (kept for interface
        compatibility; this implementation derives its own supports)
    noise_threshold
        Noise threshold in [0, 1]

    Returns
    --------------
    rel
        Set of pairs (A, B) such that B occurs before A in at least a
        (1 - noise_threshold) fraction of the traces containing A
    """
    # support of each activity: number of traces where it occurs at least once
    act_support = Counter()
    for variant, weight in logs_traces.items():
        for activity in set(variant):
            act_support[activity] += weight

    # support of each pair: trace_skel.before yields, for a single variant,
    # the pairs (A, B) such that B occurs somewhere before A; the set()
    # counts each pair once per variant
    pair_support = Counter()
    for variant, weight in logs_traces.items():
        for pair in set(trace_skel.before(list(variant))):
            pair_support[pair] += weight

    # BUG FIX: normalize by the support of the pair's FIRST element (the
    # anchor activity A), consistently with always_after and with the
    # occurrence-based formulation (first_count[x[0]]). The previous code
    # divided by the support of the second element, which admitted
    # constraints even when most traces containing A lacked B before it.
    result = set()
    for pair, weight in pair_support.items():
        if weight >= act_support[pair[0]] * (1.0 - noise_threshold):
            result.add(pair)
    return result
152187
153188
154189def never_together (logs_traces , all_activs , len_log , noise_threshold = 0 ):
@@ -180,7 +215,11 @@ def never_together(logs_traces, all_activs, len_log, noise_threshold=0):
180215 for k in rs :
181216 rs [k ] = rs [k ] * logs_traces [trace ]
182217 ret0 -= rs
183- ret = set (x for x , y in ret0 .items () if y >= all_activs [x [0 ]] * (1.0 - noise_threshold ))
218+ ret = set (
219+ x
220+ for x , y in ret0 .items ()
221+ if y >= all_activs [x [0 ]] * (1.0 - noise_threshold )
222+ )
184223 return ret
185224
186225
@@ -208,7 +247,11 @@ def directly_follows(logs_traces, all_activs, noise_threshold=0):
208247 for k in rs :
209248 rs [k ] = rs [k ] * logs_traces [trace ]
210249 ret0 += rs
211- ret = set (x for x , y in ret0 .items () if y >= all_activs [x [0 ]] * (1.0 - noise_threshold ))
250+ ret = set (
251+ x
252+ for x , y in ret0 .items ()
253+ if y >= all_activs [x [0 ]] * (1.0 - noise_threshold )
254+ )
212255 return ret
213256
214257
@@ -244,19 +287,26 @@ def activ_freq(logs_traces, all_activs, len_log, noise_threshold=0):
244287 ret0 [act ] = Counter ()
245288 ret0 [act ][rs [act ]] += logs_traces [trace ]
246289 for act in ret0 :
247- ret0 [act ] = sorted (list ((x , y ) for x , y in ret0 [act ].items ()), key = lambda x : x [1 ], reverse = True )
290+ ret0 [act ] = sorted (
291+ list ((x , y ) for x , y in ret0 [act ].items ()),
292+ key = lambda x : x [1 ],
293+ reverse = True ,
294+ )
248295 added = 0
249296 i = 0
250297 while i < len (ret0 [act ]):
251298 added += ret0 [act ][i ][1 ]
252299 if added >= (1.0 - noise_threshold ) * len_log :
253- ret0 [act ] = ret0 [act ][:min (i + 1 , len (ret0 [act ]))]
300+ ret0 [act ] = ret0 [act ][: min (i + 1 , len (ret0 [act ]))]
254301 i = i + 1
255302 ret [act ] = set (x [0 ] for x in ret0 [act ])
256303 return ret
257304
258305
def apply(
    log: Union[EventLog, pd.DataFrame],
    parameters: Optional[Dict[Union[str, Parameters], Any]] = None,
) -> Dict[str, Any]:
    """
    Discover a log skeleton from an event log.

    Parameters
    -------------
    log
        Event log (EventLog) or pandas dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY: attribute holding the activity name
        - Parameters.CASE_ID_KEY: case identifier column (dataframe input)
        - Parameters.NOISE_THRESHOLD: noise threshold in [0, 1]

    Returns
    -------------
    model
        Log skeleton model: dictionary mapping each constraint name to the
        discovered relations / frequencies
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(
        Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY
    )
    noise_threshold = exec_utils.get_param_value(
        Parameters.NOISE_THRESHOLD, parameters, 0.0
    )

    if type(log) is EventLog:
        # variant frequencies and overall activity frequencies from the
        # event-log object
        logs_traces = Counter(
            tuple(event[activity_key] for event in trace) for trace in log
        )
        all_activs = Counter(
            event[activity_key] for trace in log for event in trace
        )
    elif pandas_utils.check_is_pandas_dataframe(log):
        case_id_key = exec_utils.get_param_value(
            Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME
        )
        all_activs = log[activity_key].value_counts().to_dict()
        # one activity list per case, turned into hashable variants
        per_case = log.groupby(case_id_key)[activity_key].agg(list).to_dict()
        logs_traces = Counter(tuple(acts) for acts in per_case.values())

    return {
        Outputs.EQUIVALENCE.value: equivalence(
            logs_traces, all_activs, noise_threshold=noise_threshold
        ),
        Outputs.ALWAYS_AFTER.value: always_after(
            logs_traces, all_activs, noise_threshold=noise_threshold
        ),
        Outputs.ALWAYS_BEFORE.value: always_before(
            logs_traces, all_activs, noise_threshold=noise_threshold
        ),
        Outputs.NEVER_TOGETHER.value: never_together(
            logs_traces, all_activs, len(log), noise_threshold=noise_threshold
        ),
        Outputs.DIRECTLY_FOLLOWS.value: directly_follows(
            logs_traces, all_activs, noise_threshold=noise_threshold
        ),
        Outputs.ACTIV_FREQ.value: activ_freq(
            logs_traces, all_activs, len(log), noise_threshold=noise_threshold
        ),
    }
301376
def prepare_encode(log_skeleton):
    """
    Prepare a log skeleton for encoding by replacing its sets with lists
    (sets are not JSON-serializable).

    Parameters
    --------------
    log_skeleton
        Log skeleton (constraints stored as sets)

    Returns
    --------------
    log_skeleton
        The same log skeleton, modified in place, with lists instead of sets
    """
    # the five pairwise-constraint entries all get the same set -> list
    # conversion
    for output in (
        Outputs.EQUIVALENCE,
        Outputs.ALWAYS_AFTER,
        Outputs.ALWAYS_BEFORE,
        Outputs.NEVER_TOGETHER,
        Outputs.DIRECTLY_FOLLOWS,
    ):
        log_skeleton[output.value] = list(log_skeleton[output.value])
    # activity frequencies map each activity to a set of admissible counts
    freq_by_act = log_skeleton[Outputs.ACTIV_FREQ.value]
    for act in freq_by_act:
        freq_by_act[act] = list(freq_by_act[act])
    return log_skeleton
0 commit comments