process-intelligence-solutions
diff --git a/‎pm4py/algo/discovery/inductive/base_case/single_activity.py‎
Lines changed: 12 additions & 2 deletions b/‎pm4py/algo/discovery/inductive/base_case/single_activity.py‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎pm4py/algo/discovery/inductive/cuts/sequence.py‎
Lines changed: 1 addition & 1 deletion b/‎pm4py/algo/discovery/inductive/cuts/sequence.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pm4py/objects/log/util/dataframe_utils.py‎
Lines changed: 33 additions & 9 deletions b/‎pm4py/objects/log/util/dataframe_utils.py‎
Lines changed: 33 additions & 9 deletions
@@ -24,7 +24,7 @@
     IMDataStructureUVCL,
     IMDataStructureDFG,
 )
-from pm4py.objects.process_tree.obj import ProcessTree
+from pm4py.objects.process_tree.obj import ProcessTree, Operator
 from typing import Optional, Dict, Any
 
 
@@ -76,4 +76,14 @@ def leaf(
         obj=IMDataStructureDFG,
         parameters: Optional[Dict[str, Any]] = None,
     ) -> ProcessTree:
-        return ProcessTree(label=list(obj.dfg.start_activities)[0])
+        leaf = ProcessTree(label=list(obj.dfg.start_activities)[0])
+        if obj.data_structure.skip is False:
+            return leaf
+        else:
+            tree = ProcessTree(operator=Operator.XOR)
+            skip = ProcessTree()
+            skip.parent = tree
+            leaf.parent = tree
+            tree.children.append(leaf)
+            tree.children.append(skip)
+            return tree
@@ -364,7 +364,7 @@ def project(
             z = activities_idx[b]
             j = activities_idx[a] + 1
             while j < z:
-                skippable[j] = False
+                skippable[j] = True
                 j = j + 1
 
         return [
 
@@ -47,8 +47,8 @@ class Parameters(Enum):
     CASE_ATTRIBUTES = "case_attributes"
     MANDATORY_ATTRIBUTES = "mandatory_attributes"
     MAX_NO_CASES = "max_no_cases"
-    MIN_DIFFERENT_OCC_STR_ATTR = 5
-    MAX_DIFFERENT_OCC_STR_ATTR = 50
+    MIN_DIFFERENT_OCC_STR_ATTR = "min_different_occ_str_attr"
+    MAX_DIFFERENT_OCC_STR_ATTR = "max_different_occ_str_attr"
     TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
     ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
     PARAM_ARTIFICIAL_START_ACTIVITY = constants.PARAM_ARTIFICIAL_START_ACTIVITY
@@ -58,6 +58,7 @@ class Parameters(Enum):
     USE_EXTREMES_TIMESTAMP = "use_extremes_timestamp"
     ADD_CASE_IDENTIFIER_COLUMN = "add_case_identifier_column"
     DETERMINISTIC = "deterministic"
+    COUNT_OCCURRENCES = "count_occurrences"
 
 
 def insert_partitioning(df, num_partitions, parameters=None):
@@ -396,6 +397,7 @@ def select_string_column(
     fea_df: pd.DataFrame,
     col: str,
     case_id_key=constants.CASE_CONCEPT_NAME,
+    count_occurrences=False,
 ) -> pd.DataFrame:
     """
     Extract N columns (for N different attribute values; hotencoding) for the features dataframe for the given string attribute
@@ -410,6 +412,9 @@ def select_string_column(
         String column
     case_id_key
         Case ID key
+    count_occurrences
+        If True, count the number of occurrences of the attribute value in each case.
+        If False (default), use binary encoding (1 if present, 0 if not present)
 
     Returns
     --------------
@@ -419,18 +424,33 @@ def select_string_column(
     vals = pandas_utils.format_unique(df[col].unique())
     for val in vals:
         if val is not None:
-            filt_df_cases = pandas_utils.format_unique(
-                df[df[col] == val][case_id_key].unique()
-            )
+            # Convert value to string first to handle all data types
+            val_str = str(val)
+            # Remove non-ASCII characters and spaces for column naming
             new_col = (
                 col
                 + "_"
-                + val.encode("ascii", errors="ignore")
+                + val_str.encode("ascii", errors="ignore")
                 .decode("ascii")
                 .replace(" ", "")
             )
-            fea_df[new_col] = fea_df[case_id_key].isin(filt_df_cases)
-            fea_df[new_col] = fea_df[new_col].astype(np.float32)
+            
+            if count_occurrences:
+                # Count the number of occurrences of this value per case
+                counts = df[df[col] == val].groupby(case_id_key).size().reset_index(name='count')
+                fea_df = fea_df.merge(
+                    counts.rename(columns={'count': new_col}),
+                    on=case_id_key,
+                    how="left"
+                )
+                fea_df[new_col] = fea_df[new_col].fillna(0).astype(np.float32)
+            else:
+                # Binary encoding (original behavior)
+                filt_df_cases = pandas_utils.format_unique(
+                    df[df[col] == val][case_id_key].unique()
+                )
+                fea_df[new_col] = fea_df[case_id_key].isin(filt_df_cases)
+                fea_df[new_col] = fea_df[new_col].astype(np.float32)
     return fea_df
 
 
@@ -451,6 +471,7 @@ def get_features_df(
     parameters
         Parameters of the algorithm, including:
         - Parameters.CASE_ID_KEY: the case ID
+        - Parameters.COUNT_OCCURRENCES: if True, count occurrences of string attributes instead of binary encoding
 
     Returns
     ---------------
@@ -466,6 +487,9 @@ def get_features_df(
     add_case_identifier_column = exec_utils.get_param_value(
         Parameters.ADD_CASE_IDENTIFIER_COLUMN, parameters, False
     )
+    count_occurrences = exec_utils.get_param_value(
+        Parameters.COUNT_OCCURRENCES, parameters, False
+    )
 
     fea_df = pandas_utils.instantiate_dataframe(
         {
@@ -477,7 +501,7 @@ def get_features_df(
     for col in list_columns:
         if "obj" in str(df[col].dtype) or "str" in str(df[col].dtype):
             fea_df = select_string_column(
-                df, fea_df, col, case_id_key=case_id_key
+                df, fea_df, col, case_id_key=case_id_key, count_occurrences=count_occurrences
             )
         elif "float" in str(df[col].dtype) or "int" in str(df[col].dtype):
             fea_df = select_number_column(