move PAPNI to separate file

Edi Muskardin · Edi Muskardin · commit b6b2c94c6c28 · 2025-09-25T09:28:14.000+02:00
diff --git a/Benchmarking/papni_vs_rpni_benchmarking.py b/Benchmarking/papni_vs_rpni_benchmarking.py
@@ -144,14 +144,6 @@ def run_experiment(experiment_id,
         data += positive_seq[:5000]
         data += negative_seq[:10000 - len(data)]
 
-        # wm_negative = 0
-        # for seq, label in data:
-        #     if not label and ground_truth_model.is_balanced(seq):
-        #         wm_negative += 1
-        # print(wm_negative)
-
-        # data = get_sequences_from_active_sevpa(ground_truth_model)
-
     vpa_alphabet = ground_truth_model.get_input_alphabet()
 
     learning_data, test_data = split_data_to_learning_and_testing(data, learning_to_test_ratio=learning_to_test_ratio)
@@ -171,20 +163,10 @@ def run_experiment(experiment_id,
     comparison_results = comparison_results + [learning_set_size, num_test_size]
     return comparison_results
 
-
-def run_all_experiments_experiments(test_models, learning_to_test_ratio):
-    for idx, gt in enumerate(test_models):
-        results = run_experiment(idx, gt, num_of_learning_seq=10000, max_learning_seq_len=50,
-                                 random_data_generation=False, learning_to_test_ratio=learning_to_test_ratio)
-
-        res_str = f'GT {idx + 1}:\t Learning ({results[-2][0]}/{results[-2][1]}),\t Test ({results[-1][0]}/{results[-1][1]}),\t'
-        res_str += f'RPNI: size: {results[0]}, prec/rec/F1: {results[2]}, \t PAPNI size: {results[1]}, prec/rec/F1: {results[3]}'
-
-        print(res_str)
-
-
 def run_experiments_multiple_times(test_models, num_times, learning_to_test_ratio=0.5):
     all_results = defaultdict(list)
+    print(f'Running each experiment/model {num_times} times.')
+
     for idx, gt in enumerate(test_models):
         for _ in range(num_times):
             r = run_experiment(idx, gt, num_of_learning_seq=10000, max_learning_seq_len=50,
@@ -257,4 +239,7 @@ def test_papni_based_on_sevpa_dataset():
 
         assert in_learning + not_in_learning == balanced_counter
 
-test_papni_based_on_sevpa_dataset()
+#test_papni_based_on_sevpa_dataset()
+
+all_models = get_all_VPAs()
+run_experiments_multiple_times(all_models, num_times=2)
diff --git a/aalpy/learning_algs/__init__.py b/aalpy/learning_algs/__init__.py
@@ -8,7 +8,8 @@
 from .stochastic.StochasticLStar import run_stochastic_Lstar
 from .stochastic_passive.Alergia import run_Alergia, run_JAlergia
 from .stochastic_passive.ActiveAleriga import run_active_Alergia
-from .deterministic_passive.RPNI import run_RPNI, run_PAPNI
+from .deterministic_passive.RPNI import run_RPNI
+from .deterministic_passive.PAPNI import run_PAPNI
 from .deterministic_passive.active_RPNI import run_active_RPNI
 from .general_passive.GeneralizedStateMerging import run_GSM
 from .general_passive.GsmAlgorithms import run_EDSM, run_Alergia_EDSM, run_k_tails
diff --git a/aalpy/learning_algs/deterministic_passive/PAPNI.py b/aalpy/learning_algs/deterministic_passive/PAPNI.py
@@ -0,0 +1,58 @@
+from aalpy.utils import is_balanced
+from aalpy.automata.Vpa import vpa_from_dfa_representation
+
+def run_PAPNI(data, vpa_alphabet, algorithm='edsm', print_info=True):
+    """
+    Run PAPNI, a deterministic passive model learning algorithm of deterministic pushdown automata.
+    Resulting model conforms to the provided data.
+
+    Args:
+
+        data: sequence of input sequences and corresponding label. Eg. [[(i1,i2,i3, ...), label], ...]
+        vpa_alphabet:  grouping of alphabet elements to call symbols, return symbols, and internal symbols. Call symbols
+        push to stack, return symbols pop from stack, and internal symbols do not affect the stack.
+        algorithm: 'gsm' or 'classic' to learn with run_RPNI, or 'edsm' to learn with the evidence driven state merging variant (run_EDSM)
+        print_info: print learning progress and runtime information
+
+    Returns:
+
+        VPA conforming to the data, or None if data is non-deterministic.
+    """
+    from aalpy.learning_algs import run_EDSM, run_RPNI
+    assert algorithm in {'gsm', 'classic', 'edsm'}
+
+    # preprocess input sequences to keep track of stack
+    papni_data = []
+    for input_seq, label in data:
+        # if the input sequence is not balanced we do not consider it (it would lead to error state anyway)
+        if not is_balanced(input_seq, vpa_alphabet):
+            continue
+
+        # for each sequence keep track of the stack, and when a pop/return element is observed encode it along with the
+        # current top of the stack. This keeps track of the stack during execution
+        processed_sequance = []
+        stack = []
+
+        for input_symbol in input_seq:
+            input_element = input_symbol
+            # if call/push symbol push to stack
+            if input_symbol in vpa_alphabet.call_alphabet:
+                stack.append(input_symbol)
+            # if return/pop symbol pop from stack and add it to the input data
+            if input_symbol in vpa_alphabet.return_alphabet:
+                top_of_stack = stack.pop()
+                input_element = (input_symbol, top_of_stack)
+            processed_sequance.append(input_element)
+
+        papni_data.append((processed_sequance, label))
+
+    # instantiate and run PAPNI as base RPNI with stack-aware data
+    if algorithm != 'edsm':
+        learned_model = run_RPNI(papni_data, automaton_type='dfa', algorithm=algorithm, print_info=print_info)
+    else:
+        learned_model = run_EDSM(papni_data, automaton_type='dfa', print_info=print_info)
+
+    # convert intermediate DFA representation to VPA
+    learned_model = vpa_from_dfa_representation(learned_model, vpa_alphabet)
+
+    return learned_model
diff --git a/aalpy/learning_algs/deterministic_passive/RPNI.py b/aalpy/learning_algs/deterministic_passive/RPNI.py
@@ -56,62 +56,3 @@ def run_RPNI(data, automaton_type, algorithm='gsm',
     return learned_model
 
 
-def run_PAPNI(data, vpa_alphabet, algorithm='edsm', print_info=True):
-    """
-    Run PAPNI, a deterministic passive model learning algorithm of deterministic pushdown automata.
-    Resulting model conforms to the provided data.
-
-    Args:
-
-        data: sequence of input sequences and corresponding label. Eg. [[(i1,i2,i3, ...), label], ...]
-        vpa_alphabet:  grouping of alphabet elements to call symbols, return symbols, and internal symbols. Call symbols
-        push to stack, return symbols pop from stack, and internal symbols do not affect the stack.
-        algorithm: either 'gsm' for classic RPNI or 'edsm' for evidence driven state merging variant of RPNI
-        GSM is much faster and less resource intensive.
-        print_info: print learning progress and runtime information
-
-    Returns:
-
-        VPA conforming to the data, or None if data is non-deterministic.
-    """
-    from aalpy.utils import is_balanced
-    from aalpy.automata.Vpa import vpa_from_dfa_representation
-    from aalpy.learning_algs import run_EDSM
-
-    assert algorithm in {'gsm', 'classic', 'edsm'}
-
-    # preprocess input sequences to keep track of stack
-    papni_data = []
-    for input_seq, label in data:
-        # if input sequance is not balanced we do not consider it (it would lead to error state anyway)
-        if not is_balanced(input_seq, vpa_alphabet):
-            continue
-
-        # for each sequance keep track of the stack, and when pop/return element is observed encode it along with the
-        # current top of stack. This keeps track of stack during execution
-        processed_sequance = []
-        stack = []
-
-        for input_symbol in input_seq:
-            input_element = input_symbol
-            # if call/push symbol push to stack
-            if input_symbol in vpa_alphabet.call_alphabet:
-                stack.append(input_symbol)
-            # if return/pop symbol pop from stack and add it to the input data
-            if input_symbol in vpa_alphabet.return_alphabet:
-                top_of_stack = stack.pop()
-                input_element = (input_symbol, top_of_stack)
-            processed_sequance.append(input_element)
-
-        papni_data.append((processed_sequance, label))
-
-    # instantiate and run PAPNI as base RPNI with stack-aware data
-    if algorithm != 'edsm':
-        learned_model = run_RPNI(papni_data, automaton_type='dfa', algorithm=algorithm, print_info=print_info)
-    else:
-        learned_model = run_EDSM(papni_data, automaton_type='dfa', print_info=print_info)
-
-    # convert intermediate DFA representation to VPA
-    learned_model = vpa_from_dfa_representation(learned_model, vpa_alphabet)
-
-    return learned_model