DES-Lab · emuskardin · Feb 6, 2025 · Apr 5, 2023 · Apr 6, 2023 · Apr 6, 2023
diff --git a/Examples.py b/Examples.py
@@ -1140,15 +1140,12 @@ def passive_vpa_learning_arithmetics():
 
 def passive_vpa_learning_on_all_benchmark_models():
     from aalpy.learning_algs import run_PAPNI
-    from aalpy.utils.BenchmarkVpaModels import get_all_VPAs
+    from aalpy.utils.BenchmarkVpaModels import vpa_L1, vpa_L12, vpa_for_odd_parentheses
     from aalpy.utils import generate_input_output_data_from_vpa, convert_i_o_traces_for_RPNI
 
-    for gt in get_all_VPAs():
-
+    for gt in [vpa_L1(), vpa_L12(), vpa_for_odd_parentheses()]:
         vpa_alphabet = gt.input_alphabet
-        data = generate_input_output_data_from_vpa(gt, num_sequances=2000, min_seq_len=1, max_seq_len=16)
-
-        data = convert_i_o_traces_for_RPNI(data)
+        data = generate_input_output_data_from_vpa(gt, num_sequances=2000, max_seq_len=16)
 
         papni = run_PAPNI(data, vpa_alphabet, algorithm='gsm', print_info=True)
 
@@ -1160,3 +1157,147 @@ def passive_vpa_learning_on_all_benchmark_models():
                 assert False, 'Papni Learned Model not consistent with data.'
 
         print('PAPNI model conforms to data.')
+
+
+def gsm_rpni():
+    """
+    Learn a deterministic Moore machine from traces of the car alarm model using GSM.
+
+    Loads DotModels/car_alarm.dot as a Moore machine, samples input sequences over its input alphabet,
+    records the matching input/output traces and passes them to run_GSM without a custom score calculation.
+    The learned model is visualized.
+    """
+    from aalpy import load_automaton_from_file
+    from aalpy.utils.Sampling import get_io_traces, sample_with_length_limits
+    from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM
+
+    ground_truth = load_automaton_from_file("DotModels/car_alarm.dot", "moore")
+    alphabet = ground_truth.get_input_alphabet()
+    # presumably 100 sequences with lengths between 20 and 30 - confirm against sample_with_length_limits
+    sampled_inputs = sample_with_length_limits(alphabet, 100, 20, 30)
+    io_traces = get_io_traces(ground_truth, sampled_inputs)
+
+    run_GSM(io_traces, output_behavior="moore", transition_behavior="deterministic").visualize()
+
+
+def gsm_edsm():
+    """
+    Learn a deterministic Moore machine of the car alarm model using GSM with an evidence-based merge score.
+
+    The score of a candidate partition is the number of its entries minus the number of distinct
+    nodes those entries are mapped to.
+    """
+    from typing import Dict
+    from aalpy import load_automaton_from_file
+    from aalpy.utils.Sampling import get_io_traces, sample_with_length_limits
+    from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM
+    from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation
+    from aalpy.learning_algs.general_passive.Node import Node
+
+    ground_truth = load_automaton_from_file("DotModels/car_alarm.dot", "moore")
+    sampled_inputs = sample_with_length_limits(ground_truth.get_input_alphabet(), 100, 20, 30)
+    io_traces = get_io_traces(ground_truth, sampled_inputs)
+
+    def EDSM_score(part: Dict[Node, Node]):
+        # entries in the partition minus the distinct nodes they map to
+        return len(part) - len(set(part.values()))
+
+    learned_model = run_GSM(io_traces, output_behavior="moore", transition_behavior="deterministic",
+                            score_calc=ScoreCalculation(score_function=EDSM_score))
+    learned_model.visualize()
+
+
+def gsm_likelihood_ratio():
+    """
+    Learn a stochastic Moore-style model of the faulty car alarm MDP using GSM with a likelihood-ratio score.
+
+    A candidate merge is scored with 1 - chi2.cdf(2 * llh_diff, param_diff), where both quantities come from
+    differential_info; scores below alpha are turned into False.
+    """
+    from typing import Dict
+    from scipy.stats import chi2
+    from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM
+    from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreFunction, differential_info, ScoreCalculation
+    from aalpy.learning_algs.general_passive.Node import Node
+    from aalpy.utils.Sampling import get_io_traces, sample_with_length_limits
+    from aalpy import load_automaton_from_file
+
+    ground_truth = load_automaton_from_file("DotModels/MDPs/faulty_car_alarm.dot", "mdp")
+    sampled_inputs = sample_with_length_limits(ground_truth.get_input_alphabet(), 2000, 20, 30)
+    io_traces = get_io_traces(ground_truth, sampled_inputs)
+
+    def likelihood_ratio_score(alpha=0.05) -> ScoreFunction:
+        # Build a score function that uses alpha as the cut-off; alpha must lie in (0, 1].
+        if not (0 < alpha <= 1):
+            raise ValueError(f"Confidence {alpha} not between 0 and 1")
+
+        def score_fun(part: Dict[Node, Node]):
+            llh_diff, param_diff = differential_info(part)
+            if param_diff == 0:
+                # Corner case: the partition only merges states without outgoing transitions.
+                # Such merges get the fixed score -1 so that they rank as very bad merges.
+                return -1
+            p_value = 1 - chi2.cdf(2 * llh_diff, param_diff)
+            return False if p_value < alpha else p_value
+
+        return score_fun
+
+    score_calc = ScoreCalculation(score_function=likelihood_ratio_score())
+    learned_model = run_GSM(io_traces, output_behavior="moore", transition_behavior="stochastic",
+                            score_calc=score_calc)
+    learned_model.visualize()
+
+
+def gsm_IOAlergia_EDSM():
+    """
+    Compare plain IOAlergia compatibility with an evidence-scored variant on the faulty car alarm MDP.
+
+    Both configurations use the Hoeffding compatibility check with the same epsilon. The "IOA+EDSM"
+    configuration additionally reports, as merge score, how many local compatibility checks were performed
+    since the last call to reset(). visualize(name) is called on each learned model.
+    """
+    from typing import Dict
+    from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM
+    from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import hoeffding_compatibility, ScoreCalculation
+    from aalpy.learning_algs.general_passive.Node import Node
+    from aalpy.utils.Sampling import get_io_traces, sample_with_length_limits
+    from aalpy import load_automaton_from_file
+
+    automaton = load_automaton_from_file("DotModels/MDPs/faulty_car_alarm.dot", "mdp")
+    input_traces = sample_with_length_limits(automaton.get_input_alphabet(), 2000, 20, 30)
+    traces = get_io_traces(automaton, input_traces)
+
+    class IOAlergiaWithEDSM(ScoreCalculation):
+        """Hoeffding compatibility check that counts its invocations and returns the count as score."""
+
+        def __init__(self, epsilon):
+            super().__init__()
+            self.ioa_compatibility = hoeffding_compatibility(epsilon)
+            self.evidence = 0
+
+        def reset(self):
+            self.evidence = 0
+
+        def local_compatibility(self, a: Node, b: Node):
+            # Every local check counts as one unit of evidence, regardless of its outcome.
+            self.evidence += 1
+            return self.ioa_compatibility(a, b)
+
+        # typing.Dict (as in the other GSM examples) rather than dict[...]: the annotation is evaluated when
+        # the method is defined and the builtin is only subscriptable on Python 3.9+.
+        def score_function(self, part: Dict[Node, Node]):
+            return self.evidence
+
+    epsilon = 0.05
+    scores = {
+        "IOA": ScoreCalculation(hoeffding_compatibility(epsilon)),
+        "IOA+EDSM": IOAlergiaWithEDSM(epsilon),
+    }
+    for name, score in scores.items():
+        learned_model = run_GSM(traces, output_behavior="moore", transition_behavior="stochastic", score_calc=score,
+                                compatibility_on_pta=True, compatibility_on_futures=True)
+        learned_model.visualize(name)
+
+
+def gsm_IOAlergia_domain_knowldege():
+    """
+    Compare plain IOAlergia compatibility with a variant that also enforces domain knowledge.
+
+    The domain-knowledge variant only lets two nodes be compatible when the parities of the "l" and "d"
+    inputs along their prefixes agree. visualize(name) is called on each learned model.
+    """
+    from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM
+    from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import hoeffding_compatibility, ScoreCalculation
+    from aalpy.learning_algs.general_passive.Node import Node
+    from aalpy.utils.Sampling import get_io_traces, sample_with_length_limits
+    from aalpy import load_automaton_from_file
+
+    ground_truth = load_automaton_from_file("DotModels/MDPs/faulty_car_alarm.dot", "mdp")
+    sampled_inputs = sample_with_length_limits(ground_truth.get_input_alphabet(), 2000, 20, 30)
+    io_traces = get_io_traces(ground_truth, sampled_inputs)
+
+    ioa_compat = hoeffding_compatibility(0.05)
+
+    def get_parity(node: Node):
+        # For each of the inputs "l" and "d": number of occurrences in the node's prefix, modulo 2.
+        prefix = node.get_prefix()
+        parities = []
+        for key in ["l", "d"]:
+            occurrences = sum(in_s == key for in_s, _ in prefix)
+            parities.append(occurrences % 2)
+        return parities
+
+    # The car has 4 physical states, given by the combinations of locked/unlocked and open/closed.
+    # Each input toggles between these four states. The car alarm system has richer behavior than that, but it
+    # still has to tell the physical states apart. Hence, in every sane implementation of a car alarm system,
+    # every state is associated with exactly one physical state. This extra assumption is enforced during
+    # merging by comparing the parity of the input symbols.
+    def ioa_compat_domain_knowledge(a: Node, b: Node):
+        same_parity = get_parity(a) == get_parity(b)
+        statistically_compatible = ioa_compat(a, b)
+        return same_parity and statistically_compatible
+
+    configurations = {
+        "IOA": ScoreCalculation(ioa_compat),
+        "IOA+DK": ScoreCalculation(ioa_compat_domain_knowledge),
+    }
+    for name, score_calc in configurations.items():
+        learned_model = run_GSM(io_traces, output_behavior="moore", transition_behavior="stochastic",
+                                score_calc=score_calc, compatibility_on_pta=True, compatibility_on_futures=True)
+        learned_model.visualize(name)
diff --git a/aalpy/__init__.py b/aalpy/__init__.py
@@ -9,6 +9,8 @@
     MealyState,
     MooreMachine,
     MooreState,
+    NDMooreMachine,
+    NDMooreState,
     Onfsm,
     OnfsmState,
     Sevpa,
@@ -37,6 +39,7 @@
     run_non_det_Lstar,
     run_RPNI,
     run_stochastic_Lstar,
+    run_GSM,
     run_PAPNI
 )
 from .oracles import (

diff --git a/aalpy/automata/NonDeterministicMooreMachine.py b/aalpy/automata/NonDeterministicMooreMachine.py
@@ -0,0 +1,67 @@
+import random
+from collections import defaultdict
+from typing import List, Dict, Generic
+
+from aalpy.base import AutomatonState, Automaton
+from aalpy.base.Automaton import OutputType, InputType
+
+
+class NDMooreState(AutomatonState, Generic[InputType, OutputType]):
+    """
+    Single state of a non-deterministic Moore machine. Each state has an output value.
+    """
+
+    def __init__(self, state_id, output=None):
+        super().__init__(state_id)
+        # list (not a lambda) as default factory: same behavior, but the state stays picklable.
+        self.transitions: Dict[InputType, List['NDMooreState']] = defaultdict(list)
+        self.output: OutputType = output
+
+
+class NDMooreMachine(Automaton[NDMooreState[InputType, OutputType]]):
+    """
+    Non-deterministic Moore machine: an input can lead to several successor states and step() picks one
+    of them at random.
+    """
+
+    def to_state_setup(self):
+        """
+        Export the machine as a dictionary in the format read by from_state_setup.
+
+        Returns:
+
+            dict mapping each state_id to a tuple (output, {input: [target state_ids]});
+            the entry of the initial state comes first
+
+        """
+        state_setup = dict()
+
+        def set_dict_entry(state: NDMooreState):
+            state_setup[state.state_id] = (state.output,
+                                           {in_sym: [target.state_id for target in trans] for in_sym, trans in
+                                            state.transitions.items()})
+
+        # The initial state goes first because from_state_setup takes the first key as the initial state.
+        set_dict_entry(self.initial_state)
+        for state in self.states:
+            if state is self.initial_state:
+                continue
+            set_dict_entry(state)
+
+        return state_setup
+
+    @staticmethod
+    def from_state_setup(state_setup: dict, **kwargs) -> 'NDMooreMachine':
+        """
+        Build a machine from a dictionary in the format produced by to_state_setup.
+
+        Args:
+
+            state_setup: dict mapping state_id to (output, {input: [target state_ids]}); the first key
+                becomes the initial state
+            **kwargs: accepted but not used by this implementation
+
+        Returns:
+
+            the constructed NDMooreMachine
+
+        """
+        states_map = {key: NDMooreState(key, output=value[0]) for key, value in state_setup.items()}
+
+        for key, values in state_setup.items():
+            source = states_map[key]
+            for i, transitions in values[1].items():
+                for node in transitions:
+                    source.transitions[i].append(states_map[node])
+
+        initial_state = states_map[list(state_setup.keys())[0]]
+        return NDMooreMachine(initial_state, list(states_map.values()))
+
+    def __init__(self, initial_state: AutomatonState, states: list):
+        super().__init__(initial_state, states)
+
+    def step(self, letter):
+        """
+        In Moore machines outputs depend on the current state. The successor is chosen at random among
+        the targets of the given input.
+
+        Args:
+
+            letter: single input that is looked up in the transition function leading to a new state
+
+        Returns:
+
+            the output of the reached state
+
+        Raises:
+
+            IndexError: if the current state has no transition for the given input
+
+        """
+        # .get() instead of indexing: indexing the defaultdict would insert an empty transition list
+        # for every unknown input and thereby silently change the state.
+        options = self.current_state.transitions.get(letter)
+        if not options:
+            raise IndexError(f'No transition for input {letter!r} from state {self.current_state.state_id!r}.')
+        self.current_state = random.choice(options)
+        return self.current_state.output
diff --git a/aalpy/automata/__init__.py b/aalpy/automata/__init__.py
@@ -5,5 +5,6 @@
 from .Onfsm import Onfsm, OnfsmState
 from .StochasticMealyMachine import StochasticMealyMachine, StochasticMealyState
 from .MarkovChain import MarkovChain, McState
+from .NonDeterministicMooreMachine import NDMooreMachine, NDMooreState
 from .Sevpa import Sevpa, SevpaState, SevpaAlphabet, SevpaTransition
 from .Vpa import Vpa, VpaAlphabet, VpaState, VpaTransition
diff --git a/aalpy/learning_algs/__init__.py b/aalpy/learning_algs/__init__.py
@@ -10,3 +10,4 @@
 from .stochastic_passive.ActiveAleriga import run_active_Alergia
 from .deterministic_passive.RPNI import run_RPNI, run_PAPNI
 from .deterministic_passive.active_RPNI import run_active_RPNI
+from .general_passive.GeneralizedStateMerging import run_GSM
diff --git a/aalpy/learning_algs/deterministic_passive/ClassicRPNI.py b/aalpy/learning_algs/deterministic_passive/ClassicRPNI.py
@@ -0,0 +1,129 @@
+import time
+from bisect import insort
+from aalpy.learning_algs.deterministic_passive.rpni_helper_functions import to_automaton, createPTA, \
+    check_sequence, extract_unique_sequences
+
+
+class ClassicRPNI:
+    """
+    Red-blue state merging (RPNI) on a prefix tree built from the data.
+
+    The prefix tree is built once in the constructor. run_rpni then repeatedly tries to merge the minimal
+    blue node into one of the red nodes and keeps a merge only if the merged tree still passes
+    check_sequence for every sequence in self.test_data.
+    """
+    def __init__(self, data, automaton_type, print_info=True):
+        """
+        Build the prefix tree and collect the sequences used to validate merges.
+
+        Args:
+
+            data: training data handed to createPTA (its format is defined by createPTA, not shown here)
+            automaton_type: type of the learned automaton; 'mealy' selects the Mealy-specific compatibility
+                check and fold, every other value uses the output-based variants
+            print_info: if True, timing and progress information is printed
+
+        """
+        self.data = data
+        self.automaton_type = automaton_type
+        self.print_info = print_info
+
+        pta_construction_start = time.time()
+        self.root_node = createPTA(data, automaton_type)
+        self.test_data = extract_unique_sequences(self.root_node)
+
+        if self.print_info:
+            print(f'PTA Construction Time: {round(time.time() - pta_construction_start, 2)}')
+
+    def run_rpni(self):
+        """
+        Run the merging loop until no blue nodes are left.
+
+        Returns:
+
+            the result of to_automaton applied to the final list of red nodes
+
+        """
+        start_time = time.time()
+
+        # red: nodes that are handed to to_automaton at the end; blue: children of red nodes that are not red
+        red = [self.root_node]
+        blue = list(red[0].children.values())
+        while blue:
+            # minimum according to the ordering defined by the node class
+            lex_min_blue = min(list(blue))
+            merged = False
+
+            for red_state in red:
+                if not self._compatible_states(red_state, lex_min_blue):
+                    continue
+                # Trial merge on copies first; the real tree is only modified if the result fits the data.
+                merge_candidate = self._merge(red_state, lex_min_blue, copy_nodes=True)
+                if self._compatible(merge_candidate):
+                    self._merge(red_state, lex_min_blue)
+                    merged = True
+                    break
+
+            if not merged:
+                # No red node can absorb the blue node: promote it to red, keeping the red list sorted.
+                insort(red, lex_min_blue)
+                if self.print_info:
+                    print(f'\rCurrent automaton size: {len(red)}', end="")
+
+            # Recompute the blue nodes from scratch, since merging/promoting changed the tree.
+            blue.clear()
+            for r in red:
+                for c in r.children.values():
+                    if c not in red:
+                        blue.append(c)
+
+        if self.print_info:
+            print(f'\nRPNI Learning Time: {round(time.time() - start_time, 2)}')
+            print(f'RPNI Learned {len(red)} state automaton.')
+
+        # Sanity check: the red nodes are expected to be ordered by the length of their prefix.
+        assert sorted(red, key=lambda x: len(x.prefix)) == red
+        return to_automaton(red, self.automaton_type)
+
+    def _compatible(self, root_node):
+        """
+        Check if current model is compatible with the data.
+
+        Args:
+
+            root_node: root of the (possibly merged) tree to check
+
+        Returns:
+
+            True if check_sequence succeeds for every sequence in self.test_data, False otherwise
+
+        """
+        for sequence in self.test_data:
+            if not check_sequence(root_node, sequence, automaton_type=self.automaton_type):
+                return False
+        return True
+
+    def _compatible_states(self, red_node, blue_node):
+        """
+        Only allow merging of states that have same output(s).
+
+        For non-Mealy types the node outputs are compared, where None matches any output. For Mealy
+        machines the children are keyed by (input, output) pairs and the outputs of all inputs that both
+        nodes have in common must agree.
+
+        Returns:
+
+            True if the two nodes may be merged, False otherwise
+
+        """
+        if self.automaton_type != 'mealy':
+            # None is compatible with everything
+            return red_node.output == blue_node.output or red_node.output is None or blue_node.output is None
+        else:
+            # input -> output maps extracted from the (input, output) child keys
+            red_io = {i: o for i, o in red_node.children.keys()}
+            blue_io = {i: o for i, o in blue_node.children.keys()}
+            for common_i in set(red_io.keys()).intersection(blue_io.keys()):
+                if red_io[common_i] != blue_io[common_i]:
+                    return False
+        return True
+
+    def _merge(self, red_node, lex_min_blue, copy_nodes=False):
+        """
+        Merge two states and return the root node of resulting model.
+
+        The transition that led to the blue node is redirected to the red node and the subtree of the blue
+        node is folded into the red node.
+
+        Args:
+
+            red_node: node that absorbs the blue node
+            lex_min_blue: blue node that is merged into red_node
+            copy_nodes: if True, work on root_node.copy() and lex_min_blue.copy() instead of the originals
+                (presumably deep copies, so that self.root_node stays untouched - confirm in the node class)
+
+        Returns:
+
+            the root of the tree the merge was performed on
+
+        """
+        root_node = self.root_node.copy() if copy_nodes else self.root_node
+        lex_min_blue = lex_min_blue.copy() if copy_nodes else lex_min_blue
+
+        # Locate the red node inside the tree that is being modified by following its prefix from the root.
+        red_node_in_tree = root_node
+        for p in red_node.prefix:
+            red_node_in_tree = red_node_in_tree.children[p]
+
+        # Walk to the parent of the blue node; the last prefix element is the edge to be redirected.
+        to_update = root_node
+        for p in lex_min_blue.prefix[:-1]:
+            to_update = to_update.children[p]
+
+        to_update.children[lex_min_blue.prefix[-1]] = red_node_in_tree
+
+        if self.automaton_type != 'mealy':
+            self._fold(red_node_in_tree, lex_min_blue)
+        else:
+            self._fold_mealy(red_node_in_tree, lex_min_blue)
+
+        return root_node
+
+    def _fold(self, red_node, blue_node):
+        """
+        Recursively fold the subtree of blue_node into red_node (non-Mealy variant).
+
+        Children that exist in both nodes are folded recursively; children that only the blue node has are
+        attached to the red node. red_node is modified in place.
+        """
+        # Change the output of red only to concrete output, ignore None
+        red_node.output = blue_node.output if blue_node.output is not None else red_node.output
+
+        for i in blue_node.children.keys():
+            if i in red_node.children.keys():
+                self._fold(red_node.children[i], blue_node.children[i])
+            else:
+                red_node.children[i] = blue_node.children[i]
+
+    def _fold_mealy(self, red_node, blue_node):
+        """
+        Recursively fold the subtree of blue_node into red_node (Mealy variant).
+
+        Children are keyed by (input, output) pairs. red_node is modified in place.
+        """
+        blue_io_map = {i: o for i, o in blue_node.children.keys()}
+
+        # Re-key the children of red: for inputs that blue also has, the output of blue is used.
+        updated_keys = {}
+        for io, val in red_node.children.items():
+            o = blue_io_map[io[0]] if io[0] in blue_io_map.keys() else io[1]
+            updated_keys[(io[0], o)] = val
+
+        red_node.children = updated_keys
+
+        # Fold children that both nodes share, attach those that only blue has.
+        for io in blue_node.children.keys():
+            if io in red_node.children.keys():
+                self._fold_mealy(red_node.children[io], blue_node.children[io])
+            else:
+                red_node.children[io] = blue_node.children[io]
+