From 9013e96c7facf54cc8069384442b26b6be373cae Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Mon, 10 Feb 2025 09:26:20 +0100
Subject: [PATCH 01/23] Improved sampling utils

---
 aalpy/utils/Sampling.py | 50 +++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 14 deletions(-)

diff --git a/aalpy/utils/Sampling.py b/aalpy/utils/Sampling.py
index 14c60e36001..ba49b81ee85 100644
--- a/aalpy/utils/Sampling.py
+++ b/aalpy/utils/Sampling.py
@@ -1,13 +1,48 @@
+from functools import wraps
 from random import randint, choices, random
 
 from aalpy import MooreMachine, Dfa, NDMooreMachine, Mdp, MarkovChain
 from aalpy.base import Automaton, DeterministicAutomaton
 
+def get_io_traces(automaton: Automaton, input_traces: list) -> list:
+    moore_automata = (MooreMachine, Dfa, NDMooreMachine, Mdp, MarkovChain)
+    is_moore = isinstance(automaton, moore_automata)
 
+    traces = []
+    for input_trace in input_traces:
+        output_trace = automaton.execute_sequence(automaton.initial_state, input_trace)
+        trace = list(zip(input_trace, output_trace))
+        if is_moore:
+            trace = [automaton.initial_state.output] + trace
+        traces.append(trace)
+    return traces
+
+
+def support_automaton_arg(require_transform):
+    def decorator(f):
+        @wraps(f)
+        def inner(alphabet, *args, include_outputs=False, **kwargs):
+            automaton = None
+            if isinstance(alphabet, Automaton):
+                automaton = alphabet
+                if require_transform:
+                    alphabet = alphabet.get_input_alphabet()
+            traces = f(alphabet, *args, **kwargs)
+            if include_outputs:
+                if automaton is None:
+                    raise ValueError("automaton must be provided")
+                traces = get_io_traces(automaton, traces)
+            return traces
+        return inner
+    return decorator
+
+
+@support_automaton_arg(True)
 def sample_with_length_limits(alphabet, nr_samples, min_len, max_len):
     return [choices(alphabet, k = randint(min_len, max_len)) for _ in range(nr_samples)]
 
 
+@support_automaton_arg(True)
 def sample_with_term_prob(alphabet, nr_samples, term_prob):
     ret = []
     for _ in range(nr_samples):
@@ -18,23 +53,10 @@ def sample_with_term_prob(alphabet, nr_samples, term_prob):
     return ret
 
 
+@support_automaton_arg(False)
 def get_complete_sample(automaton: DeterministicAutomaton):
     alphabet = automaton.get_input_alphabet()
     automaton.compute_prefixes()
     char_set = automaton.compute_characterization_set()
     infixes = [(x,) for x in alphabet] + [tuple()]
     return [state.prefix + infix + suffix for state in automaton.states for suffix in char_set for infix in infixes]
-
-
-def get_io_traces(automaton: Automaton, input_traces: list) -> list:
-    moore_automata = (MooreMachine, NDMooreMachine, Mdp, MarkovChain)
-    is_moore = isinstance(automaton, moore_automata)
-
-    traces = []
-    for input_trace in input_traces:
-        output_trace = automaton.execute_sequence(automaton.initial_state, input_trace)
-        trace = list(zip(input_trace, output_trace))
-        if is_moore:
-            trace = [automaton.initial_state.output] + trace
-        traces.append(trace)
-    return traces

From 1139d21d8a5b72f368d930474e234b7a332ecae1 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Mon, 10 Feb 2025 10:02:24 +0100
Subject: [PATCH 02/23] Fixed compatibility issue and rectified red_state order

---
 .../general_passive/GeneralizedStateMerging.py  | 17 ++++++++++++-----
 aalpy/learning_algs/general_passive/Node.py     |  5 +++++
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index 2370aed9a16..010675d94b1 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -4,7 +4,7 @@
 from collections import deque
 
 from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \
-    OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator
+    OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders
 from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation, hoeffding_compatibility
 
 
@@ -70,8 +70,11 @@ def __init__(self, *,
         self.score_calc: ScoreCalculation = score_calc
 
         if node_order is None:
-            node_order = Node.__lt__
-        self.node_order = functools.cmp_to_key(lambda a, b: -1 if node_order(a, b) else 1)
+            node_order = NodeOrders.Default
+        if node_order is NodeOrders.NoCompare or node_order is NodeOrders.Default:
+            self.node_order = node_order
+        else:
+            self.node_order = functools.cmp_to_key(lambda a, b: -1 if node_order(a, b) else 1)
 
         self.pta_preprocessing = pta_preprocessing or (lambda x: x)
         self.postprocessing = postprocessing or (lambda x: x)
@@ -128,7 +131,11 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
             # no blue states left -> done
             if len(blue_states) == 0:
                 break
-            blue_states.sort(key=self.node_order)
+            if self.node_order is not NodeOrders.NoCompare:
+                blue_states.sort(key=self.node_order)
+                # red states are always sorted using default order on original prefix
+                if self.node_order is not NodeOrders.Default:
+                    red_states.sort(key=self.node_order)
 
             # loop over blue states
             promotion = False
@@ -158,7 +165,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
 
                 # no merge candidates for this blue state -> promote
                 if all(part.score is False for part in current_candidates.values()):
-                    insort(red_states, blue_state, key=self.node_order)
+                    red_states.append(blue_state)
                     instrumentation.log_promote(blue_state)
                     promotion = True
                     break
diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index be663be7b2e..91f099f19e5 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -1,3 +1,4 @@
+import functools
 import math
 import pathlib
 from functools import total_ordering
@@ -402,3 +403,7 @@ def local_log_likelihood_contribution(self):
 
     def count(self):
         return sum(trans.count for _, trans in self.transition_iterator())
+
+class NodeOrders:
+    NoCompare = lambda n: 0
+    Default = functools.cmp_to_key(lambda a, b: -1 if a < b else 1)
\ No newline at end of file

From e7868156c3397c79ee84d10a83740f925897d6f0 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Mon, 10 Feb 2025 10:31:23 +0100
Subject: [PATCH 03/23] deleted unnecessary definition of eq and hash for Node
 class

---
 aalpy/learning_algs/general_passive/Node.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 91f099f19e5..197adadf3a0 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -110,12 +110,6 @@ def __lt__(self, other, compare_length_only=False):
         except TypeError:
             return [str(x) for x in own_p] < [str(x) for x in other_p]
 
-    def __eq__(self, other):
-        return self is other  # TODO hack, does this lead to problems down the line?
-
-    def __hash__(self):
-        return id(self)  # TODO This is a hack
-
     # TODO implicit prefixes as currently implemented require O(length) time for prefix calculations (e.g. to determine the minimal blue node)
     # other options would be to have more efficient explicit prefixes such as shared list representations
     def get_prefix_length(self):

From 50850539dc2469745eaa8e63584c8a25b8cb0031 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Mon, 10 Feb 2025 10:37:29 +0100
Subject: [PATCH 04/23] fixed car alarm model

---
 DotModels/car_alarm.dot | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/DotModels/car_alarm.dot b/DotModels/car_alarm.dot
index 7cb052e4b35..bc5829d710a 100644
--- a/DotModels/car_alarm.dot
+++ b/DotModels/car_alarm.dot
@@ -5,22 +5,18 @@ q3_locked_closed [label="A"];
 q5_unlocked_closed [label="N"];
 q6_unlocked_open [label="N"];
 q7_locked_open [label="N"];
-q4_faulty [label="N"];
-q1_locked_closed -> q2_locked_open  [label="d:1"];
-q1_locked_closed -> q5_unlocked_closed  [label="l:1"];
-q2_locked_open -> q3_locked_closed  [label="d:1"];
-q2_locked_open -> q6_unlocked_open  [label="l:1"];
-q3_locked_closed -> q2_locked_open  [label="d:1"];
-q3_locked_closed -> q5_unlocked_closed  [label="l:1"];
-q5_unlocked_closed -> q6_unlocked_open  [label="d:1"];
-q5_unlocked_closed -> q1_locked_closed  [label="l:1"];
-q6_unlocked_open -> q5_unlocked_closed  [label="d:1"];
-q6_unlocked_open -> q7_locked_open  [label="l:1"];
-q7_locked_open -> q4_faulty  [label="d:1"];
-q7_locked_open -> q6_unlocked_open  [label="l:1"];
-q4_faulty -> q2_locked_open  [label="d:0.9"];
-q4_faulty -> q7_locked_open  [label="d:0.1"];
-q4_faulty -> q5_unlocked_closed  [label="l:1"];
+q1_locked_closed -> q2_locked_open  [label="d"];
+q1_locked_closed -> q5_unlocked_closed  [label="l"];
+q2_locked_open -> q3_locked_closed  [label="d"];
+q2_locked_open -> q6_unlocked_open  [label="l"];
+q3_locked_closed -> q2_locked_open  [label="d"];
+q3_locked_closed -> q5_unlocked_closed  [label="l"];
+q5_unlocked_closed -> q6_unlocked_open  [label="d"];
+q5_unlocked_closed -> q1_locked_closed  [label="l"];
+q6_unlocked_open -> q5_unlocked_closed  [label="d"];
+q6_unlocked_open -> q7_locked_open  [label="l"];
+q7_locked_open -> q1_locked_closed  [label="d"];
+q7_locked_open -> q6_unlocked_open  [label="l"];
 __start0 [label="", shape=none];
 __start0 -> q1_locked_closed  [label=""];
 }

From c6f8a4bd2f7de31a23667dcd710f8996c4ef089a Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Mon, 10 Feb 2025 10:55:42 +0100
Subject: [PATCH 05/23] check compatibility with target type on
 Node.to_automaton()

---
 aalpy/learning_algs/general_passive/Node.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 197adadf3a0..0e3f75e2a94 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -191,7 +191,7 @@ def is_tree(self):
         return True
 
     def to_automaton(self, output_behavior: OutputBehavior, transition_behavior: TransitionBehavior,
-                     check_behavior=False, set_prefix=False) -> Automaton:
+                     check_behavior=True, set_prefix=False) -> Automaton:
         nodes = self.get_all_nodes()
 
         if check_behavior:

From 3c50afbfce8be8dcc6177357d84bc28552bb5d27 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Mon, 10 Feb 2025 11:01:49 +0100
Subject: [PATCH 06/23] clarified error messages of GSM

---
 .../learning_algs/general_passive/GeneralizedStateMerging.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index 010675d94b1..3adc00a1a03 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -54,10 +54,10 @@ def __init__(self, *,
                  depth_first=False):
 
         if output_behavior not in OutputBehaviorRange:
-            raise ValueError(f"invalid output behavior {output_behavior}")
+            raise ValueError(f"invalid output behavior {output_behavior}. should be in {OutputBehaviorRange}")
         self.output_behavior: OutputBehavior = output_behavior
         if transition_behavior not in TransitionBehaviorRange:
-            raise ValueError(f"invalid transition behavior {transition_behavior}")
+            raise ValueError(f"invalid transition behavior {transition_behavior}. should be in {TransitionBehaviorRange}")
         self.transition_behavior: TransitionBehavior = transition_behavior
 
         if score_calc is None:

From c7bc17189d443f6cac72cc5a3233dd37985f8ed2 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Mon, 10 Feb 2025 16:58:19 +0100
Subject: [PATCH 07/23] cosmetics (blank lines and comments in GSM)

---
 .../general_passive/GeneralizedStateMerging.py              | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index 3adc00a1a03..9b8218825f0 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -146,7 +146,6 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
                 # calculate partitions resulting from merges with red states if necessary
                 current_candidates: Dict[Node, Partitioning] = dict()
                 perfect_partitioning = None
-
                 red_state = None
                 for red_state in red_states:
                     partition = partition_candidates.get((red_state, blue_state))
@@ -156,8 +155,8 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
                         perfect_partitioning = partition
                         break
                     current_candidates[red_state] = partition
-
                 assert red_state is not None
+
                 # partition with perfect score found: don't consider anything else
                 if perfect_partitioning:
                     partition_candidates = {(red_state, blue_state): perfect_partitioning}
@@ -254,9 +253,9 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
         blue_in_sym, blue_out_sym = blue.prefix_access_pair
         blue_parent.transitions[blue_in_sym][blue_out_sym].target = red
 
+        # loop over implied merges
         q: deque[Tuple[Node, Node]] = deque([(red, blue)])
         pop = q.pop if self.depth_first else q.popleft
-
         while len(q) != 0:
             red, blue = pop()
             partition = update_partition(red, blue)
@@ -265,6 +264,7 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
                 if self.compute_local_compatibility(partition, blue) is False:
                     return partitioning
 
+            # create implied merges for all common successors
             for in_sym, blue_transitions in blue.transitions.items():
                 partition_transitions = partition.get_or_create_transitions(in_sym)
                 for out_sym, blue_transition in blue_transitions.items():

From 2f8cab8ab9a6f0df39b3b1d51a76a2b00d7de737 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Mon, 10 Feb 2025 17:01:02 +0100
Subject: [PATCH 08/23] Added support for learning from examples (mealy only)

---
 .../GeneralizedStateMerging.py                |  28 ++++-
 aalpy/learning_algs/general_passive/Node.py   | 103 ++++++++++++++----
 2 files changed, 105 insertions(+), 26 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index 9b8218825f0..99de117da78 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -4,7 +4,7 @@
 from collections import deque
 
 from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \
-    OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders
+    OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output
 from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation, hoeffding_compatibility
 
 
@@ -94,7 +94,7 @@ def compute_local_compatibility(self, a: Node, b: Node):
 
     # TODO: make more generic by adding the option to use a different algorithm than red blue
     #  for selecting potential merge candidates. Maybe using inheritance with abstract `run`.
-    def run(self, data, convert=True, instrumentation: Instrumentation = None):
+    def run(self, data, convert=True, instrumentation: Instrumentation=None, data_format="auto"):
         if instrumentation is None:
             instrumentation = Instrumentation()
         instrumentation.reset(self)
@@ -102,7 +102,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
         if isinstance(data, Node):
             root = data
         else:
-            root = Node.createPTA(data, self.output_behavior)
+            root = Node.createPTA(data, self.output_behavior, data_format)
 
         root = self.pta_preprocessing(root)
         instrumentation.pta_construction_done(root)
@@ -182,6 +182,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
             best_candidate = max(partition_candidates.values(), key=lambda part: part.score)
             for real_node, partition_node in best_candidate.red_mapping.items():
                 real_node.transitions = partition_node.transitions
+                real_node.prefix_access_pair = partition_node.prefix_access_pair
                 for access_pair, t_info in real_node.transition_iterator():
                     if t_info.target not in red_states:
                         t_info.target.predecessor = real_node
@@ -269,6 +270,20 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
                 partition_transitions = partition.get_or_create_transitions(in_sym)
                 for out_sym, blue_transition in blue_transitions.items():
                     partition_transition = partition_transitions.get(out_sym)
+                    # handle unknown output
+                    if partition_transition is None:
+                        if out_sym is unknown_output and len(partition_transitions) != 0:
+                            assert len(partition_transitions) == 1
+                            partition_transition = list(partition_transitions.values())[0]
+                        if unknown_output in partition_transitions:
+                            partition_transition = partition_transitions.pop(unknown_output)
+                            partition_transitions[out_sym] = partition_transition
+                            # re-hook access pair
+                            succ_part = update_partition(partition_transition.target, None)
+                            succ_pre_part = update_partition(succ_part.predecessor, None)
+                            if self.output_behavior == "moore" or succ_pre_part is partition:
+                                succ_part.prefix_access_pair = (succ_part.prefix_access_pair[0], out_sym)
+                    # add pairs
                     if partition_transition is not None:
                         q.append((partition_transition.target, blue_transition.target))
                         partition_transition.count += blue_transition.count
@@ -294,6 +309,7 @@ def run_GSM(data, *,
             depth_first=False,
             instrumentation=None,
             convert=True,
+            data_format="auto",
             ):
     """
     TODO
@@ -325,12 +341,14 @@ def run_GSM(data, *,
 
         convert:
 
+        data_format:
+
 
     Returns:
 
 
     """
-    # instantiate the gsm
+    # instantiate gsm
     gsm = GeneralizedStateMerging(
         output_behavior=output_behavior,
         transition_behavior=transition_behavior,
@@ -345,4 +363,4 @@ def run_GSM(data, *,
     )
 
     # run the algorithm
-    return gsm.run(data=data, instrumentation=instrumentation, convert=convert)
+    return gsm.run(data=data, instrumentation=instrumentation, convert=convert, data_format=data_format)
diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 0e3f75e2a94..4c2882df914 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -2,7 +2,7 @@
 import math
 import pathlib
 from functools import total_ordering
-from typing import Dict, Any, List, Tuple, Iterable, Callable, Union, TypeVar, Iterator, Optional
+from typing import Dict, Any, List, Tuple, Iterable, Callable, Union, TypeVar, Iterator, Optional, Sequence
 import pydot
 from copy import copy
 
@@ -19,13 +19,18 @@
 TransitionBehavior = str
 TransitionBehaviorRange = ["deterministic", "nondeterministic", "stochastic"]
 
+DataFormat = str
+DataFormatRange = ["auto", "traces", "examples"]
+
 IOPair = Tuple[Any, Any]
-IOTrace = List[IOPair]
-IOExample = Tuple[Iterable[Any], Any]
+IOTrace = Sequence[IOPair]
+IOExample = Tuple[Sequence[Any], Any]
 
 StateFunction = Callable[['Node'], str]
 TransitionFunction = Callable[['Node', Any, Any], str]
 
+unknown_output = object()
+
 
 def generate_values(base: list, step: Callable, backing_set=True):
     if backing_set:
@@ -65,6 +70,21 @@ def union_iterator(a: Dict[Key, Val], b: Dict[Key, Val], default: Val = None) ->
         a_val = a.get(key, default)
         yield key, a_val, b_val
 
+# TODO maybe reuse this in classic RPNI
+def detect_data_format(data):
+    if not isinstance(data, Sequence):
+        raise ValueError("wrong input format. expected sequence type.")
+    for data_point in data:
+        if len(data_point) != 2:
+            return "traces"
+        o1, o2 = data_point
+        if not isinstance(o1, Sequence):
+            return "traces"
+        if not isinstance(o2, Sequence):
+            return "examples"
+    if len(data) == 0:
+        return "traces"
+    raise ValueError("ambiguous data format. data format needs to be specified explicitly.")
 
 # TODO maybe split this for maintainability (and perfomance?)
 class TransitionInfo:
@@ -327,13 +347,10 @@ def node_naming(node: Node):
             file_ext = 'dot'
         graph.write(path=str(path) + "." + file_ext, prog=engine, format=format)
 
-    def add_data(self, data):
-        for seq in data:
-            self.add_trace(seq)
 
-    def add_trace(self, data: IOTrace):
+    def add_trace(self, trace: IOTrace):
         curr_node: Node = self
-        for in_sym, out_sym in data:
+        for in_sym, out_sym in trace:
             transitions = curr_node.get_or_create_transitions(in_sym)
             info = transitions.get(out_sym)
             if info is None:
@@ -345,19 +362,59 @@ def add_trace(self, data: IOTrace):
                 node = info.target
             curr_node = node
 
-    def add_example(self, data: IOExample):
-        # TODO add support for example based algorithms
-        raise NotImplementedError()
+    def add_example(self, example: IOExample):
+        inputs, output = example
+        curr_node: Node = self
+        in_sym = None
+
+        # step through inputs and add transitions
+        for in_sym in inputs:
+            transitions = curr_node.get_or_create_transitions(in_sym)
+            t_infos = list(transitions.values())
+            if len(t_infos) == 0:
+                node = Node((in_sym, unknown_output), curr_node)
+                t_info = TransitionInfo(node, 1, node, 1)
+                transitions[unknown_output] = t_info
+            elif len(t_infos) == 1:
+                t_info = t_infos[0]
+                t_info.count += 1
+                t_info.original_count += 1
+                node = t_info.target
+            else:
+                # This should never happen
+                raise ValueError("nondeterminism encountered for GSM with examples. not supported")
+            curr_node = node
+
+        # set last output
+        curr_node.prefix_access_pair = (curr_node.prefix_access_pair[0], output)
+        pred = curr_node.predecessor
+        if pred:
+            transitions = pred.transitions[in_sym]
+            if unknown_output in transitions:
+                transitions[output] = transitions[unknown_output]
+                del transitions[unknown_output]
+            if output not in transitions:
+                raise ValueError("nondeterminism encountered for GSM with examples. not supported")
+
 
     @staticmethod
-    def createPTA(data, output_behavior) -> 'Node':
-        if output_behavior == "moore":
-            initial_output = data[0][0]
-            data = (d[1:] for d in data)
-        else:
-            initial_output = None
-        root_node = Node((None, initial_output), None)
-        root_node.add_data(data)
+    def createPTA(data, output_behavior, data_format="auto") -> 'Node':
+        if data_format not in DataFormatRange:
+            raise ValueError(f"invalid data format {data_format}. should be in {DataFormatRange}")
+        if data_format == "auto":
+            data_format = detect_data_format(data)
+
+        root_node = Node((None, None), None)
+        if data_format == "examples":
+            for example in data:
+                root_node.add_example(example)
+        if data_format == "traces":
+            if output_behavior == "moore":
+                initial_output = data[0][0]
+                root_node.prefix_access_pair = (None, initial_output)
+                data = (d[1:] for d in data)
+            for trace in data:
+                root_node.add_trace(trace)
         return root_node
 
     def is_locally_deterministic(self):
@@ -367,8 +424,12 @@ def is_deterministic(self):
         return all(node.is_locally_deterministic() for node in self.get_all_nodes())
 
     def deterministic_compatible(self, other: 'Node'):
-        common_keys = filter(lambda key: key in self.transitions.keys(), other.transitions.keys())
-        return all(list(self.transitions[key].keys()) == list(other.transitions[key].keys()) for key in common_keys)
+        for _, trans_self, trans_other in intersection_iterator(self.transitions, other.transitions):
+            if unknown_output in trans_self or unknown_output in trans_other:
+                continue
+            if list(trans_self.keys()) != list(trans_other.keys()):
+                return False
+        return True
 
     def is_moore(self):
         output_dict = dict()

From a1475dd57034d6cf1c278a8afd37fb59c16753ef Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Tue, 11 Feb 2025 11:40:50 +0100
Subject: [PATCH 09/23] fixed wrong initial output + better messages

---
 aalpy/learning_algs/general_passive/Node.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 4c2882df914..1fe540eb461 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -29,8 +29,17 @@
 StateFunction = Callable[['Node'], str]
 TransitionFunction = Callable[['Node', Any, Any], str]
 
-unknown_output = object()
+class SpecialValue:
+    def __init__(self, value):
+        self.value = value
 
+    def __str__(self):
+        return str(self.value)
+
+    def __repr__(self):
+        return str(self.value)
+
+unknown_output = SpecialValue("Output Unknown")
 
 def generate_values(base: list, step: Callable, backing_set=True):
     if backing_set:
@@ -404,7 +413,7 @@ def createPTA(data, output_behavior, data_format="auto") -> 'Node':
         if data_format == "auto":
             data_format = detect_data_format(data)
 
-        root_node = Node((None, None), None)
+        root_node = Node((None, unknown_output), None)
         if data_format == "examples":
             for example in data:
                 root_node.add_example(example)

From 691e0ae0d6a9c84315c6ebff0bae428a952610ff Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Tue, 11 Feb 2025 13:22:12 +0100
Subject: [PATCH 10/23] adapt moore checks for unknown output

---
 aalpy/learning_algs/general_passive/Node.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 1fe540eb461..3cb5fb7f9ec 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -441,18 +441,17 @@ def deterministic_compatible(self, other: 'Node'):
         return True
 
     def is_moore(self):
-        output_dict = dict()
         for node in self.get_all_nodes():
             for (in_sym, out_sym), transition in node.transition_iterator():
-                child = transition.target
-                if child in output_dict.keys() and output_dict[child] != out_sym:
+                child_output = transition.target.get_prefix_output()
+                if out_sym is not unknown_output and child_output != out_sym:
                     return False
-                else:
-                    output_dict[child] = out_sym
         return True
 
     def moore_compatible(self, other: 'Node'):
-        return self.get_prefix_output() == other.get_prefix_output()
+        so = self.get_prefix_output()
+        oo = other.get_prefix_output()
+        return so == oo or so is unknown_output or oo is unknown_output
 
     def local_log_likelihood_contribution(self):
         llc = 0

From 2bdf572c93815bfcf87886bc11f520cf27b28e77 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Tue, 11 Feb 2025 14:01:01 +0100
Subject: [PATCH 11/23] fix corner case for unknown outputs with moore behavior

---
 .../general_passive/GeneralizedStateMerging.py        | 11 +++++++----
 aalpy/learning_algs/general_passive/Node.py           |  5 ++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index 99de117da78..a339198b6bc 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -186,7 +186,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation=None, data_fo
                 for access_pair, t_info in real_node.transition_iterator():
                     if t_info.target not in red_states:
                         t_info.target.predecessor = real_node
-                        t_info.target.prefix_access_pair = access_pair  # not sure whether this is actually required
+                        # t_info.target.prefix_access_pair = access_pair  # not sure whether this is actually required
             instrumentation.log_merge(best_candidate)
             # FUTURE: optimizations for compatibility tests where merges can be orthogonal
             # FUTURE: caching for aggregating compatibility tests
@@ -254,6 +254,10 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
         blue_in_sym, blue_out_sym = blue.prefix_access_pair
         blue_parent.transitions[blue_in_sym][blue_out_sym].target = red
 
+        if blue_out_sym is not unknown_output and self.output_behavior == "moore":
+            partition = update_partition(red, None)
+            partition.prefix_access_pair = (partition.get_prefix_input(), blue_out_sym)
+
         # loop over implied merges
         q: deque[Tuple[Node, Node]] = deque([(red, blue)])
         pop = q.pop if self.depth_first else q.popleft
@@ -280,9 +284,8 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
                             partition_transitions[out_sym] = partition_transition
                             # re-hook access pair
                             succ_part = update_partition(partition_transition.target, None)
-                            succ_pre_part = update_partition(succ_part.predecessor, None)
-                            if self.output_behavior == "moore" or succ_pre_part is partition:
-                                succ_part.prefix_access_pair = (succ_part.prefix_access_pair[0], out_sym)
+                            if self.output_behavior == "moore" or succ_part.predecessor is red:
+                                succ_part.prefix_access_pair = (succ_part.get_prefix_input(), out_sym)
                     # add pairs
                     if partition_transition is not None:
                         q.append((partition_transition.target, blue_transition.target))
diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 3cb5fb7f9ec..19c7eda3547 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -152,6 +152,9 @@ def get_prefix_length(self):
     def get_prefix_output(self):
         return self.prefix_access_pair[1]
 
+    def get_prefix_input(self):
+        return self.prefix_access_pair[0]
+
     def get_prefix(self, include_output=True):
         node = self
         prefix = []
@@ -395,7 +398,7 @@ def add_example(self, example: IOExample):
             curr_node = node
 
         # set last output
-        curr_node.prefix_access_pair = (curr_node.prefix_access_pair[0], output)
+        curr_node.prefix_access_pair = (curr_node.get_prefix_input(), output)
         pred = curr_node.predecessor
         if pred:
             transitions = pred.transitions[in_sym]

From d72cd825a87f793e8d9ce933c7b38bc56eff1e31 Mon Sep 17 00:00:00 2001
From: Edi Muskardin <edi.muskardin@silicon-austria.com>
Date: Tue, 11 Feb 2025 16:02:03 +0100
Subject: [PATCH 12/23] update auto-format detection

---
 aalpy/learning_algs/general_passive/Node.py | 34 +++++++++++++--------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 19c7eda3547..23698570011 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -29,6 +29,7 @@
 StateFunction = Callable[['Node'], str]
 TransitionFunction = Callable[['Node', Any, Any], str]
 
+
 class SpecialValue:
     def __init__(self, value):
         self.value = value
@@ -39,8 +40,10 @@ def __str__(self):
     def __repr__(self):
         return str(self.value)
 
+
 unknown_output = SpecialValue("Output Unknown")
 
+
 def generate_values(base: list, step: Callable, backing_set=True):
     if backing_set:
         result = list(base)
@@ -79,21 +82,29 @@ def union_iterator(a: Dict[Key, Val], b: Dict[Key, Val], default: Val = None) ->
         a_val = a.get(key, default)
         yield key, a_val, b_val
 
+
 # TODO maybe reuse this in classic RPNI
 def detect_data_format(data):
     if not isinstance(data, Sequence):
         raise ValueError("wrong input format. expected sequence type.")
-    for data_point in data:
-        if len(data_point) != 2:
-            return "traces"
-        o1, o2 = data_point
-        if not isinstance(o1, Sequence):
-            return "traces"
-        if not isinstance(o2, Sequence):
-            return "examples"
     if len(data) == 0:
         return "traces"
-    raise ValueError("ambiguous data format. data format needs to be specified explicitly.")
+
+    detected_format = None
+    for data_point in data:
+
+        if len(data_point) == 2 and isinstance(data_point[0], Sequence):
+            data_point_format = 'examples'
+        else:
+            data_point_format = 'traces'
+
+        if detected_format is None:
+            detected_format = data_point_format
+        elif data_point_format != detected_format:
+            raise ValueError("ambiguous data format. data format needs to be specified explicitly.")
+
+    return detected_format
+
 
 # TODO maybe split this for maintainability (and perfomance?)
 class TransitionInfo:
@@ -359,7 +370,6 @@ def node_naming(node: Node):
             file_ext = 'dot'
         graph.write(path=str(path) + "." + file_ext, prog=engine, format=format)
 
-
     def add_trace(self, trace: IOTrace):
         curr_node: Node = self
         for in_sym, out_sym in trace:
@@ -408,7 +418,6 @@ def add_example(self, example: IOExample):
             if output not in transitions:
                 raise ValueError("nondeterminism encountered for GSM with examples. not supported")
 
-
     @staticmethod
     def createPTA(data, output_behavior, data_format="auto") -> 'Node':
         if data_format not in DataFormatRange:
@@ -470,6 +479,7 @@ def local_log_likelihood_contribution(self):
     def count(self):
         return sum(trans.count for _, trans in self.transition_iterator())
 
+
 class NodeOrders:
     NoCompare = lambda n: 0
-    Default = functools.cmp_to_key(lambda a, b: -1 if a < b else 1)
\ No newline at end of file
+    Default = functools.cmp_to_key(lambda a, b: -1 if a < b else 1)

From 3e5b8b0b3164aa228b25245e4f9f3f3e830ab42c Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Tue, 11 Feb 2025 17:24:34 +0100
Subject: [PATCH 13/23] fixed GSM bug due to unknown outputs

---
 .../learning_algs/general_passive/GeneralizedStateMerging.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index a339198b6bc..7a6767b81f0 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -254,8 +254,8 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
         blue_in_sym, blue_out_sym = blue.prefix_access_pair
         blue_parent.transitions[blue_in_sym][blue_out_sym].target = red
 
-        if blue_out_sym is not unknown_output and self.output_behavior == "moore":
-            partition = update_partition(red, None)
+        partition = update_partition(red, None)
+        if partition.get_prefix_output() is unknown_output and self.output_behavior == "moore":
             partition.prefix_access_pair = (partition.get_prefix_input(), blue_out_sym)
 
         # loop over implied merges

From 4c41e6b0cde7a089b7e1f078fbc2948376123728 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Wed, 12 Feb 2025 08:40:55 +0100
Subject: [PATCH 14/23] update auto-format detection

---
 aalpy/learning_algs/general_passive/Node.py | 43 +++++++++++++--------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 23698570011..2df264fc3f6 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -84,26 +84,35 @@ def union_iterator(a: Dict[Key, Val], b: Dict[Key, Val], default: Val = None) ->
 
 
 # TODO maybe reuse this in classic RPNI
-def detect_data_format(data):
-    if not isinstance(data, Sequence):
-        raise ValueError("wrong input format. expected sequence type.")
+def detect_data_format(data, check_consistency=True):
+    accepted_types = (Tuple, List)
+    data_format = None
+    def check_data_format(value):
+        if data_format is None or data_format == value:
+            return value
+        raise ValueError("inconsistent data")
+
+    if not isinstance(data, accepted_types):
+        raise ValueError("wrong input format. expected tuple or list.")
     if len(data) == 0:
         return "traces"
-
-    detected_format = None
     for data_point in data:
-
-        if len(data_point) == 2 and isinstance(data_point[0], Sequence):
-            data_point_format = 'examples'
-        else:
-            data_point_format = 'traces'
-
-        if detected_format is None:
-            detected_format = data_point_format
-        elif data_point_format != detected_format:
-            raise ValueError("ambiguous data format. data format needs to be specified explicitly.")
-
-    return detected_format
+        # Classify each data point exactly once. Unpacking is never attempted on a
+        # data point whose length is not 2, which used to raise "too many values to unpack".
+        if len(data_point) != 2 or not isinstance(data_point[0], accepted_types):
+            # not an (input sequence, output) pair, so it can only be a trace
+            data_point_format = "traces"
+        elif not isinstance(data_point[1], accepted_types):
+            data_point_format = "examples"
+        else:
+            # a pair of two sequences fits both formats and decides nothing
+            continue
+        data_format = check_data_format(data_point_format)
+        if not check_consistency:
+            return data_format
+    if data_format is None:
+        raise ValueError("ambiguous data format. data format needs to be specified explicitly.")
+    return data_format
 
 
 # TODO maybe split this for maintainability (and perfomance?)

From 47f2084ff63d3cf9e7820df7a9f406c3f0d52d52 Mon Sep 17 00:00:00 2001
From: Edi Muskardin <edi.muskardin@silicon-austria.com>
Date: Wed, 12 Feb 2025 09:37:13 +0100
Subject: [PATCH 15/23] update random data generation

---
 aalpy/utils/HelperFunctions.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/aalpy/utils/HelperFunctions.py b/aalpy/utils/HelperFunctions.py
index 54e3cd2ecdb..76ac07c8727 100644
--- a/aalpy/utils/HelperFunctions.py
+++ b/aalpy/utils/HelperFunctions.py
@@ -301,7 +301,7 @@ def convert_i_o_traces_for_RPNI(sequences, automaton_type="mealy"):
 
     for s in sequences:
         if automaton_type in ["moore", "dfa"]:
-            rpni_sequences.add((tuple(),s[0]))
+            rpni_sequences.add((tuple(), s[0]))
             s = s[1:]
         for i in range(len(s)):
             inputs = tuple([io[0] for io in s[:i + 1]])
@@ -349,11 +349,14 @@ def is_balanced(input_seq, vpa_alphabet):
     return counter == 0
 
 
-def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_len=1, max_seq_len=16):
+def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_len=1, max_seq_len=16,
+                                             sequance_type='single_output'):
+    assert sequance_type in {'io_trace', 'single_output'}
 
     alphabet = model.get_input_alphabet()
-    input_output_sequances = []
-    while len(input_output_sequances) < num_sequances:
+    dataset = []
+
+    while len(dataset) < num_sequances:
         sequance = []
         for _ in range(random.randint(min_seq_len, max_seq_len)):
             sequance.append(random.choice(alphabet))
@@ -361,9 +364,12 @@ def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_
         model.reset_to_initial()
         outputs = model.execute_sequence(model.initial_state, sequance)
 
-        input_output_sequances.append(list(zip(sequance, outputs)))
+        if sequance_type == 'io_trace':
+            dataset.append(list(zip(sequance, outputs)))
+        else:
+            dataset.append((sequance, outputs[-1]))
 
-    return input_output_sequances
+    return dataset
 
 
 def generate_input_output_data_from_vpa(vpa, num_sequances=1000, max_seq_len=16, max_attempts=None):
@@ -396,7 +402,7 @@ def generate_input_output_data_from_vpa(vpa, num_sequances=1000, max_seq_len=16,
             sequance += (chosen_input,)
 
             output = vpa.step(chosen_input)
-            #if vpa.is_balanced(sequance):
+            # if vpa.is_balanced(sequance):
             data_set.add((sequance, output))
 
     data_set = list(data_set)

From 85e27d2404c54a67ebad5c3703ddc2f9bb0c7336 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Wed, 12 Feb 2025 09:54:05 +0100
Subject: [PATCH 16/23] one more fix for GSM + Examples

---
 aalpy/learning_algs/general_passive/GeneralizedStateMerging.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index 7a6767b81f0..2113da3883e 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -280,11 +280,12 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
                             assert len(partition_transitions) == 1
                             partition_transition = list(partition_transitions.values())[0]
                         if unknown_output in partition_transitions:
+                            assert len(partition_transitions) == 1
                             partition_transition = partition_transitions.pop(unknown_output)
                             partition_transitions[out_sym] = partition_transition
                             # re-hook access pair
                             succ_part = update_partition(partition_transition.target, None)
-                            if self.output_behavior == "moore" or succ_part.predecessor is red:
+                            if succ_part.get_prefix_output() is unknown_output and (self.output_behavior == "moore" or succ_part.predecessor is red):
                                 succ_part.prefix_access_pair = (succ_part.get_prefix_input(), out_sym)
                     # add pairs
                     if partition_transition is not None:

From 7021b902729398be7a40580a4d1b5e1ebf80406e Mon Sep 17 00:00:00 2001
From: Edi Muskardin <edi.muskardin@silicon-austria.com>
Date: Wed, 12 Feb 2025 10:27:54 +0100
Subject: [PATCH 17/23] expose EDSM

---
 aalpy/learning_algs/__init__.py               |  3 +-
 .../general_passive/GsmAlgorithms.py          | 62 +++++++++++++++++++
 aalpy/utils/HelperFunctions.py                | 23 +++++++
 3 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100644 aalpy/learning_algs/general_passive/GsmAlgorithms.py

diff --git a/aalpy/learning_algs/__init__.py b/aalpy/learning_algs/__init__.py
index 81892586e52..5041af0bf31 100644
--- a/aalpy/learning_algs/__init__.py
+++ b/aalpy/learning_algs/__init__.py
@@ -10,4 +10,5 @@
 from .stochastic_passive.ActiveAleriga import run_active_Alergia
 from .deterministic_passive.RPNI import run_RPNI, run_PAPNI
 from .deterministic_passive.active_RPNI import run_active_RPNI
-from .general_passive.GeneralizedStateMerging import run_GSM
\ No newline at end of file
+from .general_passive.GeneralizedStateMerging import run_GSM
+from .general_passive.GsmAlgorithms import run_EDSM
\ No newline at end of file
diff --git a/aalpy/learning_algs/general_passive/GsmAlgorithms.py b/aalpy/learning_algs/general_passive/GsmAlgorithms.py
new file mode 100644
index 00000000000..f43c2c4dff9
--- /dev/null
+++ b/aalpy/learning_algs/general_passive/GsmAlgorithms.py
@@ -0,0 +1,62 @@
+from typing import Dict, Union
+
+from aalpy import DeterministicAutomaton
+from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM
+from aalpy.learning_algs.general_passive.Node import Node
+from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation
+from aalpy.utils.HelperFunctions import dfa_from_moore
+
+
+def run_EDSM(data, automaton_type, input_completeness=None, print_info=True) -> Union[DeterministicAutomaton, None]:
+    """
+    Run Evidence Driven State Merging.
+
+    Args:
+        data: sequence of input sequences and corresponding label. E.g. [[(i1,i2,i3, ...), label], ...]
+        automaton_type: either 'dfa', 'mealy', 'moore'. Note that for 'mealy' machine learning, data has to be prefix-closed.
+        input_completeness: either None, 'sink_state', or 'self_loop'. If None, learned model could be input incomplete,
+        sink_state will lead all undefined inputs from some state to the sink state, whereas self_loop will simply create
+        a self loop. In case of Mealy learning output of the added transition will be 'epsilon'.
+        print_info: print learning progress and runtime information
+
+    Returns:
+        Model conforming to the data, or None if data is non-deterministic.
+        NOTE(review): no branch of this function returns None itself - confirm that run_GSM does.
+
+    """
+    assert automaton_type in {'dfa', 'mealy', 'moore'}
+    assert input_completeness in {None, 'self_loop', 'sink_state'}
+
+    def EDSM_score(part: Dict[Node, Node]):
+        nr_partitions = len(set(part.values()))
+        nr_merged = len(part)
+        return nr_merged - nr_partitions
+
+    score = ScoreCalculation(score_function=EDSM_score)
+
+    internal_automaton_type = 'moore' if automaton_type != 'mealy' else automaton_type
+
+    if print_info:
+        print(f'Running EDSM.')
+
+    learned_model = run_GSM(data, output_behavior=internal_automaton_type,
+                            transition_behavior="deterministic",
+                            score_calc=score)
+
+    if print_info:
+        print(f'EDSM learned {learned_model.size} state automaton.')
+
+    if automaton_type == 'dfa':
+        learned_model = dfa_from_moore(learned_model)
+
+    if not learned_model.is_input_complete():
+        if not input_completeness:
+            if print_info:
+                print('Warning: Learned Model is not input complete (inputs not defined for all states). '
+                      'Consider calling .make_input_complete()')
+        else:
+            if print_info:
+                print(f'Learned model was not input complete. Adapting it with {input_completeness} transitions.')
+            learned_model.make_input_complete(input_completeness)
+
+    return learned_model
diff --git a/aalpy/utils/HelperFunctions.py b/aalpy/utils/HelperFunctions.py
index 76ac07c8727..c38e0b175ba 100644
--- a/aalpy/utils/HelperFunctions.py
+++ b/aalpy/utils/HelperFunctions.py
@@ -415,3 +415,26 @@ def product_with_possible_empty_iterable(*iterables, repeat=1):
     """
     non_empty_iterables = [it for it in iterables if it]
     return product(*non_empty_iterables, repeat=repeat)
+
+
+def dfa_from_moore(moore_model):
+    """Build a Dfa from a Moore machine whose state outputs are True, False or None (None becomes non-accepting)."""
+    from aalpy.automata import Dfa, DfaState
+    dfa_state_map = dict()
+    # create one DfaState per Moore state, keyed by state_id; any other output raises ValueError
+    for moore_state in moore_model.states:
+        if moore_state.output not in {True, False, None}:
+            raise ValueError('Cannot convert Moore model with unrestricted output domain to DFA. '
+                             f'Output domain should be {True, False, None}. Problematic output: {moore_state.output}'
+                             )
+
+        is_accepting = moore_state.output if moore_state.output is not None else False
+        dfa_state_map[moore_state.state_id] = DfaState(moore_state.state_id, is_accepting)
+
+    # copy every transition, redirecting its target to the DFA state with the same state_id
+    for moore_state in moore_model.states:
+        for i, reached_state in moore_state.transitions.items():
+            dfa_state_map[moore_state.state_id].transitions[i] = dfa_state_map[reached_state.state_id]
+
+    initial_state = dfa_state_map[moore_model.initial_state.state_id]
+    return Dfa(initial_state, list(dfa_state_map.values()))

From d53b25524f274f160e08af8854cf0a53435d954f Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Wed, 12 Feb 2025 11:10:39 +0100
Subject: [PATCH 18/23] extracted common method for resolving unknown output in
 prefix

---
 .../general_passive/GeneralizedStateMerging.py           | 8 ++++----
 aalpy/learning_algs/general_passive/Node.py              | 9 ++++++---
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index 2113da3883e..bb645801c6c 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -255,8 +255,8 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
         blue_parent.transitions[blue_in_sym][blue_out_sym].target = red
 
         partition = update_partition(red, None)
-        if partition.get_prefix_output() is unknown_output and self.output_behavior == "moore":
-            partition.prefix_access_pair = (partition.get_prefix_input(), blue_out_sym)
+        if self.output_behavior == "moore":
+            partition.resolve_unknown_prefix_output(blue_out_sym)
 
         # loop over implied merges
         q: deque[Tuple[Node, Node]] = deque([(red, blue)])
@@ -285,8 +285,8 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
                             partition_transitions[out_sym] = partition_transition
                             # re-hook access pair
                             succ_part = update_partition(partition_transition.target, None)
-                            if succ_part.get_prefix_output() is unknown_output and (self.output_behavior == "moore" or succ_part.predecessor is red):
-                                succ_part.prefix_access_pair = (succ_part.get_prefix_input(), out_sym)
+                            if self.output_behavior == "moore" or succ_part.predecessor is red:
+                                succ_part.resolve_unknown_prefix_output(out_sym)
                     # add pairs
                     if partition_transition is not None:
                         q.append((partition_transition.target, blue_transition.target))
diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 2df264fc3f6..905e025a206 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -175,6 +175,10 @@ def get_prefix_output(self):
     def get_prefix_input(self):
         return self.prefix_access_pair[0]
 
+    def resolve_unknown_prefix_output(self, value):
+        if self.get_prefix_output() is unknown_output:
+            self.prefix_access_pair = (self.get_prefix_input(), value)
+
     def get_prefix(self, include_output=True):
         node = self
         prefix = []
@@ -417,13 +421,12 @@ def add_example(self, example: IOExample):
             curr_node = node
 
         # set last output
-        curr_node.prefix_access_pair = (curr_node.get_prefix_input(), output)
+        curr_node.resolve_unknown_prefix_output(output)
         pred = curr_node.predecessor
         if pred:
             transitions = pred.transitions[in_sym]
             if unknown_output in transitions:
-                transitions[output] = transitions[unknown_output]
-                del transitions[unknown_output]
+                transitions[output] = transitions.pop(unknown_output)
             if output not in transitions:
                 raise ValueError("nondeterminism encountered for GSM with examples. not supported")
 

From a251f76100b81b7c97193e0a8ed3200afaa3ce09 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Wed, 12 Feb 2025 11:15:17 +0100
Subject: [PATCH 19/23] simplified instrumentation class

---
 .../general_passive/Instrumentation.py        | 22 ++-----------------
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/Instrumentation.py b/aalpy/learning_algs/general_passive/Instrumentation.py
index edc91c6fc79..50112ad8550 100644
--- a/aalpy/learning_algs/general_passive/Instrumentation.py
+++ b/aalpy/learning_algs/general_passive/Instrumentation.py
@@ -1,5 +1,4 @@
 import time
-from functools import wraps
 from typing import Dict, Optional
 
 from aalpy.learning_algs.general_passive.GeneralizedStateMerging import Instrumentation, Partitioning, \
@@ -8,19 +7,6 @@
 
 
 class ProgressReport(Instrumentation):
-    @staticmethod
-    def min_lvl(lvl):
-        def decorator(fn):
-            @wraps(fn)
-            def wrapper(this, *args, **kw):
-                if this.lvl < lvl:
-                    return
-                fn(this, *args, **kw)
-
-            return wrapper
-
-        return decorator
-
     def __init__(self, lvl):
         super().__init__()
         self.lvl = lvl
@@ -45,10 +31,9 @@ def reset(self, gsm: GeneralizedStateMerging):
 
         self.previous_time = time.time()
 
-    @min_lvl(1)
     def pta_construction_done(self, root):
         print(f'PTA Construction Time: {round(time.time() - self.previous_time, 2)}')
-        if self.lvl != 1:
+        if 1 < self.lvl:
             states = root.get_all_nodes()
             leafs = [state for state in states if len(state.transitions.keys()) == 0]
             depth = [state.get_prefix_length() for state in leafs]
@@ -60,24 +45,21 @@ def pta_construction_done(self, root):
     def print_status(self):
         reset_char = "\33[2K\r"
         print_str = reset_char + f'Current automaton size: {self.nr_red_states}'
-        if self.lvl != 1 and not self.gsm.compatibility_on_futures:
+        if 1 < self.lvl and not self.gsm.compatibility_on_futures:
             print_str += f' Merged: {self.nr_merged_states_total} Remaining: {self.pta_size - self.nr_red_states - self.nr_merged_states_total}'
         print(print_str, end="")
 
-    @min_lvl(1)
     def log_promote(self, node: Node):
         self.log.append(["promote", (node.get_prefix(),)])
         self.nr_red_states += 1
         self.print_status()
 
-    @min_lvl(1)
     def log_merge(self, part: Partitioning):
         self.log.append(["merge", (part.red.get_prefix(), part.blue.get_prefix())])
         self.nr_merged_states_total += len(part.full_mapping) - len(part.red_mapping)
         self.nr_merged_states += 1
         self.print_status()
 
-    @min_lvl(1)
     def learning_done(self, root, red_states):
         print(f'\nLearning Time: {round(time.time() - self.previous_time, 2)}')
         print(f'Learned {len(red_states)} state automaton via {self.nr_merged_states} merges.')

From 8c87a61ae6b86ba544aaf875435eceacdf55e17c Mon Sep 17 00:00:00 2001
From: Edi Muskardin <edi.muskardin@silicon-austria.com>
Date: Wed, 12 Feb 2025 12:05:49 +0100
Subject: [PATCH 20/23] make sure that Unknown outputs are treated as None

---
 .../general_passive/GeneralizedStateMerging.py           | 3 +--
 aalpy/learning_algs/general_passive/GsmAlgorithms.py     | 9 ++-------
 aalpy/learning_algs/general_passive/Node.py              | 2 +-
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index bb645801c6c..c2319f10259 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -1,7 +1,6 @@
 import functools
-from bisect import insort
-from typing import Dict, Tuple, Callable, List, Optional
 from collections import deque
+from typing import Dict, Tuple, Callable, List, Optional
 
 from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \
     OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output
diff --git a/aalpy/learning_algs/general_passive/GsmAlgorithms.py b/aalpy/learning_algs/general_passive/GsmAlgorithms.py
index f43c2c4dff9..e77bf03eed0 100644
--- a/aalpy/learning_algs/general_passive/GsmAlgorithms.py
+++ b/aalpy/learning_algs/general_passive/GsmAlgorithms.py
@@ -2,6 +2,7 @@
 
 from aalpy import DeterministicAutomaton
 from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM
+from aalpy.learning_algs.general_passive.Instrumentation import ProgressReport
 from aalpy.learning_algs.general_passive.Node import Node
 from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation
 from aalpy.utils.HelperFunctions import dfa_from_moore
@@ -36,15 +37,9 @@ def EDSM_score(part: Dict[Node, Node]):
 
     internal_automaton_type = 'moore' if automaton_type != 'mealy' else automaton_type
 
-    if print_info:
-        print(f'Running EDSM.')
-
     learned_model = run_GSM(data, output_behavior=internal_automaton_type,
                             transition_behavior="deterministic",
-                            score_calc=score)
-
-    if print_info:
-        print(f'EDSM learned {learned_model.size} state automaton.')
+                            score_calc=score, instrumentation=ProgressReport(2))
 
     if automaton_type == 'dfa':
         learned_model = dfa_from_moore(learned_model)
diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 905e025a206..8e3a3066d2c 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -41,7 +41,7 @@ def __repr__(self):
         return str(self.value)
 
 
-unknown_output = SpecialValue("Output Unknown")
+unknown_output = None #SpecialValue("Output Unknown")
 
 
 def generate_values(base: list, step: Callable, backing_set=True):

From 53f80334ed2b0e1b39eadf9aa45234c231a6014b Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Wed, 12 Feb 2025 11:45:06 +0100
Subject: [PATCH 21/23] additional checks for data format (trees, reject
 examples + non-det)

---
 .../general_passive/GeneralizedStateMerging.py    | 15 ++++++++-------
 aalpy/learning_algs/general_passive/Node.py       | 12 ++++++++----
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
index c2319f10259..197dfa50bb6 100644
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
+++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
@@ -3,7 +3,7 @@
 from typing import Dict, Tuple, Callable, List, Optional
 
 from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \
-    OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output
+    OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output, detect_data_format
 from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation, hoeffding_compatibility
 
 
@@ -93,15 +93,16 @@ def compute_local_compatibility(self, a: Node, b: Node):
 
     # TODO: make more generic by adding the option to use a different algorithm than red blue
     #  for selecting potential merge candidates. Maybe using inheritance with abstract `run`.
-    def run(self, data, convert=True, instrumentation: Instrumentation=None, data_format="auto"):
+    def run(self, data, convert=True, instrumentation: Instrumentation=None, data_format=None):
         if instrumentation is None:
             instrumentation = Instrumentation()
         instrumentation.reset(self)
 
-        if isinstance(data, Node):
-            root = data
-        else:
-            root = Node.createPTA(data, self.output_behavior, data_format)
+        if data_format is None:
+            data_format = detect_data_format(data)
+        if data_format == "examples" and self.transition_behavior != "deterministic":
+            raise ValueError("learning from examples is not possible for nondeterministic systems")
+        root = Node.createPTA(data, self.output_behavior, data_format)
 
         root = self.pta_preprocessing(root)
         instrumentation.pta_construction_done(root)
@@ -312,7 +313,7 @@ def run_GSM(data, *,
             depth_first=False,
             instrumentation=None,
             convert=True,
-            data_format="auto",
+            data_format=None,
             ):
     """
     TODO
diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 8e3a3066d2c..294f23646f8 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -20,7 +20,7 @@
 TransitionBehaviorRange = ["deterministic", "nondeterministic", "stochastic"]
 
 DataFormat = str
-DataFormatRange = ["auto", "traces", "examples"]
+DataFormatRange = ["traces", "examples", "tree"]
 
 IOPair = Tuple[Any, Any]
 IOTrace = Sequence[IOPair]
@@ -92,6 +92,10 @@ def check_data_format(value):
             return value
         raise ValueError("inconsistent data")
 
+    if isinstance(data, Node):
+        if not data.is_tree():
+            raise ValueError("provided automaton is not a tree")
+        return "tree"
     if not isinstance(data, accepted_types):
         raise ValueError("wrong input format. expected tuple or list.")
     if len(data) == 0:
@@ -235,7 +239,7 @@ def generator(state: Node):
 
     def is_tree(self):
         q: List['Node'] = [self]
-        backing_set = set()
+        backing_set = {self}
         while len(q) != 0:
             current = q.pop(0)
             for _, child in current.transition_iterator():
@@ -434,9 +438,9 @@ def add_example(self, example: IOExample):
     def createPTA(data, output_behavior, data_format="auto") -> 'Node':
         if data_format not in DataFormatRange:
             raise ValueError(f"invalid data format {data_format}. should be in {DataFormatRange}")
-        if data_format == "auto":
-            data_format = detect_data_format(data)
 
+        if data_format == "tree":
+            return data
         root_node = Node((None, unknown_output), None)
         if data_format == "examples":
             for example in data:

From 930e1a82c0a6a04abe07ff0b607e9c55a2eb6767 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Wed, 12 Feb 2025 12:23:36 +0100
Subject: [PATCH 22/23] fixed createPTA dataformat default

---
 aalpy/learning_algs/general_passive/Node.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 294f23646f8..7195f867d64 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -435,7 +435,9 @@ def add_example(self, example: IOExample):
                 raise ValueError("nondeterminism encountered for GSM with examples. not supported")
 
     @staticmethod
-    def createPTA(data, output_behavior, data_format="auto") -> 'Node':
+    def createPTA(data, output_behavior, data_format=None) -> 'Node':
+        if data_format is None:
+            data_format = detect_data_format(data)
         if data_format not in DataFormatRange:
             raise ValueError(f"invalid data format {data_format}. should be in {DataFormatRange}")
 

From fc31caf76a9785b0ab27abe5d6a5fc6878e99137 Mon Sep 17 00:00:00 2001
From: zwergziege <b.v.b@gmx.at>
Date: Wed, 12 Feb 2025 12:38:35 +0100
Subject: [PATCH 23/23] set unknown_output to None

---
 aalpy/learning_algs/general_passive/Node.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py
index 7195f867d64..4809d9ec288 100644
--- a/aalpy/learning_algs/general_passive/Node.py
+++ b/aalpy/learning_algs/general_passive/Node.py
@@ -29,19 +29,7 @@
 StateFunction = Callable[['Node'], str]
 TransitionFunction = Callable[['Node', Any, Any], str]
 
-
-class SpecialValue:
-    def __init__(self, value):
-        self.value = value
-
-    def __str__(self):
-        return str(self.value)
-
-    def __repr__(self):
-        return str(self.value)
-
-
-unknown_output = None #SpecialValue("Output Unknown")
+unknown_output = None  # placeholder for an unknown output; can be set to a special value if required
 
 
 def generate_values(base: list, step: Callable, backing_set=True):