From 9013e96c7facf54cc8069384442b26b6be373cae Mon Sep 17 00:00:00 2001 From: zwergziege Date: Mon, 10 Feb 2025 09:26:20 +0100 Subject: [PATCH 01/23] Improved sampling utils --- aalpy/utils/Sampling.py | 50 +++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/aalpy/utils/Sampling.py b/aalpy/utils/Sampling.py index 14c60e36001..ba49b81ee85 100644 --- a/aalpy/utils/Sampling.py +++ b/aalpy/utils/Sampling.py @@ -1,13 +1,48 @@ +from functools import wraps from random import randint, choices, random from aalpy import MooreMachine, Dfa, NDMooreMachine, Mdp, MarkovChain from aalpy.base import Automaton, DeterministicAutomaton +def get_io_traces(automaton: Automaton, input_traces: list) -> list: + moore_automata = (MooreMachine, Dfa, NDMooreMachine, Mdp, MarkovChain) + is_moore = isinstance(automaton, moore_automata) + traces = [] + for input_trace in input_traces: + output_trace = automaton.execute_sequence(automaton.initial_state, input_trace) + trace = list(zip(input_trace, output_trace)) + if is_moore: + trace = [automaton.initial_state.output] + trace + traces.append(trace) + return traces + + +def support_automaton_arg(require_transform): + def decorator(f): + @wraps(f) + def inner(alphabet, *args, include_outputs=False, **kwargs): + automaton = None + if isinstance(alphabet, Automaton): + automaton = alphabet + if require_transform: + alphabet = alphabet.get_input_alphabet() + traces = f(alphabet, *args, **kwargs) + if include_outputs: + if automaton is None: + raise ValueError("automaton must be provided") + traces = get_io_traces(automaton, traces) + return traces + return inner + return decorator + + +@support_automaton_arg(True) def sample_with_length_limits(alphabet, nr_samples, min_len, max_len): return [choices(alphabet, k = randint(min_len, max_len)) for _ in range(nr_samples)] +@support_automaton_arg(True) def sample_with_term_prob(alphabet, nr_samples, term_prob): ret = [] for _ in range(nr_samples): @@ 
-18,23 +53,10 @@ def sample_with_term_prob(alphabet, nr_samples, term_prob): return ret +@support_automaton_arg(False) def get_complete_sample(automaton: DeterministicAutomaton): alphabet = automaton.get_input_alphabet() automaton.compute_prefixes() char_set = automaton.compute_characterization_set() infixes = [(x,) for x in alphabet] + [tuple()] return [state.prefix + infix + suffix for state in automaton.states for suffix in char_set for infix in infixes] - - -def get_io_traces(automaton: Automaton, input_traces: list) -> list: - moore_automata = (MooreMachine, NDMooreMachine, Mdp, MarkovChain) - is_moore = isinstance(automaton, moore_automata) - - traces = [] - for input_trace in input_traces: - output_trace = automaton.execute_sequence(automaton.initial_state, input_trace) - trace = list(zip(input_trace, output_trace)) - if is_moore: - trace = [automaton.initial_state.output] + trace - traces.append(trace) - return traces From 1139d21d8a5b72f368d930474e234b7a332ecae1 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Mon, 10 Feb 2025 10:02:24 +0100 Subject: [PATCH 02/23] Fixed compatibility issue and rectified red_state order --- .../general_passive/GeneralizedStateMerging.py | 17 ++++++++++++----- aalpy/learning_algs/general_passive/Node.py | 5 +++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index 2370aed9a16..010675d94b1 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -4,7 +4,7 @@ from collections import deque from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \ - OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator + OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders from 
aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation, hoeffding_compatibility @@ -70,8 +70,11 @@ def __init__(self, *, self.score_calc: ScoreCalculation = score_calc if node_order is None: - node_order = Node.__lt__ - self.node_order = functools.cmp_to_key(lambda a, b: -1 if node_order(a, b) else 1) + node_order = NodeOrders.Default + if node_order is NodeOrders.NoCompare or node_order is NodeOrders.Default: + self.node_order = node_order + else: + self.node_order = functools.cmp_to_key(lambda a, b: -1 if node_order(a, b) else 1) self.pta_preprocessing = pta_preprocessing or (lambda x: x) self.postprocessing = postprocessing or (lambda x: x) @@ -128,7 +131,11 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): # no blue states left -> done if len(blue_states) == 0: break - blue_states.sort(key=self.node_order) + if self.node_order is not NodeOrders.NoCompare: + blue_states.sort(key=self.node_order) + # red states are always sorted using default order on original prefix + if self.node_order is not NodeOrders.Default: + red_states.sort(key=self.node_order) # loop over blue states promotion = False @@ -158,7 +165,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): # no merge candidates for this blue state -> promote if all(part.score is False for part in current_candidates.values()): - insort(red_states, blue_state, key=self.node_order) + red_states.append(blue_state) instrumentation.log_promote(blue_state) promotion = True break diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index be663be7b2e..91f099f19e5 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -1,3 +1,4 @@ +import functools import math import pathlib from functools import total_ordering @@ -402,3 +403,7 @@ def local_log_likelihood_contribution(self): def count(self): return sum(trans.count for _, trans in 
self.transition_iterator()) + +class NodeOrders: + NoCompare = lambda n: 0 + Default = functools.cmp_to_key(lambda a, b: -1 if a < b else 1) \ No newline at end of file From e7868156c3397c79ee84d10a83740f925897d6f0 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Mon, 10 Feb 2025 10:31:23 +0100 Subject: [PATCH 03/23] deleted unnecessary definition of eq and hash for Node class --- aalpy/learning_algs/general_passive/Node.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 91f099f19e5..197adadf3a0 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -110,12 +110,6 @@ def __lt__(self, other, compare_length_only=False): except TypeError: return [str(x) for x in own_p] < [str(x) for x in other_p] - def __eq__(self, other): - return self is other # TODO hack, does this lead to problems down the line? - - def __hash__(self): - return id(self) # TODO This is a hack - # TODO implicit prefixes as currently implemented require O(length) time for prefix calculations (e.g. 
to determine the minimal blue node) # other options would be to have more efficient explicit prefixes such as shared list representations def get_prefix_length(self): From 50850539dc2469745eaa8e63584c8a25b8cb0031 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Mon, 10 Feb 2025 10:37:29 +0100 Subject: [PATCH 04/23] fixed car alarm model --- DotModels/car_alarm.dot | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/DotModels/car_alarm.dot b/DotModels/car_alarm.dot index 7cb052e4b35..bc5829d710a 100644 --- a/DotModels/car_alarm.dot +++ b/DotModels/car_alarm.dot @@ -5,22 +5,18 @@ q3_locked_closed [label="A"]; q5_unlocked_closed [label="N"]; q6_unlocked_open [label="N"]; q7_locked_open [label="N"]; -q4_faulty [label="N"]; -q1_locked_closed -> q2_locked_open [label="d:1"]; -q1_locked_closed -> q5_unlocked_closed [label="l:1"]; -q2_locked_open -> q3_locked_closed [label="d:1"]; -q2_locked_open -> q6_unlocked_open [label="l:1"]; -q3_locked_closed -> q2_locked_open [label="d:1"]; -q3_locked_closed -> q5_unlocked_closed [label="l:1"]; -q5_unlocked_closed -> q6_unlocked_open [label="d:1"]; -q5_unlocked_closed -> q1_locked_closed [label="l:1"]; -q6_unlocked_open -> q5_unlocked_closed [label="d:1"]; -q6_unlocked_open -> q7_locked_open [label="l:1"]; -q7_locked_open -> q4_faulty [label="d:1"]; -q7_locked_open -> q6_unlocked_open [label="l:1"]; -q4_faulty -> q2_locked_open [label="d:0.9"]; -q4_faulty -> q7_locked_open [label="d:0.1"]; -q4_faulty -> q5_unlocked_closed [label="l:1"]; +q1_locked_closed -> q2_locked_open [label="d"]; +q1_locked_closed -> q5_unlocked_closed [label="l"]; +q2_locked_open -> q3_locked_closed [label="d"]; +q2_locked_open -> q6_unlocked_open [label="l"]; +q3_locked_closed -> q2_locked_open [label="d"]; +q3_locked_closed -> q5_unlocked_closed [label="l"]; +q5_unlocked_closed -> q6_unlocked_open [label="d"]; +q5_unlocked_closed -> q1_locked_closed [label="l"]; +q6_unlocked_open -> q5_unlocked_closed [label="d"]; 
+q6_unlocked_open -> q7_locked_open [label="l"]; +q7_locked_open -> q1_locked_closed [label="d"]; +q7_locked_open -> q6_unlocked_open [label="l"]; __start0 [label="", shape=none]; __start0 -> q1_locked_closed [label=""]; } From c6f8a4bd2f7de31a23667dcd710f8996c4ef089a Mon Sep 17 00:00:00 2001 From: zwergziege Date: Mon, 10 Feb 2025 10:55:42 +0100 Subject: [PATCH 05/23] check compatibility with target type on Node.to_automaton() --- aalpy/learning_algs/general_passive/Node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 197adadf3a0..0e3f75e2a94 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -191,7 +191,7 @@ def is_tree(self): return True def to_automaton(self, output_behavior: OutputBehavior, transition_behavior: TransitionBehavior, - check_behavior=False, set_prefix=False) -> Automaton: + check_behavior=True, set_prefix=False) -> Automaton: nodes = self.get_all_nodes() if check_behavior: From 3c50afbfce8be8dcc6177357d84bc28552bb5d27 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Mon, 10 Feb 2025 11:01:49 +0100 Subject: [PATCH 06/23] clarified error messages of GSM --- .../learning_algs/general_passive/GeneralizedStateMerging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index 010675d94b1..3adc00a1a03 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -54,10 +54,10 @@ def __init__(self, *, depth_first=False): if output_behavior not in OutputBehaviorRange: - raise ValueError(f"invalid output behavior {output_behavior}") + raise ValueError(f"invalid output behavior {output_behavior}. 
should be in {OutputBehaviorRange}") self.output_behavior: OutputBehavior = output_behavior if transition_behavior not in TransitionBehaviorRange: - raise ValueError(f"invalid transition behavior {transition_behavior}") + raise ValueError(f"invalid transition behavior {transition_behavior}. should be in {TransitionBehaviorRange}") self.transition_behavior: TransitionBehavior = transition_behavior if score_calc is None: From c7bc17189d443f6cac72cc5a3233dd37985f8ed2 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Mon, 10 Feb 2025 16:58:19 +0100 Subject: [PATCH 07/23] cosmetics --- .../general_passive/GeneralizedStateMerging.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index 3adc00a1a03..9b8218825f0 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -146,7 +146,6 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): # calculate partitions resulting from merges with red states if necessary current_candidates: Dict[Node, Partitioning] = dict() perfect_partitioning = None - red_state = None for red_state in red_states: partition = partition_candidates.get((red_state, blue_state)) @@ -156,8 +155,8 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): perfect_partitioning = partition break current_candidates[red_state] = partition - assert red_state is not None + # partition with perfect score found: don't consider anything else if perfect_partitioning: partition_candidates = {(red_state, blue_state): perfect_partitioning} @@ -254,9 +253,9 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: blue_in_sym, blue_out_sym = blue.prefix_access_pair blue_parent.transitions[blue_in_sym][blue_out_sym].target = red + # loop over implied merges q: 
deque[Tuple[Node, Node]] = deque([(red, blue)]) pop = q.pop if self.depth_first else q.popleft - while len(q) != 0: red, blue = pop() partition = update_partition(red, blue) @@ -265,6 +264,7 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: if self.compute_local_compatibility(partition, blue) is False: return partitioning + # create implied merges for all common successors for in_sym, blue_transitions in blue.transitions.items(): partition_transitions = partition.get_or_create_transitions(in_sym) for out_sym, blue_transition in blue_transitions.items(): From 2f8cab8ab9a6f0df39b3b1d51a76a2b00d7de737 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Mon, 10 Feb 2025 17:01:02 +0100 Subject: [PATCH 08/23] Added support for learning from examples (mealy only) --- .../GeneralizedStateMerging.py | 28 ++++- aalpy/learning_algs/general_passive/Node.py | 103 ++++++++++++++---- 2 files changed, 105 insertions(+), 26 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index 9b8218825f0..99de117da78 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -4,7 +4,7 @@ from collections import deque from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \ - OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders + OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation, hoeffding_compatibility @@ -94,7 +94,7 @@ def compute_local_compatibility(self, a: Node, b: Node): # TODO: make more generic by adding the option to use a different algorithm than red blue # for selecting potential merge candidates. Maybe using inheritance with abstract `run`. 
- def run(self, data, convert=True, instrumentation: Instrumentation = None): + def run(self, data, convert=True, instrumentation: Instrumentation=None, data_format="auto"): if instrumentation is None: instrumentation = Instrumentation() instrumentation.reset(self) @@ -102,7 +102,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): if isinstance(data, Node): root = data else: - root = Node.createPTA(data, self.output_behavior) + root = Node.createPTA(data, self.output_behavior, data_format) root = self.pta_preprocessing(root) instrumentation.pta_construction_done(root) @@ -182,6 +182,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): best_candidate = max(partition_candidates.values(), key=lambda part: part.score) for real_node, partition_node in best_candidate.red_mapping.items(): real_node.transitions = partition_node.transitions + real_node.prefix_access_pair = partition_node.prefix_access_pair for access_pair, t_info in real_node.transition_iterator(): if t_info.target not in red_states: t_info.target.predecessor = real_node @@ -269,6 +270,20 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: partition_transitions = partition.get_or_create_transitions(in_sym) for out_sym, blue_transition in blue_transitions.items(): partition_transition = partition_transitions.get(out_sym) + # handle unknown output + if partition_transition is None: + if out_sym is unknown_output and len(partition_transitions) != 0: + assert len(partition_transitions) == 1 + partition_transition = list(partition_transitions.values())[0] + if unknown_output in partition_transitions: + partition_transition = partition_transitions.pop(unknown_output) + partition_transitions[out_sym] = partition_transition + # re-hook access pair + succ_part = update_partition(partition_transition.target, None) + succ_pre_part = update_partition(succ_part.predecessor, None) + if self.output_behavior == "moore" or succ_pre_part is 
partition: + succ_part.prefix_access_pair = (succ_part.prefix_access_pair[0], out_sym) + # add pairs if partition_transition is not None: q.append((partition_transition.target, blue_transition.target)) partition_transition.count += blue_transition.count @@ -294,6 +309,7 @@ def run_GSM(data, *, depth_first=False, instrumentation=None, convert=True, + data_format="auto", ): """ TODO @@ -325,12 +341,14 @@ def run_GSM(data, *, convert: + data_format: + Returns: """ - # instantiate the gsm + # instantiate gsm gsm = GeneralizedStateMerging( output_behavior=output_behavior, transition_behavior=transition_behavior, @@ -345,4 +363,4 @@ def run_GSM(data, *, ) # run the algorithm - return gsm.run(data=data, instrumentation=instrumentation, convert=convert) + return gsm.run(data=data, instrumentation=instrumentation, convert=convert, data_format=data_format) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 0e3f75e2a94..4c2882df914 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -2,7 +2,7 @@ import math import pathlib from functools import total_ordering -from typing import Dict, Any, List, Tuple, Iterable, Callable, Union, TypeVar, Iterator, Optional +from typing import Dict, Any, List, Tuple, Iterable, Callable, Union, TypeVar, Iterator, Optional, Sequence import pydot from copy import copy @@ -19,13 +19,18 @@ TransitionBehavior = str TransitionBehaviorRange = ["deterministic", "nondeterministic", "stochastic"] +DataFormat = str +DataFormatRange = ["auto", "traces", "examples"] + IOPair = Tuple[Any, Any] -IOTrace = List[IOPair] -IOExample = Tuple[Iterable[Any], Any] +IOTrace = Sequence[IOPair] +IOExample = Tuple[Sequence[Any], Any] StateFunction = Callable[['Node'], str] TransitionFunction = Callable[['Node', Any, Any], str] +unknown_output = object() + def generate_values(base: list, step: Callable, backing_set=True): if backing_set: @@ -65,6 +70,21 @@ 
def union_iterator(a: Dict[Key, Val], b: Dict[Key, Val], default: Val = None) -> a_val = a.get(key, default) yield key, a_val, b_val +# TODO maybe reuse this in classic RPNI +def detect_data_format(data): + if not isinstance(data, Sequence): + raise ValueError("wrong input format. expected sequence type.") + for data_point in data: + if len(data_point) != 2: + return "traces" + o1, o2 = data_point + if not isinstance(o1, Sequence): + return "traces" + if not isinstance(o2, Sequence): + return "examples" + if len(data) == 0: + return "traces" + raise ValueError("ambiguous data format. data format needs to be specified explicitly.") # TODO maybe split this for maintainability (and perfomance?) class TransitionInfo: @@ -327,13 +347,10 @@ def node_naming(node: Node): file_ext = 'dot' graph.write(path=str(path) + "." + file_ext, prog=engine, format=format) - def add_data(self, data): - for seq in data: - self.add_trace(seq) - def add_trace(self, data: IOTrace): + def add_trace(self, trace: IOTrace): curr_node: Node = self - for in_sym, out_sym in data: + for in_sym, out_sym in trace: transitions = curr_node.get_or_create_transitions(in_sym) info = transitions.get(out_sym) if info is None: @@ -345,19 +362,59 @@ def add_trace(self, data: IOTrace): node = info.target curr_node = node - def add_example(self, data: IOExample): - # TODO add support for example based algorithms - raise NotImplementedError() + def add_example(self, example: IOExample): + inputs, output = example + curr_node: Node = self + in_sym = None + + # step through inputs and add transitions + for in_sym in inputs: + transitions = curr_node.get_or_create_transitions(in_sym) + t_infos = list(transitions.values()) + if len(t_infos) == 0: + node = Node((in_sym, unknown_output), curr_node) + t_info = TransitionInfo(node, 1, node, 1) + transitions[unknown_output] = t_info + elif len(t_infos) == 1: + t_info = t_infos[0] + t_info.count += 1 + t_info.original_count += 1 + node = t_info.target + else: + # This 
should never happen + raise ValueError("nondeterminism encountered for GSM with examples. not supported") + curr_node = node + + # set last output + curr_node.prefix_access_pair = (curr_node.prefix_access_pair[0], output) + pred = curr_node.predecessor + if pred: + transitions = pred.transitions[in_sym] + if unknown_output in transitions: + transitions[output] = transitions[unknown_output] + del transitions[unknown_output] + if output not in transitions: + raise ValueError("nondeterminism encountered for GSM with examples. not supported") + @staticmethod - def createPTA(data, output_behavior) -> 'Node': - if output_behavior == "moore": - initial_output = data[0][0] - data = (d[1:] for d in data) - else: - initial_output = None - root_node = Node((None, initial_output), None) - root_node.add_data(data) + def createPTA(data, output_behavior, data_format="auto") -> 'Node': + if data_format not in DataFormatRange: + raise ValueError(f"invalid data format {data_format}. should be in {DataFormatRange}") + if data_format == "auto": + data_format = detect_data_format(data) + + root_node = Node((None, None), None) + if data_format == "examples": + for example in data: + root_node.add_example(example) + if data_format == "traces": + if output_behavior == "moore": + initial_output = data[0][0] + root_node.prefix_access_pair = (None, initial_output) + data = (d[1:] for d in data) + for trace in data: + root_node.add_trace(trace) return root_node def is_locally_deterministic(self): @@ -367,8 +424,12 @@ def is_deterministic(self): return all(node.is_locally_deterministic() for node in self.get_all_nodes()) def deterministic_compatible(self, other: 'Node'): - common_keys = filter(lambda key: key in self.transitions.keys(), other.transitions.keys()) - return all(list(self.transitions[key].keys()) == list(other.transitions[key].keys()) for key in common_keys) + for _, trans_self, trans_other in intersection_iterator(self.transitions, other.transitions): + if unknown_output in 
trans_self or unknown_output in trans_other: + continue + if list(trans_self.keys()) != list(trans_other.keys()): + return False + return True def is_moore(self): output_dict = dict() From a1475dd57034d6cf1c278a8afd37fb59c16753ef Mon Sep 17 00:00:00 2001 From: zwergziege Date: Tue, 11 Feb 2025 11:40:50 +0100 Subject: [PATCH 09/23] fixed wrong initial output + better messages --- aalpy/learning_algs/general_passive/Node.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 4c2882df914..1fe540eb461 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -29,8 +29,17 @@ StateFunction = Callable[['Node'], str] TransitionFunction = Callable[['Node', Any, Any], str] -unknown_output = object() +class SpecialValue: + def __init__(self, value): + self.value = value + def __str__(self): + return str(self.value) + + def __repr__(self): + return str(self.value) + +unknown_output = SpecialValue("Output Unknown") def generate_values(base: list, step: Callable, backing_set=True): if backing_set: @@ -404,7 +413,7 @@ def createPTA(data, output_behavior, data_format="auto") -> 'Node': if data_format == "auto": data_format = detect_data_format(data) - root_node = Node((None, None), None) + root_node = Node((None, unknown_output), None) if data_format == "examples": for example in data: root_node.add_example(example) From 691e0ae0d6a9c84315c6ebff0bae428a952610ff Mon Sep 17 00:00:00 2001 From: zwergziege Date: Tue, 11 Feb 2025 13:22:12 +0100 Subject: [PATCH 10/23] adapt moore checks for unknown output --- aalpy/learning_algs/general_passive/Node.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 1fe540eb461..3cb5fb7f9ec 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ 
b/aalpy/learning_algs/general_passive/Node.py @@ -441,18 +441,17 @@ def deterministic_compatible(self, other: 'Node'): return True def is_moore(self): - output_dict = dict() for node in self.get_all_nodes(): for (in_sym, out_sym), transition in node.transition_iterator(): - child = transition.target - if child in output_dict.keys() and output_dict[child] != out_sym: + child_output = transition.target.get_prefix_output() + if out_sym is not unknown_output and child_output != out_sym: return False - else: - output_dict[child] = out_sym return True def moore_compatible(self, other: 'Node'): - return self.get_prefix_output() == other.get_prefix_output() + so = self.get_prefix_output() + oo = other.get_prefix_output() + return so == oo or so is unknown_output or oo is unknown_output def local_log_likelihood_contribution(self): llc = 0 From 2bdf572c93815bfcf87886bc11f520cf27b28e77 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Tue, 11 Feb 2025 14:01:01 +0100 Subject: [PATCH 11/23] fix corner case for unknown outputs with moore behavior --- .../general_passive/GeneralizedStateMerging.py | 11 +++++++---- aalpy/learning_algs/general_passive/Node.py | 5 ++++- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index 99de117da78..a339198b6bc 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -186,7 +186,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation=None, data_fo for access_pair, t_info in real_node.transition_iterator(): if t_info.target not in red_states: t_info.target.predecessor = real_node - t_info.target.prefix_access_pair = access_pair # not sure whether this is actually required + # t_info.target.prefix_access_pair = access_pair # not sure whether this is actually required 
instrumentation.log_merge(best_candidate) # FUTURE: optimizations for compatibility tests where merges can be orthogonal # FUTURE: caching for aggregating compatibility tests @@ -254,6 +254,10 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: blue_in_sym, blue_out_sym = blue.prefix_access_pair blue_parent.transitions[blue_in_sym][blue_out_sym].target = red + if blue_out_sym is not unknown_output and self.output_behavior == "moore": + partition = update_partition(red, None) + partition.prefix_access_pair = (partition.get_prefix_input(), blue_out_sym) + # loop over implied merges q: deque[Tuple[Node, Node]] = deque([(red, blue)]) pop = q.pop if self.depth_first else q.popleft @@ -280,9 +284,8 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: partition_transitions[out_sym] = partition_transition # re-hook access pair succ_part = update_partition(partition_transition.target, None) - succ_pre_part = update_partition(succ_part.predecessor, None) - if self.output_behavior == "moore" or succ_pre_part is partition: - succ_part.prefix_access_pair = (succ_part.prefix_access_pair[0], out_sym) + if self.output_behavior == "moore" or succ_part.predecessor is red: + succ_part.prefix_access_pair = (succ_part.get_prefix_input(), out_sym) # add pairs if partition_transition is not None: q.append((partition_transition.target, blue_transition.target)) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 3cb5fb7f9ec..19c7eda3547 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -152,6 +152,9 @@ def get_prefix_length(self): def get_prefix_output(self): return self.prefix_access_pair[1] + def get_prefix_input(self): + return self.prefix_access_pair[0] + def get_prefix(self, include_output=True): node = self prefix = [] @@ -395,7 +398,7 @@ def add_example(self, example: IOExample): curr_node = node # set last output - 
curr_node.prefix_access_pair = (curr_node.prefix_access_pair[0], output) + curr_node.prefix_access_pair = (curr_node.get_prefix_input(), output) pred = curr_node.predecessor if pred: transitions = pred.transitions[in_sym] From d72cd825a87f793e8d9ce933c7b38bc56eff1e31 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Tue, 11 Feb 2025 16:02:03 +0100 Subject: [PATCH 12/23] update auto-format detection --- aalpy/learning_algs/general_passive/Node.py | 34 +++++++++++++-------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 19c7eda3547..23698570011 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -29,6 +29,7 @@ StateFunction = Callable[['Node'], str] TransitionFunction = Callable[['Node', Any, Any], str] + class SpecialValue: def __init__(self, value): self.value = value @@ -39,8 +40,10 @@ def __str__(self): def __repr__(self): return str(self.value) + unknown_output = SpecialValue("Output Unknown") + def generate_values(base: list, step: Callable, backing_set=True): if backing_set: result = list(base) @@ -79,21 +82,29 @@ def union_iterator(a: Dict[Key, Val], b: Dict[Key, Val], default: Val = None) -> a_val = a.get(key, default) yield key, a_val, b_val + # TODO maybe reuse this in classic RPNI def detect_data_format(data): if not isinstance(data, Sequence): raise ValueError("wrong input format. expected sequence type.") - for data_point in data: - if len(data_point) != 2: - return "traces" - o1, o2 = data_point - if not isinstance(o1, Sequence): - return "traces" - if not isinstance(o2, Sequence): - return "examples" if len(data) == 0: return "traces" - raise ValueError("ambiguous data format. 
data format needs to be specified explicitly.") + + detected_format = None + for data_point in data: + + if len(data_point) == 2 and isinstance(data_point[0], Sequence): + data_point_format = 'examples' + else: + data_point_format = 'traces' + + if detected_format is None: + detected_format = data_point_format + elif data_point_format != detected_format: + raise ValueError("ambiguous data format. data format needs to be specified explicitly.") + + return detected_format + # TODO maybe split this for maintainability (and perfomance?) class TransitionInfo: @@ -359,7 +370,6 @@ def node_naming(node: Node): file_ext = 'dot' graph.write(path=str(path) + "." + file_ext, prog=engine, format=format) - def add_trace(self, trace: IOTrace): curr_node: Node = self for in_sym, out_sym in trace: @@ -408,7 +418,6 @@ def add_example(self, example: IOExample): if output not in transitions: raise ValueError("nondeterminism encountered for GSM with examples. not supported") - @staticmethod def createPTA(data, output_behavior, data_format="auto") -> 'Node': if data_format not in DataFormatRange: @@ -470,6 +479,7 @@ def local_log_likelihood_contribution(self): def count(self): return sum(trans.count for _, trans in self.transition_iterator()) + class NodeOrders: NoCompare = lambda n: 0 - Default = functools.cmp_to_key(lambda a, b: -1 if a < b else 1) \ No newline at end of file + Default = functools.cmp_to_key(lambda a, b: -1 if a < b else 1) From 3e5b8b0b3164aa228b25245e4f9f3f3e830ab42c Mon Sep 17 00:00:00 2001 From: zwergziege Date: Tue, 11 Feb 2025 17:24:34 +0100 Subject: [PATCH 13/23] fixed GSM bug due to unknown outputs --- .../learning_algs/general_passive/GeneralizedStateMerging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index a339198b6bc..7a6767b81f0 100644 --- 
a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -254,8 +254,8 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: blue_in_sym, blue_out_sym = blue.prefix_access_pair blue_parent.transitions[blue_in_sym][blue_out_sym].target = red - if blue_out_sym is not unknown_output and self.output_behavior == "moore": - partition = update_partition(red, None) + partition = update_partition(red, None) + if partition.get_prefix_output() is unknown_output and self.output_behavior == "moore": partition.prefix_access_pair = (partition.get_prefix_input(), blue_out_sym) # loop over implied merges From 4c41e6b0cde7a089b7e1f078fbc2948376123728 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Wed, 12 Feb 2025 08:40:55 +0100 Subject: [PATCH 14/23] update auto-format detection --- aalpy/learning_algs/general_passive/Node.py | 43 +++++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 23698570011..2df264fc3f6 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -84,26 +84,35 @@ def union_iterator(a: Dict[Key, Val], b: Dict[Key, Val], default: Val = None) -> # TODO maybe reuse this in classic RPNI -def detect_data_format(data): - if not isinstance(data, Sequence): - raise ValueError("wrong input format. expected sequence type.") +def detect_data_format(data, check_consistency=True): + accepted_types = (Tuple, List) + data_format = None + def check_data_format(value): + if data_format is None or data_format == value: + return value + raise ValueError("inconsistent data") + + if not isinstance(data, accepted_types): + raise ValueError("wrong input format. 
expected tuple or list.") if len(data) == 0: return "traces" - - detected_format = None for data_point in data: - - if len(data_point) == 2 and isinstance(data_point[0], Sequence): - data_point_format = 'examples' - else: - data_point_format = 'traces' - - if detected_format is None: - detected_format = data_point_format - elif data_point_format != detected_format: - raise ValueError("ambiguous data format. data format needs to be specified explicitly.") - - return detected_format + if len(data_point) != 2: + data_format = check_data_format("traces") + if not check_consistency: + return data_format + o1, o2 = data_point + if not isinstance(o1, accepted_types): + data_format = check_data_format("traces") + if not check_consistency: + return data_format + if not isinstance(o2, accepted_types): + data_format = check_data_format("examples") + if not check_consistency: + return data_format + if data_format is None: + raise ValueError("ambiguous data format. data format needs to be specified explicitly.") + return data_format # TODO maybe split this for maintainability (and perfomance?) 
From 47f2084ff63d3cf9e7820df7a9f406c3f0d52d52 Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 12 Feb 2025 09:37:13 +0100 Subject: [PATCH 15/23] update random data generation --- aalpy/utils/HelperFunctions.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/aalpy/utils/HelperFunctions.py b/aalpy/utils/HelperFunctions.py index 54e3cd2ecdb..76ac07c8727 100644 --- a/aalpy/utils/HelperFunctions.py +++ b/aalpy/utils/HelperFunctions.py @@ -301,7 +301,7 @@ def convert_i_o_traces_for_RPNI(sequences, automaton_type="mealy"): for s in sequences: if automaton_type in ["moore", "dfa"]: - rpni_sequences.add((tuple(),s[0])) + rpni_sequences.add((tuple(), s[0])) s = s[1:] for i in range(len(s)): inputs = tuple([io[0] for io in s[:i + 1]]) @@ -349,11 +349,14 @@ def is_balanced(input_seq, vpa_alphabet): return counter == 0 -def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_len=1, max_seq_len=16): +def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_len=1, max_seq_len=16, + sequance_type='single_output'): + assert sequance_type in {'io_trace', 'single_output'} alphabet = model.get_input_alphabet() - input_output_sequances = [] - while len(input_output_sequances) < num_sequances: + dataset = [] + + while len(dataset) < num_sequances: sequance = [] for _ in range(random.randint(min_seq_len, max_seq_len)): sequance.append(random.choice(alphabet)) @@ -361,9 +364,12 @@ def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_ model.reset_to_initial() outputs = model.execute_sequence(model.initial_state, sequance) - input_output_sequances.append(list(zip(sequance, outputs))) + if sequance_type == 'io_trace': + dataset.append(list(zip(sequance, outputs))) + else: + dataset.append((sequance, outputs[-1])) - return input_output_sequances + return dataset def generate_input_output_data_from_vpa(vpa, num_sequances=1000, max_seq_len=16, max_attempts=None): @@ -396,7 
+402,7 @@ def generate_input_output_data_from_vpa(vpa, num_sequances=1000, max_seq_len=16, sequance += (chosen_input,) output = vpa.step(chosen_input) - #if vpa.is_balanced(sequance): + # if vpa.is_balanced(sequance): data_set.add((sequance, output)) data_set = list(data_set) From 85e27d2404c54a67ebad5c3703ddc2f9bb0c7336 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Wed, 12 Feb 2025 09:54:05 +0100 Subject: [PATCH 16/23] one more fix for GSM + Examples --- aalpy/learning_algs/general_passive/GeneralizedStateMerging.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index 7a6767b81f0..2113da3883e 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -280,11 +280,12 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: assert len(partition_transitions) == 1 partition_transition = list(partition_transitions.values())[0] if unknown_output in partition_transitions: + assert len(partition_transitions) == 1 partition_transition = partition_transitions.pop(unknown_output) partition_transitions[out_sym] = partition_transition # re-hook access pair succ_part = update_partition(partition_transition.target, None) - if self.output_behavior == "moore" or succ_part.predecessor is red: + if succ_part.get_prefix_output() is unknown_output and (self.output_behavior == "moore" or succ_part.predecessor is red): succ_part.prefix_access_pair = (succ_part.get_prefix_input(), out_sym) # add pairs if partition_transition is not None: From 7021b902729398be7a40580a4d1b5e1ebf80406e Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 12 Feb 2025 10:27:54 +0100 Subject: [PATCH 17/23] expose EDSM --- aalpy/learning_algs/__init__.py | 3 +- .../general_passive/GsmAlgorithms.py | 62 +++++++++++++++++++ aalpy/utils/HelperFunctions.py | 
23 +++++++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 aalpy/learning_algs/general_passive/GsmAlgorithms.py diff --git a/aalpy/learning_algs/__init__.py b/aalpy/learning_algs/__init__.py index 81892586e52..5041af0bf31 100644 --- a/aalpy/learning_algs/__init__.py +++ b/aalpy/learning_algs/__init__.py @@ -10,4 +10,5 @@ from .stochastic_passive.ActiveAleriga import run_active_Alergia from .deterministic_passive.RPNI import run_RPNI, run_PAPNI from .deterministic_passive.active_RPNI import run_active_RPNI -from .general_passive.GeneralizedStateMerging import run_GSM \ No newline at end of file +from .general_passive.GeneralizedStateMerging import run_GSM +from .general_passive.GsmAlgorithms import run_EDSM \ No newline at end of file diff --git a/aalpy/learning_algs/general_passive/GsmAlgorithms.py b/aalpy/learning_algs/general_passive/GsmAlgorithms.py new file mode 100644 index 00000000000..f43c2c4dff9 --- /dev/null +++ b/aalpy/learning_algs/general_passive/GsmAlgorithms.py @@ -0,0 +1,62 @@ +from typing import Dict, Union + +from aalpy import DeterministicAutomaton +from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM +from aalpy.learning_algs.general_passive.Node import Node +from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation +from aalpy.utils.HelperFunctions import dfa_from_moore + + +def run_EDSM(data, automaton_type, input_completeness=None, print_info=True) -> Union[DeterministicAutomaton, None]: + """ + Run Evidence Driven State Merging. + + Args: + data: sequence of input sequences and corresponding label. Eg. [[(i1,i2,i3, ...), label], ...] + automaton_type: either 'dfa', 'mealy', 'moore'. Note that for 'mealy' machine learning, data has to be prefix-closed. + input_completeness: either None, 'sink_state', or 'self_loop'. 
If None, learned model could be input incomplete, + sink_state will lead all undefined inputs from some state to the sink state, whereas self_loop will simply create + a self loop. In case of Mealy learning output of the added transition will be 'epsilon'. + print_info: print learning progress and runtime information + + Returns: + + Model conforming to the data, or None if data is non-deterministic. + + """ + assert automaton_type in {'dfa', 'mealy', 'moore'} + assert input_completeness in {None, 'self_loop', 'sink_state'} + + def EDSM_score(part: Dict[Node, Node]): + nr_partitions = len(set(part.values())) + nr_merged = len(part) + return nr_merged - nr_partitions + + score = ScoreCalculation(score_function=EDSM_score) + + internal_automaton_type = 'moore' if automaton_type != 'mealy' else automaton_type + + if print_info: + print(f'Running EDSM.') + + learned_model = run_GSM(data, output_behavior=internal_automaton_type, + transition_behavior="deterministic", + score_calc=score) + + if print_info: + print(f'EDSM learned {learned_model.size} state automaton.') + + if automaton_type == 'dfa': + learned_model = dfa_from_moore(learned_model) + + if not learned_model.is_input_complete(): + if not input_completeness: + if print_info: + print('Warning: Learned Model is not input complete (inputs not defined for all states). ' + 'Consider calling .make_input_complete()') + else: + if print_info: + print(f'Learned model was not input complete.
Adapting it with {input_completeness} transitions.') + learned_model.make_input_complete(input_completeness) + + return learned_model diff --git a/aalpy/utils/HelperFunctions.py b/aalpy/utils/HelperFunctions.py index 76ac07c8727..c38e0b175ba 100644 --- a/aalpy/utils/HelperFunctions.py +++ b/aalpy/utils/HelperFunctions.py @@ -415,3 +415,26 @@ def product_with_possible_empty_iterable(*iterables, repeat=1): """ non_empty_iterables = [it for it in iterables if it] return product(*non_empty_iterables, repeat=repeat) + + +def dfa_from_moore(moore_model): + from aalpy.automata import Dfa, DfaState + + dfa_state_map = dict() + # define states + for moore_state in moore_model.states: + if moore_state.output not in {True, False, None}: + raise ValueError('Cannot convert Moore model with unrestricted output domain to DFA. ' + f'Output domain should be {True, False, None}. Problematic output: {moore_state.output}' + ) + + is_accepting = moore_state.output if moore_state.output is not None else False + dfa_state_map[moore_state.state_id] = DfaState(moore_state.state_id, is_accepting) + + # define transitions + for moore_state in moore_model.states: + for i, reached_state in moore_state.transitions.items(): + dfa_state_map[moore_state.state_id].transitions[i] = dfa_state_map[reached_state.state_id] + + initial_state = dfa_state_map[moore_model.initial_state.state_id] + return Dfa(initial_state, list(dfa_state_map.values())) From d53b25524f274f160e08af8854cf0a53435d954f Mon Sep 17 00:00:00 2001 From: zwergziege Date: Wed, 12 Feb 2025 11:10:39 +0100 Subject: [PATCH 18/23] extracted common method for resolving unknown output in prefix --- .../general_passive/GeneralizedStateMerging.py | 8 ++++---- aalpy/learning_algs/general_passive/Node.py | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index 2113da3883e..bb645801c6c 100644 
--- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -255,8 +255,8 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: blue_parent.transitions[blue_in_sym][blue_out_sym].target = red partition = update_partition(red, None) - if partition.get_prefix_output() is unknown_output and self.output_behavior == "moore": - partition.prefix_access_pair = (partition.get_prefix_input(), blue_out_sym) + if self.output_behavior == "moore": + partition.resolve_unknown_prefix_output(blue_out_sym) # loop over implied merges q: deque[Tuple[Node, Node]] = deque([(red, blue)]) @@ -285,8 +285,8 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: partition_transitions[out_sym] = partition_transition # re-hook access pair succ_part = update_partition(partition_transition.target, None) - if succ_part.get_prefix_output() is unknown_output and (self.output_behavior == "moore" or succ_part.predecessor is red): - succ_part.prefix_access_pair = (succ_part.get_prefix_input(), out_sym) + if self.output_behavior == "moore" or succ_part.predecessor is red: + succ_part.resolve_unknown_prefix_output(out_sym) # add pairs if partition_transition is not None: q.append((partition_transition.target, blue_transition.target)) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 2df264fc3f6..905e025a206 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -175,6 +175,10 @@ def get_prefix_output(self): def get_prefix_input(self): return self.prefix_access_pair[0] + def resolve_unknown_prefix_output(self, value): + if self.get_prefix_output() is unknown_output: + self.prefix_access_pair = (self.get_prefix_input(), value) + def get_prefix(self, include_output=True): node = self prefix = [] @@ -417,13 +421,12 @@ def add_example(self, example: IOExample): curr_node = 
node # set last output - curr_node.prefix_access_pair = (curr_node.get_prefix_input(), output) + curr_node.resolve_unknown_prefix_output(output) pred = curr_node.predecessor if pred: transitions = pred.transitions[in_sym] if unknown_output in transitions: - transitions[output] = transitions[unknown_output] - del transitions[unknown_output] + transitions[output] = transitions.pop(unknown_output) if output not in transitions: raise ValueError("nondeterminism encountered for GSM with examples. not supported") From a251f76100b81b7c97193e0a8ed3200afaa3ce09 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Wed, 12 Feb 2025 11:15:17 +0100 Subject: [PATCH 19/23] simplified instrumentation class --- .../general_passive/Instrumentation.py | 22 ++----------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/aalpy/learning_algs/general_passive/Instrumentation.py b/aalpy/learning_algs/general_passive/Instrumentation.py index edc91c6fc79..50112ad8550 100644 --- a/aalpy/learning_algs/general_passive/Instrumentation.py +++ b/aalpy/learning_algs/general_passive/Instrumentation.py @@ -1,5 +1,4 @@ import time -from functools import wraps from typing import Dict, Optional from aalpy.learning_algs.general_passive.GeneralizedStateMerging import Instrumentation, Partitioning, \ @@ -8,19 +7,6 @@ class ProgressReport(Instrumentation): - @staticmethod - def min_lvl(lvl): - def decorator(fn): - @wraps(fn) - def wrapper(this, *args, **kw): - if this.lvl < lvl: - return - fn(this, *args, **kw) - - return wrapper - - return decorator - def __init__(self, lvl): super().__init__() self.lvl = lvl @@ -45,10 +31,9 @@ def reset(self, gsm: GeneralizedStateMerging): self.previous_time = time.time() - @min_lvl(1) def pta_construction_done(self, root): print(f'PTA Construction Time: {round(time.time() - self.previous_time, 2)}') - if self.lvl != 1: + if 1 < self.lvl: states = root.get_all_nodes() leafs = [state for state in states if len(state.transitions.keys()) == 0] depth = 
[state.get_prefix_length() for state in leafs] @@ -60,24 +45,21 @@ def pta_construction_done(self, root): def print_status(self): reset_char = "\33[2K\r" print_str = reset_char + f'Current automaton size: {self.nr_red_states}' - if self.lvl != 1 and not self.gsm.compatibility_on_futures: + if 1 < self.lvl and not self.gsm.compatibility_on_futures: print_str += f' Merged: {self.nr_merged_states_total} Remaining: {self.pta_size - self.nr_red_states - self.nr_merged_states_total}' print(print_str, end="") - @min_lvl(1) def log_promote(self, node: Node): self.log.append(["promote", (node.get_prefix(),)]) self.nr_red_states += 1 self.print_status() - @min_lvl(1) def log_merge(self, part: Partitioning): self.log.append(["merge", (part.red.get_prefix(), part.blue.get_prefix())]) self.nr_merged_states_total += len(part.full_mapping) - len(part.red_mapping) self.nr_merged_states += 1 self.print_status() - @min_lvl(1) def learning_done(self, root, red_states): print(f'\nLearning Time: {round(time.time() - self.previous_time, 2)}') print(f'Learned {len(red_states)} state automaton via {self.nr_merged_states} merges.') From 8c87a61ae6b86ba544aaf875435eceacdf55e17c Mon Sep 17 00:00:00 2001 From: Edi Muskardin Date: Wed, 12 Feb 2025 12:05:49 +0100 Subject: [PATCH 20/23] make sure that Unknown inputs are treated as None --- .../general_passive/GeneralizedStateMerging.py | 3 +-- aalpy/learning_algs/general_passive/GsmAlgorithms.py | 9 ++------- aalpy/learning_algs/general_passive/Node.py | 2 +- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index bb645801c6c..c2319f10259 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -1,7 +1,6 @@ import functools -from bisect import insort -from typing import Dict, Tuple, Callable, List, Optional from 
collections import deque +from typing import Dict, Tuple, Callable, List, Optional from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \ OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output diff --git a/aalpy/learning_algs/general_passive/GsmAlgorithms.py b/aalpy/learning_algs/general_passive/GsmAlgorithms.py index f43c2c4dff9..e77bf03eed0 100644 --- a/aalpy/learning_algs/general_passive/GsmAlgorithms.py +++ b/aalpy/learning_algs/general_passive/GsmAlgorithms.py @@ -2,6 +2,7 @@ from aalpy import DeterministicAutomaton from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM +from aalpy.learning_algs.general_passive.Instrumentation import ProgressReport from aalpy.learning_algs.general_passive.Node import Node from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation from aalpy.utils.HelperFunctions import dfa_from_moore @@ -36,15 +37,9 @@ def EDSM_score(part: Dict[Node, Node]): internal_automaton_type = 'moore' if automaton_type != 'mealy' else automaton_type - if print_info: - print(f'Running EDSM.') - learned_model = run_GSM(data, output_behavior=internal_automaton_type, transition_behavior="deterministic", - score_calc=score) - - if print_info: - print(f'EDSM learned {learned_model.size} state automaton.') + score_calc=score, instrumentation=ProgressReport(2)) if automaton_type == 'dfa': learned_model = dfa_from_moore(learned_model) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 905e025a206..8e3a3066d2c 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -41,7 +41,7 @@ def __repr__(self): return str(self.value) -unknown_output = SpecialValue("Output Unknown") +unknown_output = None #SpecialValue("Output Unknown") def generate_values(base: list, step: Callable, backing_set=True): From 
53f80334ed2b0e1b39eadf9aa45234c231a6014b Mon Sep 17 00:00:00 2001 From: zwergziege Date: Wed, 12 Feb 2025 11:45:06 +0100 Subject: [PATCH 21/23] additional checks for data format (trees, reject examples + non-det) --- .../general_passive/GeneralizedStateMerging.py | 15 ++++++++------- aalpy/learning_algs/general_passive/Node.py | 12 ++++++++---- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index c2319f10259..197dfa50bb6 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -3,7 +3,7 @@ from typing import Dict, Tuple, Callable, List, Optional from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \ - OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output + OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output, detect_data_format from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation, hoeffding_compatibility @@ -93,15 +93,16 @@ def compute_local_compatibility(self, a: Node, b: Node): # TODO: make more generic by adding the option to use a different algorithm than red blue # for selecting potential merge candidates. Maybe using inheritance with abstract `run`. 
- def run(self, data, convert=True, instrumentation: Instrumentation=None, data_format="auto"): + def run(self, data, convert=True, instrumentation: Instrumentation=None, data_format=None): if instrumentation is None: instrumentation = Instrumentation() instrumentation.reset(self) - if isinstance(data, Node): - root = data - else: - root = Node.createPTA(data, self.output_behavior, data_format) + if data_format is None: + data_format = detect_data_format(data) + if data_format == "examples" and self.transition_behavior != "deterministic": + raise ValueError("learning from examples is not possible for nondeterministic systems") + root = Node.createPTA(data, self.output_behavior, data_format) root = self.pta_preprocessing(root) instrumentation.pta_construction_done(root) @@ -312,7 +313,7 @@ def run_GSM(data, *, depth_first=False, instrumentation=None, convert=True, - data_format="auto", + data_format=None, ): """ TODO diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 8e3a3066d2c..294f23646f8 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -20,7 +20,7 @@ TransitionBehaviorRange = ["deterministic", "nondeterministic", "stochastic"] DataFormat = str -DataFormatRange = ["auto", "traces", "examples"] +DataFormatRange = ["traces", "examples", "tree"] IOPair = Tuple[Any, Any] IOTrace = Sequence[IOPair] @@ -92,6 +92,10 @@ def check_data_format(value): return value raise ValueError("inconsistent data") + if isinstance(data, Node): + if not data.is_tree(): + raise ValueError("provided automaton is not a tree") + return "tree" if not isinstance(data, accepted_types): raise ValueError("wrong input format. 
expected tuple or list.") if len(data) == 0: @@ -235,7 +239,7 @@ def generator(state: Node): def is_tree(self): q: List['Node'] = [self] - backing_set = set() + backing_set = {self} while len(q) != 0: current = q.pop(0) for _, child in current.transition_iterator(): @@ -434,9 +438,9 @@ def add_example(self, example: IOExample): def createPTA(data, output_behavior, data_format="auto") -> 'Node': if data_format not in DataFormatRange: raise ValueError(f"invalid data format {data_format}. should be in {DataFormatRange}") - if data_format == "auto": - data_format = detect_data_format(data) + if data_format == "tree": + return data root_node = Node((None, unknown_output), None) if data_format == "examples": for example in data: From 930e1a82c0a6a04abe07ff0b607e9c55a2eb6767 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Wed, 12 Feb 2025 12:23:36 +0100 Subject: [PATCH 22/23] fixed createPTA dataformat default --- aalpy/learning_algs/general_passive/Node.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 294f23646f8..7195f867d64 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -435,7 +435,9 @@ def add_example(self, example: IOExample): raise ValueError("nondeterminism encountered for GSM with examples. not supported") @staticmethod - def createPTA(data, output_behavior, data_format="auto") -> 'Node': + def createPTA(data, output_behavior, data_format=None) -> 'Node': + if data_format is None: + data_format = detect_data_format(data) if data_format not in DataFormatRange: raise ValueError(f"invalid data format {data_format}. 
should be in {DataFormatRange}") From fc31caf76a9785b0ab27abe5d6a5fc6878e99137 Mon Sep 17 00:00:00 2001 From: zwergziege Date: Wed, 12 Feb 2025 12:38:35 +0100 Subject: [PATCH 23/23] set unknown_output to None --- aalpy/learning_algs/general_passive/Node.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index 7195f867d64..4809d9ec288 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -29,19 +29,7 @@ StateFunction = Callable[['Node'], str] TransitionFunction = Callable[['Node', Any, Any], str] - -class SpecialValue: - def __init__(self, value): - self.value = value - - def __str__(self): - return str(self.value) - - def __repr__(self): - return str(self.value) - - -unknown_output = None #SpecialValue("Output Unknown") +unknown_output = None # can be set to a special value if required def generate_values(base: list, step: Callable, backing_set=True):