diff --git a/DotModels/car_alarm.dot b/DotModels/car_alarm.dot index 7cb052e4b35..bc5829d710a 100644 --- a/DotModels/car_alarm.dot +++ b/DotModels/car_alarm.dot @@ -5,22 +5,18 @@ q3_locked_closed [label="A"]; q5_unlocked_closed [label="N"]; q6_unlocked_open [label="N"]; q7_locked_open [label="N"]; -q4_faulty [label="N"]; -q1_locked_closed -> q2_locked_open [label="d:1"]; -q1_locked_closed -> q5_unlocked_closed [label="l:1"]; -q2_locked_open -> q3_locked_closed [label="d:1"]; -q2_locked_open -> q6_unlocked_open [label="l:1"]; -q3_locked_closed -> q2_locked_open [label="d:1"]; -q3_locked_closed -> q5_unlocked_closed [label="l:1"]; -q5_unlocked_closed -> q6_unlocked_open [label="d:1"]; -q5_unlocked_closed -> q1_locked_closed [label="l:1"]; -q6_unlocked_open -> q5_unlocked_closed [label="d:1"]; -q6_unlocked_open -> q7_locked_open [label="l:1"]; -q7_locked_open -> q4_faulty [label="d:1"]; -q7_locked_open -> q6_unlocked_open [label="l:1"]; -q4_faulty -> q2_locked_open [label="d:0.9"]; -q4_faulty -> q7_locked_open [label="d:0.1"]; -q4_faulty -> q5_unlocked_closed [label="l:1"]; +q1_locked_closed -> q2_locked_open [label="d"]; +q1_locked_closed -> q5_unlocked_closed [label="l"]; +q2_locked_open -> q3_locked_closed [label="d"]; +q2_locked_open -> q6_unlocked_open [label="l"]; +q3_locked_closed -> q2_locked_open [label="d"]; +q3_locked_closed -> q5_unlocked_closed [label="l"]; +q5_unlocked_closed -> q6_unlocked_open [label="d"]; +q5_unlocked_closed -> q1_locked_closed [label="l"]; +q6_unlocked_open -> q5_unlocked_closed [label="d"]; +q6_unlocked_open -> q7_locked_open [label="l"]; +q7_locked_open -> q1_locked_closed [label="d"]; +q7_locked_open -> q6_unlocked_open [label="l"]; __start0 [label="", shape=none]; __start0 -> q1_locked_closed [label=""]; } diff --git a/aalpy/learning_algs/__init__.py b/aalpy/learning_algs/__init__.py index 81892586e52..5041af0bf31 100644 --- a/aalpy/learning_algs/__init__.py +++ b/aalpy/learning_algs/__init__.py @@ -10,4 +10,5 @@ from .stochastic_passive.ActiveAleriga import run_active_Alergia from .deterministic_passive.RPNI import run_RPNI, run_PAPNI from .deterministic_passive.active_RPNI import run_active_RPNI -from .general_passive.GeneralizedStateMerging import run_GSM \ No newline at end of file +from .general_passive.GeneralizedStateMerging import run_GSM +from .general_passive.GsmAlgorithms import run_EDSM \ No newline at end of file diff --git a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py index 2370aed9a16..197dfa50bb6 100644 --- a/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py +++ b/aalpy/learning_algs/general_passive/GeneralizedStateMerging.py @@ -1,10 +1,9 @@ import functools -from bisect import insort -from typing import Dict, Tuple, Callable, List, Optional from collections import deque +from typing import Dict, Tuple, Callable, List, Optional from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \ - OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator + OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output, detect_data_format from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation, hoeffding_compatibility @@ -54,10 +53,10 @@ def __init__(self, *, depth_first=False): if output_behavior not in OutputBehaviorRange: - raise ValueError(f"invalid output behavior {output_behavior}") + raise ValueError(f"invalid output behavior {output_behavior}. should be in {OutputBehaviorRange}") self.output_behavior: OutputBehavior = output_behavior if transition_behavior not in TransitionBehaviorRange: - raise ValueError(f"invalid transition behavior {transition_behavior}") + raise ValueError(f"invalid transition behavior {transition_behavior}. should be in {TransitionBehaviorRange}") self.transition_behavior: TransitionBehavior = transition_behavior if score_calc is None: @@ -70,8 +69,11 @@ def __init__(self, *, self.score_calc: ScoreCalculation = score_calc if node_order is None: - node_order = Node.__lt__ - self.node_order = functools.cmp_to_key(lambda a, b: -1 if node_order(a, b) else 1) + node_order = NodeOrders.Default + if node_order is NodeOrders.NoCompare or node_order is NodeOrders.Default: + self.node_order = node_order + else: + self.node_order = functools.cmp_to_key(lambda a, b: -1 if node_order(a, b) else 1) self.pta_preprocessing = pta_preprocessing or (lambda x: x) self.postprocessing = postprocessing or (lambda x: x) @@ -91,15 +93,16 @@ def compute_local_compatibility(self, a: Node, b: Node): # TODO: make more generic by adding the option to use a different algorithm than red blue # for selecting potential merge candidates. Maybe using inheritance with abstract `run`. - def run(self, data, convert=True, instrumentation: Instrumentation = None): + def run(self, data, convert=True, instrumentation: Instrumentation=None, data_format=None): if instrumentation is None: instrumentation = Instrumentation() instrumentation.reset(self) - if isinstance(data, Node): - root = data - else: - root = Node.createPTA(data, self.output_behavior) + if data_format is None: + data_format = detect_data_format(data) + if data_format == "examples" and self.transition_behavior != "deterministic": + raise ValueError("learning from examples is not possible for nondeterministic systems") + root = Node.createPTA(data, self.output_behavior, data_format) root = self.pta_preprocessing(root) instrumentation.pta_construction_done(root) @@ -128,7 +131,11 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): # no blue states left -> done if len(blue_states) == 0: break - blue_states.sort(key=self.node_order) + if self.node_order is not NodeOrders.NoCompare: + blue_states.sort(key=self.node_order) + # red states are always sorted using default order on original prefix + if self.node_order is not NodeOrders.Default: + red_states.sort(key=self.node_order) # loop over blue states promotion = False @@ -139,7 +146,6 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): # calculate partitions resulting from merges with red states if necessary current_candidates: Dict[Node, Partitioning] = dict() perfect_partitioning = None - red_state = None for red_state in red_states: partition = partition_candidates.get((red_state, blue_state)) @@ -149,8 +155,8 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): perfect_partitioning = partition break current_candidates[red_state] = partition - assert red_state is not None + # partition with perfect score found: don't consider anything else if perfect_partitioning: partition_candidates = {(red_state, blue_state): perfect_partitioning} @@ -158,7 +164,7 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): # no merge candidates for this blue state -> promote if all(part.score is False for part in current_candidates.values()): - insort(red_states, blue_state, key=self.node_order) + red_states.append(blue_state) instrumentation.log_promote(blue_state) promotion = True break @@ -176,10 +182,11 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None): best_candidate = max(partition_candidates.values(), key=lambda part: part.score) for real_node, partition_node in best_candidate.red_mapping.items(): real_node.transitions = partition_node.transitions + real_node.prefix_access_pair = partition_node.prefix_access_pair for access_pair, t_info in real_node.transition_iterator(): if t_info.target not in red_states: t_info.target.predecessor = real_node - t_info.target.prefix_access_pair = access_pair # not sure whether this is actually required + # t_info.target.prefix_access_pair = access_pair # not sure whether this is actually required instrumentation.log_merge(best_candidate) # FUTURE: optimizations for compatibility tests where merges can be orthogonal # FUTURE: caching for aggregating compatibility tests @@ -247,9 +254,13 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: blue_in_sym, blue_out_sym = blue.prefix_access_pair blue_parent.transitions[blue_in_sym][blue_out_sym].target = red + partition = update_partition(red, None) + if self.output_behavior == "moore": + partition.resolve_unknown_prefix_output(blue_out_sym) + + # loop over implied merges q: deque[Tuple[Node, Node]] = deque([(red, blue)]) pop = q.pop if self.depth_first else q.popleft - while len(q) != 0: red, blue = pop() partition = update_partition(red, blue) @@ -258,10 +269,25 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node: if self.compute_local_compatibility(partition, blue) is False: return partitioning + # create implied merges for all common successors for in_sym, blue_transitions in blue.transitions.items(): partition_transitions = partition.get_or_create_transitions(in_sym) for out_sym, blue_transition in blue_transitions.items(): partition_transition = partition_transitions.get(out_sym) + # handle unknown output + if partition_transition is None: + if out_sym is unknown_output and len(partition_transitions) != 0: + assert len(partition_transitions) == 1 + partition_transition = list(partition_transitions.values())[0] + if unknown_output in partition_transitions: + assert len(partition_transitions) == 1 + partition_transition = partition_transitions.pop(unknown_output) + partition_transitions[out_sym] = partition_transition + # re-hook access pair + succ_part = update_partition(partition_transition.target, None) + if self.output_behavior == "moore" or succ_part.predecessor is red: + succ_part.resolve_unknown_prefix_output(out_sym) + # add pairs if partition_transition is not None: q.append((partition_transition.target, blue_transition.target)) partition_transition.count += blue_transition.count @@ -287,6 +313,7 @@ def run_GSM(data, *, depth_first=False, instrumentation=None, convert=True, + data_format=None, ): """ TODO @@ -318,12 +345,14 @@ def run_GSM(data, *, convert: + data_format: + Returns: """ - # instantiate the gsm + # instantiate gsm gsm = GeneralizedStateMerging( output_behavior=output_behavior, transition_behavior=transition_behavior, @@ -338,4 +367,4 @@ def run_GSM(data, *, ) # run the algorithm - return gsm.run(data=data, instrumentation=instrumentation, convert=convert) + return gsm.run(data=data, instrumentation=instrumentation, convert=convert, data_format=data_format) diff --git a/aalpy/learning_algs/general_passive/GsmAlgorithms.py b/aalpy/learning_algs/general_passive/GsmAlgorithms.py new file mode 100644 index 00000000000..e77bf03eed0 --- /dev/null +++ b/aalpy/learning_algs/general_passive/GsmAlgorithms.py @@ -0,0 +1,57 @@ +from typing import Dict, Union + +from aalpy import DeterministicAutomaton +from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM +from aalpy.learning_algs.general_passive.Instrumentation import ProgressReport +from aalpy.learning_algs.general_passive.Node import Node +from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation +from aalpy.utils.HelperFunctions import dfa_from_moore + + +def run_EDSM(data, automaton_type, input_completeness=None, print_info=True) -> Union[DeterministicAutomaton, None]: + """ + Run Evidence Driven State Merging. + + Args: + data: sequence of input sequences and corresponding label. Eg. [[(i1,i2,i3, ...), label], ...] + automaton_type: either 'dfa', 'mealy', 'moore'. Note that for 'mealy' machine learning, data has to be prefix-closed. + input_completeness: either None, 'sink_state', or 'self_loop'. If None, learned model could be input incomplete, + sink_state will lead all undefined inputs form some state to the sink state, whereas self_loop will simply create + a self loop. In case of Mealy learning output of the added transition will be 'epsilon'. + print_info: print learning progress and runtime information + + Returns: + + Model conforming to the data, or None if data is non-deterministic. + + """ + assert automaton_type in {'dfa', 'mealy', 'moore'} + assert input_completeness in {None, 'self_loop', 'sink_state'} + + def EDSM_score(part: Dict[Node, Node]): + nr_partitions = len(set(part.values())) + nr_merged = len(part) + return nr_merged - nr_partitions + + score = ScoreCalculation(score_function=EDSM_score) + + internal_automaton_type = 'moore' if automaton_type != 'mealy' else automaton_type + + learned_model = run_GSM(data, output_behavior=internal_automaton_type, + transition_behavior="deterministic", + score_calc=score, instrumentation=ProgressReport(2)) + + if automaton_type == 'dfa': + learned_model = dfa_from_moore(learned_model) + + if not learned_model.is_input_complete(): + if not input_completeness: + if print_info: + print('Warning: Learned Model is not input complete (inputs not defined for all states). ' + 'Consider calling .make_input_complete()') + else: + if print_info: + print(f'Learned model was not input complete. Adapting it with {input_completeness} transitions.') + learned_model.make_input_complete(input_completeness) + + return learned_model diff --git a/aalpy/learning_algs/general_passive/Instrumentation.py b/aalpy/learning_algs/general_passive/Instrumentation.py index edc91c6fc79..50112ad8550 100644 --- a/aalpy/learning_algs/general_passive/Instrumentation.py +++ b/aalpy/learning_algs/general_passive/Instrumentation.py @@ -1,5 +1,4 @@ import time -from functools import wraps from typing import Dict, Optional from aalpy.learning_algs.general_passive.GeneralizedStateMerging import Instrumentation, Partitioning, \ @@ -8,19 +7,6 @@ class ProgressReport(Instrumentation): - @staticmethod - def min_lvl(lvl): - def decorator(fn): - @wraps(fn) - def wrapper(this, *args, **kw): - if this.lvl < lvl: - return - fn(this, *args, **kw) - - return wrapper - - return decorator - def __init__(self, lvl): super().__init__() self.lvl = lvl @@ -45,10 +31,9 @@ def reset(self, gsm: GeneralizedStateMerging): self.previous_time = time.time() - @min_lvl(1) def pta_construction_done(self, root): print(f'PTA Construction Time: {round(time.time() - self.previous_time, 2)}') - if self.lvl != 1: + if 1 < self.lvl: states = root.get_all_nodes() leafs = [state for state in states if len(state.transitions.keys()) == 0] depth = [state.get_prefix_length() for state in leafs] @@ -60,24 +45,21 @@ def pta_construction_done(self, root): def print_status(self): reset_char = "\33[2K\r" print_str = reset_char + f'Current automaton size: {self.nr_red_states}' - if self.lvl != 1 and not self.gsm.compatibility_on_futures: + if 1 < self.lvl and not self.gsm.compatibility_on_futures: print_str += f' Merged: {self.nr_merged_states_total} Remaining: {self.pta_size - self.nr_red_states - self.nr_merged_states_total}' print(print_str, end="") - @min_lvl(1) def log_promote(self, node: Node): self.log.append(["promote", (node.get_prefix(),)]) self.nr_red_states += 1 self.print_status() - @min_lvl(1) def log_merge(self, part: Partitioning): self.log.append(["merge", (part.red.get_prefix(), part.blue.get_prefix())]) self.nr_merged_states_total += len(part.full_mapping) - len(part.red_mapping) self.nr_merged_states += 1 self.print_status() - @min_lvl(1) def learning_done(self, root, red_states): print(f'\nLearning Time: {round(time.time() - self.previous_time, 2)}') print(f'Learned {len(red_states)} state automaton via {self.nr_merged_states} merges.') diff --git a/aalpy/learning_algs/general_passive/Node.py b/aalpy/learning_algs/general_passive/Node.py index be663be7b2e..4809d9ec288 100644 --- a/aalpy/learning_algs/general_passive/Node.py +++ b/aalpy/learning_algs/general_passive/Node.py @@ -1,7 +1,8 @@ +import functools import math import pathlib from functools import total_ordering -from typing import Dict, Any, List, Tuple, Iterable, Callable, Union, TypeVar, Iterator, Optional +from typing import Dict, Any, List, Tuple, Iterable, Callable, Union, TypeVar, Iterator, Optional, Sequence import pydot from copy import copy @@ -18,13 +19,18 @@ TransitionBehavior = str TransitionBehaviorRange = ["deterministic", "nondeterministic", "stochastic"] +DataFormat = str +DataFormatRange = ["traces", "examples", "tree"] + IOPair = Tuple[Any, Any] -IOTrace = List[IOPair] -IOExample = Tuple[Iterable[Any], Any] +IOTrace = Sequence[IOPair] +IOExample = Tuple[Sequence[Any], Any] StateFunction = Callable[['Node'], str] TransitionFunction = Callable[['Node', Any, Any], str] +unknown_output = None # can be set to a special value if required + def generate_values(base: list, step: Callable, backing_set=True): if backing_set: @@ -65,6 +71,42 @@ def union_iterator(a: Dict[Key, Val], b: Dict[Key, Val], default: Val = None) -> yield key, a_val, b_val +# TODO maybe reuse this in classic RPNI +def detect_data_format(data, check_consistency=True): + accepted_types = (Tuple, List) + data_format = None + def check_data_format(value): + if data_format is None or data_format == value: + return value + raise ValueError("inconsistent data") + + if isinstance(data, Node): + if not data.is_tree(): + raise ValueError("provided automaton is not a tree") + return "tree" + if not isinstance(data, accepted_types): + raise ValueError("wrong input format. expected tuple or list.") + if len(data) == 0: + return "traces" + for data_point in data: + if len(data_point) != 2: + data_format = check_data_format("traces") + if not check_consistency: + return data_format + o1, o2 = data_point + if not isinstance(o1, accepted_types): + data_format = check_data_format("traces") + if not check_consistency: + return data_format + if not isinstance(o2, accepted_types): + data_format = check_data_format("examples") + if not check_consistency: + return data_format + if data_format is None: + raise ValueError("ambiguous data format. data format needs to be specified explicitly.") + return data_format + + # TODO maybe split this for maintainability (and perfomance?) class TransitionInfo: __slots__ = ["target", "count", "original_target", "original_count"] @@ -109,12 +151,6 @@ def __lt__(self, other, compare_length_only=False): except TypeError: return [str(x) for x in own_p] < [str(x) for x in other_p] - def __eq__(self, other): - return self is other # TODO hack, does this lead to problems down the line? - - def __hash__(self): - return id(self) # TODO This is a hack - # TODO implicit prefixes as currently implemented require O(length) time for prefix calculations (e.g. to determine the minimal blue node) # other options would be to have more efficient explicit prefixes such as shared list representations def get_prefix_length(self): @@ -128,6 +164,13 @@ def get_prefix_length(self): def get_prefix_output(self): return self.prefix_access_pair[1] + def get_prefix_input(self): + return self.prefix_access_pair[0] + + def resolve_unknown_prefix_output(self, value): + if self.get_prefix_output() is unknown_output: + self.prefix_access_pair = (self.get_prefix_input(), value) + def get_prefix(self, include_output=True): node = self prefix = [] @@ -184,7 +227,7 @@ def generator(state: Node): def is_tree(self): q: List['Node'] = [self] - backing_set = set() + backing_set = {self} while len(q) != 0: current = q.pop(0) for _, child in current.transition_iterator(): @@ -196,7 +239,7 @@ def is_tree(self): return True def to_automaton(self, output_behavior: OutputBehavior, transition_behavior: TransitionBehavior, - check_behavior=False, set_prefix=False) -> Automaton: + check_behavior=True, set_prefix=False) -> Automaton: nodes = self.get_all_nodes() if check_behavior: @@ -332,13 +375,9 @@ def node_naming(node: Node): file_ext = 'dot' graph.write(path=str(path) + "." + file_ext, prog=engine, format=format) - def add_data(self, data): - for seq in data: - self.add_trace(seq) - - def add_trace(self, data: IOTrace): + def add_trace(self, trace: IOTrace): curr_node: Node = self - for in_sym, out_sym in data: + for in_sym, out_sym in trace: transitions = curr_node.get_or_create_transitions(in_sym) info = transitions.get(out_sym) if info is None: @@ -350,19 +389,59 @@ def add_trace(self, data: IOTrace): node = info.target curr_node = node - def add_example(self, data: IOExample): - # TODO add support for example based algorithms - raise NotImplementedError() + def add_example(self, example: IOExample): + inputs, output = example + curr_node: Node = self + in_sym = None + + # step through inputs and add transitions + for in_sym in inputs: + transitions = curr_node.get_or_create_transitions(in_sym) + t_infos = list(transitions.values()) + if len(t_infos) == 0: + node = Node((in_sym, unknown_output), curr_node) + t_info = TransitionInfo(node, 1, node, 1) + transitions[unknown_output] = t_info + elif len(t_infos) == 1: + t_info = t_infos[0] + t_info.count += 1 + t_info.original_count += 1 + node = t_info.target + else: + # This should never happen + raise ValueError("nondeterminism encountered for GSM with examples. not supported") + curr_node = node + + # set last output + curr_node.resolve_unknown_prefix_output(output) + pred = curr_node.predecessor + if pred: + transitions = pred.transitions[in_sym] + if unknown_output in transitions: + transitions[output] = transitions.pop(unknown_output) + if output not in transitions: + raise ValueError("nondeterminism encountered for GSM with examples. not supported") @staticmethod - def createPTA(data, output_behavior) -> 'Node': - if output_behavior == "moore": - initial_output = data[0][0] - data = (d[1:] for d in data) - else: - initial_output = None - root_node = Node((None, initial_output), None) - root_node.add_data(data) + def createPTA(data, output_behavior, data_format=None) -> 'Node': + if data_format is None: + data_format = detect_data_format(data) + if data_format not in DataFormatRange: + raise ValueError(f"invalid data format {data_format}. should be in {DataFormatRange}") + + if data_format == "tree": + return data + root_node = Node((None, unknown_output), None) + if data_format == "examples": + for example in data: + root_node.add_example(example) + if data_format == "traces": + if output_behavior == "moore": + initial_output = data[0][0] + root_node.prefix_access_pair = (None, initial_output) + data = (d[1:] for d in data) + for trace in data: + root_node.add_trace(trace) return root_node def is_locally_deterministic(self): @@ -372,22 +451,25 @@ def is_deterministic(self): return all(node.is_locally_deterministic() for node in self.get_all_nodes()) def deterministic_compatible(self, other: 'Node'): - common_keys = filter(lambda key: key in self.transitions.keys(), other.transitions.keys()) - return all(list(self.transitions[key].keys()) == list(other.transitions[key].keys()) for key in common_keys) + for _, trans_self, trans_other in intersection_iterator(self.transitions, other.transitions): + if unknown_output in trans_self or unknown_output in trans_other: + continue + if list(trans_self.keys()) != list(trans_other.keys()): + return False + return True def is_moore(self): - output_dict = dict() for node in self.get_all_nodes(): for (in_sym, out_sym), transition in node.transition_iterator(): - child = transition.target - if child in output_dict.keys() and output_dict[child] != out_sym: + child_output = transition.target.get_prefix_output() + if out_sym is not unknown_output and child_output != out_sym: return False - else: - output_dict[child] = out_sym return True def moore_compatible(self, other: 'Node'): - return self.get_prefix_output() == other.get_prefix_output() + so = self.get_prefix_output() + oo = other.get_prefix_output() + return so == oo or so is unknown_output or oo is unknown_output def local_log_likelihood_contribution(self): llc = 0 @@ -402,3 +484,8 @@ def local_log_likelihood_contribution(self): def count(self): return sum(trans.count for _, trans in self.transition_iterator()) + + +class NodeOrders: + NoCompare = lambda n: 0 + Default = functools.cmp_to_key(lambda a, b: -1 if a < b else 1) diff --git a/aalpy/utils/HelperFunctions.py b/aalpy/utils/HelperFunctions.py index 54e3cd2ecdb..c38e0b175ba 100644 --- a/aalpy/utils/HelperFunctions.py +++ b/aalpy/utils/HelperFunctions.py @@ -301,7 +301,7 @@ def convert_i_o_traces_for_RPNI(sequences, automaton_type="mealy"): for s in sequences: if automaton_type in ["moore", "dfa"]: - rpni_sequences.add((tuple(),s[0])) + rpni_sequences.add((tuple(), s[0])) s = s[1:] for i in range(len(s)): inputs = tuple([io[0] for io in s[:i + 1]]) @@ -349,11 +349,14 @@ def is_balanced(input_seq, vpa_alphabet): return counter == 0 -def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_len=1, max_seq_len=16): +def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_len=1, max_seq_len=16, + sequance_type='single_output'): + assert sequance_type in {'io_trace', 'single_output'} alphabet = model.get_input_alphabet() - input_output_sequances = [] - while len(input_output_sequances) < num_sequances: + dataset = [] + + while len(dataset) < num_sequances: sequance = [] for _ in range(random.randint(min_seq_len, max_seq_len)): sequance.append(random.choice(alphabet)) @@ -361,9 +364,12 @@ def generate_input_output_data_from_automata(model, num_sequances=4000, min_seq_ model.reset_to_initial() outputs = model.execute_sequence(model.initial_state, sequance) - input_output_sequances.append(list(zip(sequance, outputs))) + if sequance_type == 'io_trace': + dataset.append(list(zip(sequance, outputs))) + else: + dataset.append((sequance, outputs[-1])) - return input_output_sequances + return dataset def generate_input_output_data_from_vpa(vpa, num_sequances=1000, max_seq_len=16, max_attempts=None): @@ -396,7 +402,7 @@ def generate_input_output_data_from_vpa(vpa, num_sequances=1000, max_seq_len=16, sequance += (chosen_input,) output = vpa.step(chosen_input) - #if vpa.is_balanced(sequance): + # if vpa.is_balanced(sequance): data_set.add((sequance, output)) data_set = list(data_set) @@ -409,3 +415,26 @@ def product_with_possible_empty_iterable(*iterables, repeat=1): """ non_empty_iterables = [it for it in iterables if it] return product(*non_empty_iterables, repeat=repeat) + + +def dfa_from_moore(moore_model): + from aalpy.automata import Dfa, DfaState + + dfa_state_map = dict() + # define states + for moore_state in moore_model.states: + if moore_state.output not in {True, False, None}: + raise ValueError('Cannot convert Moore model with unrestricted output domain to DFA. ' + f'Output domain should be {True, False, None}. Problematic output: {moore_state.output}' + ) + + is_accepting = moore_state.output if moore_state.output is not None else False + dfa_state_map[moore_state.state_id] = DfaState(moore_state.state_id, is_accepting) + + # define transitions + for moore_state in moore_model.states: + for i, reached_state in moore_state.transitions.items(): + dfa_state_map[moore_state.state_id].transitions[i] = dfa_state_map[reached_state.state_id] + + initial_state = dfa_state_map[moore_model.initial_state.state_id] + return Dfa(initial_state, list(dfa_state_map.values())) diff --git a/aalpy/utils/Sampling.py b/aalpy/utils/Sampling.py index 14c60e36001..ba49b81ee85 100644 --- a/aalpy/utils/Sampling.py +++ b/aalpy/utils/Sampling.py @@ -1,13 +1,48 @@ +from functools import wraps from random import randint, choices, random from aalpy import MooreMachine, Dfa, NDMooreMachine, Mdp, MarkovChain from aalpy.base import Automaton, DeterministicAutomaton +def get_io_traces(automaton: Automaton, input_traces: list) -> list: + moore_automata = (MooreMachine, Dfa, NDMooreMachine, Mdp, MarkovChain) + is_moore = isinstance(automaton, moore_automata) + traces = [] + for input_trace in input_traces: + output_trace = automaton.execute_sequence(automaton.initial_state, input_trace) + trace = list(zip(input_trace, output_trace)) + if is_moore: + trace = [automaton.initial_state.output] + trace + traces.append(trace) + return traces + + +def support_automaton_arg(require_transform): + def decorator(f): + @wraps(f) + def inner(alphabet, *args, include_outputs=False, **kwargs): + automaton = None + if isinstance(alphabet, Automaton): + automaton = alphabet + if require_transform: + alphabet = alphabet.get_input_alphabet() + traces = f(alphabet, *args, **kwargs) + if include_outputs: + if automaton is None: + raise ValueError("automaton must be provided") + traces = get_io_traces(automaton, traces) + return traces + return inner + return decorator + + +@support_automaton_arg(True) def sample_with_length_limits(alphabet, nr_samples, min_len, max_len): return [choices(alphabet, k = randint(min_len, max_len)) for _ in range(nr_samples)] +@support_automaton_arg(True) def sample_with_term_prob(alphabet, nr_samples, term_prob): ret = [] for _ in range(nr_samples): @@ -18,23 +53,10 @@ def sample_with_term_prob(alphabet, nr_samples, term_prob): return ret +@support_automaton_arg(False) def get_complete_sample(automaton: DeterministicAutomaton): alphabet = automaton.get_input_alphabet() automaton.compute_prefixes() char_set = automaton.compute_characterization_set() infixes = [(x,) for x in alphabet] + [tuple()] return [state.prefix + infix + suffix for state in automaton.states for suffix in char_set for infix in infixes] - - -def get_io_traces(automaton: Automaton, input_traces: list) -> list: - moore_automata = (MooreMachine, NDMooreMachine, Mdp, MarkovChain) - is_moore = isinstance(automaton, moore_automata) - - traces = [] - for input_trace in input_traces: - output_trace = automaton.execute_sequence(automaton.initial_state, input_trace) - trace = list(zip(input_trace, output_trace)) - if is_moore: - trace = [automaton.initial_state.output] + trace - traces.append(trace) - return traces