Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9013e96
Improved sampling utils
zwergziege Feb 10, 2025
1139d21
Fixed compatibility issue and rectified red_state order
zwergziege Feb 10, 2025
e786815
deleted unnecessary definition of eq and hash for Node class
zwergziege Feb 10, 2025
5085053
fixed car alarm model
zwergziege Feb 10, 2025
c6f8a4b
check compatibility with target type on Node.to_automaton()
zwergziege Feb 10, 2025
3c50afb
clarified error messages of GSM
zwergziege Feb 10, 2025
c7bc171
cosmetics
zwergziege Feb 10, 2025
2f8cab8
Added support for learning from examples (mealy only)
zwergziege Feb 10, 2025
a1475dd
fixed wrong initial output + better messages
zwergziege Feb 11, 2025
691e0ae
adapt moore checks for unknown output
zwergziege Feb 11, 2025
2bdf572
fix corner case for unknown outputs with moore behavior
zwergziege Feb 11, 2025
d72cd82
update auto-format detection
Feb 11, 2025
3e5b8b0
fixed GSM bug due to unknown outputs
zwergziege Feb 11, 2025
4c41e6b
update auto-format detection
zwergziege Feb 12, 2025
47f2084
update random data generation
Feb 12, 2025
142d237
Merge remote-tracking branch 'origin/gsm-dev' into gsm-dev
Feb 12, 2025
85e27d2
one more fix for GSM + Examples
zwergziege Feb 12, 2025
7021b90
expose EDSM
Feb 12, 2025
d53b255
extracted common method for resolving unknown output in prefix
zwergziege Feb 12, 2025
a251f76
simplified instrumentation class
zwergziege Feb 12, 2025
8c87a61
make sure that Unknown inputs are treated as None
Feb 12, 2025
53f8033
additional checks for data format (trees, reject examples + non-det)
zwergziege Feb 12, 2025
930e1a8
fixed createPTA dataformat default
zwergziege Feb 12, 2025
fc31caf
set unknown_output to None
zwergziege Feb 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 12 additions & 16 deletions DotModels/car_alarm.dot
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,18 @@ q3_locked_closed [label="A"];
q5_unlocked_closed [label="N"];
q6_unlocked_open [label="N"];
q7_locked_open [label="N"];
q4_faulty [label="N"];
q1_locked_closed -> q2_locked_open [label="d:1"];
q1_locked_closed -> q5_unlocked_closed [label="l:1"];
q2_locked_open -> q3_locked_closed [label="d:1"];
q2_locked_open -> q6_unlocked_open [label="l:1"];
q3_locked_closed -> q2_locked_open [label="d:1"];
q3_locked_closed -> q5_unlocked_closed [label="l:1"];
q5_unlocked_closed -> q6_unlocked_open [label="d:1"];
q5_unlocked_closed -> q1_locked_closed [label="l:1"];
q6_unlocked_open -> q5_unlocked_closed [label="d:1"];
q6_unlocked_open -> q7_locked_open [label="l:1"];
q7_locked_open -> q4_faulty [label="d:1"];
q7_locked_open -> q6_unlocked_open [label="l:1"];
q4_faulty -> q2_locked_open [label="d:0.9"];
q4_faulty -> q7_locked_open [label="d:0.1"];
q4_faulty -> q5_unlocked_closed [label="l:1"];
q1_locked_closed -> q2_locked_open [label="d"];
q1_locked_closed -> q5_unlocked_closed [label="l"];
q2_locked_open -> q3_locked_closed [label="d"];
q2_locked_open -> q6_unlocked_open [label="l"];
q3_locked_closed -> q2_locked_open [label="d"];
q3_locked_closed -> q5_unlocked_closed [label="l"];
q5_unlocked_closed -> q6_unlocked_open [label="d"];
q5_unlocked_closed -> q1_locked_closed [label="l"];
q6_unlocked_open -> q5_unlocked_closed [label="d"];
q6_unlocked_open -> q7_locked_open [label="l"];
q7_locked_open -> q1_locked_closed [label="d"];
q7_locked_open -> q6_unlocked_open [label="l"];
__start0 [label="", shape=none];
__start0 -> q1_locked_closed [label=""];
}
3 changes: 2 additions & 1 deletion aalpy/learning_algs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@
from .stochastic_passive.ActiveAleriga import run_active_Alergia
from .deterministic_passive.RPNI import run_RPNI, run_PAPNI
from .deterministic_passive.active_RPNI import run_active_RPNI
from .general_passive.GeneralizedStateMerging import run_GSM
from .general_passive.GeneralizedStateMerging import run_GSM
from .general_passive.GsmAlgorithms import run_EDSM
69 changes: 49 additions & 20 deletions aalpy/learning_algs/general_passive/GeneralizedStateMerging.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import functools
from bisect import insort
from typing import Dict, Tuple, Callable, List, Optional
from collections import deque
from typing import Dict, Tuple, Callable, List, Optional

from aalpy.learning_algs.general_passive.Node import Node, OutputBehavior, TransitionBehavior, TransitionInfo, \
OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator
OutputBehaviorRange, TransitionBehaviorRange, intersection_iterator, NodeOrders, unknown_output, detect_data_format
from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation, hoeffding_compatibility


Expand Down Expand Up @@ -54,10 +53,10 @@ def __init__(self, *,
depth_first=False):

if output_behavior not in OutputBehaviorRange:
raise ValueError(f"invalid output behavior {output_behavior}")
raise ValueError(f"invalid output behavior {output_behavior}. should be in {OutputBehaviorRange}")
self.output_behavior: OutputBehavior = output_behavior
if transition_behavior not in TransitionBehaviorRange:
raise ValueError(f"invalid transition behavior {transition_behavior}")
raise ValueError(f"invalid transition behavior {transition_behavior}. should be in {TransitionBehaviorRange}")
self.transition_behavior: TransitionBehavior = transition_behavior

if score_calc is None:
Expand All @@ -70,8 +69,11 @@ def __init__(self, *,
self.score_calc: ScoreCalculation = score_calc

if node_order is None:
node_order = Node.__lt__
self.node_order = functools.cmp_to_key(lambda a, b: -1 if node_order(a, b) else 1)
node_order = NodeOrders.Default
if node_order is NodeOrders.NoCompare or node_order is NodeOrders.Default:
self.node_order = node_order
else:
self.node_order = functools.cmp_to_key(lambda a, b: -1 if node_order(a, b) else 1)

self.pta_preprocessing = pta_preprocessing or (lambda x: x)
self.postprocessing = postprocessing or (lambda x: x)
Expand All @@ -91,15 +93,16 @@ def compute_local_compatibility(self, a: Node, b: Node):

# TODO: make more generic by adding the option to use a different algorithm than red blue
# for selecting potential merge candidates. Maybe using inheritance with abstract `run`.
def run(self, data, convert=True, instrumentation: Instrumentation = None):
def run(self, data, convert=True, instrumentation: Instrumentation=None, data_format=None):
if instrumentation is None:
instrumentation = Instrumentation()
instrumentation.reset(self)

if isinstance(data, Node):
root = data
else:
root = Node.createPTA(data, self.output_behavior)
if data_format is None:
data_format = detect_data_format(data)
if data_format == "examples" and self.transition_behavior != "deterministic":
raise ValueError("learning from examples is not possible for nondeterministic systems")
root = Node.createPTA(data, self.output_behavior, data_format)

root = self.pta_preprocessing(root)
instrumentation.pta_construction_done(root)
Expand Down Expand Up @@ -128,7 +131,11 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
# no blue states left -> done
if len(blue_states) == 0:
break
blue_states.sort(key=self.node_order)
if self.node_order is not NodeOrders.NoCompare:
blue_states.sort(key=self.node_order)
# red states are always sorted using default order on original prefix
if self.node_order is not NodeOrders.Default:
red_states.sort(key=self.node_order)

# loop over blue states
promotion = False
Expand All @@ -139,7 +146,6 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
# calculate partitions resulting from merges with red states if necessary
current_candidates: Dict[Node, Partitioning] = dict()
perfect_partitioning = None

red_state = None
for red_state in red_states:
partition = partition_candidates.get((red_state, blue_state))
Expand All @@ -149,16 +155,16 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
perfect_partitioning = partition
break
current_candidates[red_state] = partition

assert red_state is not None

# partition with perfect score found: don't consider anything else
if perfect_partitioning:
partition_candidates = {(red_state, blue_state): perfect_partitioning}
break

# no merge candidates for this blue state -> promote
if all(part.score is False for part in current_candidates.values()):
insort(red_states, blue_state, key=self.node_order)
red_states.append(blue_state)
instrumentation.log_promote(blue_state)
promotion = True
break
Expand All @@ -176,10 +182,11 @@ def run(self, data, convert=True, instrumentation: Instrumentation = None):
best_candidate = max(partition_candidates.values(), key=lambda part: part.score)
for real_node, partition_node in best_candidate.red_mapping.items():
real_node.transitions = partition_node.transitions
real_node.prefix_access_pair = partition_node.prefix_access_pair
for access_pair, t_info in real_node.transition_iterator():
if t_info.target not in red_states:
t_info.target.predecessor = real_node
t_info.target.prefix_access_pair = access_pair # not sure whether this is actually required
# t_info.target.prefix_access_pair = access_pair # not sure whether this is actually required
instrumentation.log_merge(best_candidate)
# FUTURE: optimizations for compatibility tests where merges can be orthogonal
# FUTURE: caching for aggregating compatibility tests
Expand Down Expand Up @@ -247,9 +254,13 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
blue_in_sym, blue_out_sym = blue.prefix_access_pair
blue_parent.transitions[blue_in_sym][blue_out_sym].target = red

partition = update_partition(red, None)
if self.output_behavior == "moore":
partition.resolve_unknown_prefix_output(blue_out_sym)

# loop over implied merges
q: deque[Tuple[Node, Node]] = deque([(red, blue)])
pop = q.pop if self.depth_first else q.popleft

while len(q) != 0:
red, blue = pop()
partition = update_partition(red, blue)
Expand All @@ -258,10 +269,25 @@ def update_partition(red_node: Node, blue_node: Optional[Node]) -> Node:
if self.compute_local_compatibility(partition, blue) is False:
return partitioning

# create implied merges for all common successors
for in_sym, blue_transitions in blue.transitions.items():
partition_transitions = partition.get_or_create_transitions(in_sym)
for out_sym, blue_transition in blue_transitions.items():
partition_transition = partition_transitions.get(out_sym)
# handle unknown output
if partition_transition is None:
if out_sym is unknown_output and len(partition_transitions) != 0:
assert len(partition_transitions) == 1
partition_transition = list(partition_transitions.values())[0]
if unknown_output in partition_transitions:
assert len(partition_transitions) == 1
partition_transition = partition_transitions.pop(unknown_output)
partition_transitions[out_sym] = partition_transition
# re-hook access pair
succ_part = update_partition(partition_transition.target, None)
if self.output_behavior == "moore" or succ_part.predecessor is red:
succ_part.resolve_unknown_prefix_output(out_sym)
# add pairs
if partition_transition is not None:
q.append((partition_transition.target, blue_transition.target))
partition_transition.count += blue_transition.count
Expand All @@ -287,6 +313,7 @@ def run_GSM(data, *,
depth_first=False,
instrumentation=None,
convert=True,
data_format=None,
):
"""
TODO
Expand Down Expand Up @@ -318,12 +345,14 @@ def run_GSM(data, *,

convert:

data_format:


Returns:


"""
# instantiate the gsm
# instantiate gsm
gsm = GeneralizedStateMerging(
output_behavior=output_behavior,
transition_behavior=transition_behavior,
Expand All @@ -338,4 +367,4 @@ def run_GSM(data, *,
)

# run the algorithm
return gsm.run(data=data, instrumentation=instrumentation, convert=convert)
return gsm.run(data=data, instrumentation=instrumentation, convert=convert, data_format=data_format)
57 changes: 57 additions & 0 deletions aalpy/learning_algs/general_passive/GsmAlgorithms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from typing import Dict, Union

from aalpy import DeterministicAutomaton
from aalpy.learning_algs.general_passive.GeneralizedStateMerging import run_GSM
from aalpy.learning_algs.general_passive.Instrumentation import ProgressReport
from aalpy.learning_algs.general_passive.Node import Node
from aalpy.learning_algs.general_passive.ScoreFunctionsGSM import ScoreCalculation
from aalpy.utils.HelperFunctions import dfa_from_moore


def run_EDSM(data, automaton_type, input_completeness=None, print_info=True) -> Union[DeterministicAutomaton, None]:
"""
Run Evidence Driven State Merging.

Args:
data: sequence of input sequences and corresponding label. Eg. [[(i1,i2,i3, ...), label], ...]
automaton_type: either 'dfa', 'mealy', 'moore'. Note that for 'mealy' machine learning, data has to be prefix-closed.
input_completeness: either None, 'sink_state', or 'self_loop'. If None, learned model could be input incomplete,
sink_state will lead all undefined inputs form some state to the sink state, whereas self_loop will simply create
a self loop. In case of Mealy learning output of the added transition will be 'epsilon'.
print_info: print learning progress and runtime information

Returns:

Model conforming to the data, or None if data is non-deterministic.

"""
assert automaton_type in {'dfa', 'mealy', 'moore'}
assert input_completeness in {None, 'self_loop', 'sink_state'}

def EDSM_score(part: Dict[Node, Node]):
nr_partitions = len(set(part.values()))
nr_merged = len(part)
return nr_merged - nr_partitions

score = ScoreCalculation(score_function=EDSM_score)

internal_automaton_type = 'moore' if automaton_type != 'mealy' else automaton_type

learned_model = run_GSM(data, output_behavior=internal_automaton_type,
transition_behavior="deterministic",
score_calc=score, instrumentation=ProgressReport(2))

if automaton_type == 'dfa':
learned_model = dfa_from_moore(learned_model)

if not learned_model.is_input_complete():
if not input_completeness:
if print_info:
print('Warning: Learned Model is not input complete (inputs not defined for all states). '
'Consider calling .make_input_complete()')
else:
if print_info:
print(f'Learned model was not input complete. Adapting it with {input_completeness} transitions.')
learned_model.make_input_complete(input_completeness)

return learned_model
22 changes: 2 additions & 20 deletions aalpy/learning_algs/general_passive/Instrumentation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import time
from functools import wraps
from typing import Dict, Optional

from aalpy.learning_algs.general_passive.GeneralizedStateMerging import Instrumentation, Partitioning, \
Expand All @@ -8,19 +7,6 @@


class ProgressReport(Instrumentation):
@staticmethod
def min_lvl(lvl):
def decorator(fn):
@wraps(fn)
def wrapper(this, *args, **kw):
if this.lvl < lvl:
return
fn(this, *args, **kw)

return wrapper

return decorator

def __init__(self, lvl):
super().__init__()
self.lvl = lvl
Expand All @@ -45,10 +31,9 @@ def reset(self, gsm: GeneralizedStateMerging):

self.previous_time = time.time()

@min_lvl(1)
def pta_construction_done(self, root):
print(f'PTA Construction Time: {round(time.time() - self.previous_time, 2)}')
if self.lvl != 1:
if 1 < self.lvl:
states = root.get_all_nodes()
leafs = [state for state in states if len(state.transitions.keys()) == 0]
depth = [state.get_prefix_length() for state in leafs]
Expand All @@ -60,24 +45,21 @@ def pta_construction_done(self, root):
def print_status(self):
reset_char = "\33[2K\r"
print_str = reset_char + f'Current automaton size: {self.nr_red_states}'
if self.lvl != 1 and not self.gsm.compatibility_on_futures:
if 1 < self.lvl and not self.gsm.compatibility_on_futures:
print_str += f' Merged: {self.nr_merged_states_total} Remaining: {self.pta_size - self.nr_red_states - self.nr_merged_states_total}'
print(print_str, end="")

@min_lvl(1)
def log_promote(self, node: Node):
self.log.append(["promote", (node.get_prefix(),)])
self.nr_red_states += 1
self.print_status()

@min_lvl(1)
def log_merge(self, part: Partitioning):
self.log.append(["merge", (part.red.get_prefix(), part.blue.get_prefix())])
self.nr_merged_states_total += len(part.full_mapping) - len(part.red_mapping)
self.nr_merged_states += 1
self.print_status()

@min_lvl(1)
def learning_done(self, root, red_states):
print(f'\nLearning Time: {round(time.time() - self.previous_time, 2)}')
print(f'Learned {len(red_states)} state automaton via {self.nr_merged_states} merges.')
Expand Down
Loading
Loading