Skip to content

Active MDP learning of POMDPs can result in a dead state #35

@emuskardin

Description

@emuskardin
from aalpy.SULs import MdpSUL
from aalpy.automata import Mdp, MdpState
from aalpy.learning_algs import run_stochastic_Lstar
from aalpy.oracles import RandomWordEqOracle

# Thirteen states named q0..q12. Only q3, q6, q9 and q12 emit True;
# every other state emits False.
states = [MdpState(f'q{i}', output=i in (3, 6, 9, 12)) for i in range(13)]

# Transition structure, written as data instead of one statement per edge.
#
# Every (state, input) pair receives exactly two entries, appended in this
# order:
#   1. (successor, lower probability)
#   2. (the state itself, higher probability)
# Pairs listed in `advance` use the 0.25 / 0.75 split and name the successor
# explicitly. Every other pair is a self-loop that is still recorded as two
# separate entries (0.2 and 0.8), both pointing back at the same state.
#
# NOTE(review): q7 -> q8 -> q9 is driven by 'b' although q0 enters that chain
# via 'c', and q10 -> q11 uses 'b' while q11 -> q12 uses 'c' (q0 enters via
# 'd'). Reproduced exactly as reported; confirm whether this is intended.
advance = {
    # q0 fans out into four chains, one per input.
    (0, 'a'): 1, (0, 'b'): 4, (0, 'c'): 7, (0, 'd'): 10,
    # Chain q1 -> q2 -> q3; q3 maps to itself.
    (1, 'a'): 2, (2, 'a'): 3, (3, 'a'): 3,
    # Chain q4 -> q5 -> q6; q6 maps to itself.
    (4, 'b'): 5, (5, 'b'): 6, (6, 'b'): 6,
    # Chain q7 -> q8 -> q9; q9 maps to itself.
    (7, 'b'): 8, (8, 'b'): 9, (9, 'b'): 9,
    # Chain q10 -> q11 -> q12; q12 maps to itself.
    (10, 'b'): 11, (11, 'c'): 12, (12, 'c'): 12,
}

for src, state in enumerate(states):
    for letter in 'abcd':
        edges = state.transitions[letter]
        target = advance.get((src, letter))
        if target is None:
            # Plain self-loop, kept as two entries to mirror the report.
            edges.append((state, 0.2))
            edges.append((state, 0.8))
        else:
            edges.append((states[target], 0.25))
            edges.append((state, 0.75))

# Wrap the hand-built states in an Mdp whose initial state is q0.
mdp = Mdp(states[0], states)

# The input alphabet is taken from the model itself rather than hard-coded.
al = mdp.get_input_alphabet()
sul = MdpSUL(mdp)

# Equivalence oracle: 1000 random walks, each 3 to 6 steps long.
eq_oracle = RandomWordEqOracle(
    al,
    sul,
    num_walks=1000,
    min_walk_len=3,
    max_walk_len=6,
)

# Stochastic L* in 'mdp' mode, between 60 and 100 rounds, with
# counterexample processing switched off (cex_processing=None).
learned_model = run_stochastic_Lstar(
    al,
    sul,
    eq_oracle,
    automaton_type='mdp',
    min_rounds=60,
    max_rounds=100,
    cex_processing=None,
)

# Show the learned model so it can be inspected for the reported dead state.
learned_model.visualize()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions