From f200cbc8606e403b9514fddf656331c2f4baa2a2 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Wed, 5 Nov 2025 11:14:44 +0100
Subject: [PATCH 01/20] Perms in linear functions

---
 rust/src/envs/linear_function.rs | 283 ++++++++++++++++++++++++++++++-
 1 file changed, 279 insertions(+), 4 deletions(-)
diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index f04ff33..1e3fe6b 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -16,10 +16,12 @@ use pyo3::prelude::*;
 use rand::distributions::{Distribution, Uniform};
 
 use twisterl::rl::env::Env;
-use twisterl::python_interface::env::{PyBaseEnv, get_env_ref, get_env_mut};
+use twisterl::python_interface::env::PyBaseEnv;
 
 use crate::envs::common::Gate;
 
+use std::collections::{HashMap, HashSet};
+
 // Define some internal representation
 #[derive(Clone)]
 pub struct LFState {
@@ -105,7 +107,9 @@ pub struct LinearFunction {
     pub difficulty: usize,
     pub gateset: Vec<Gate>,
     pub depth_slope: usize,
-    pub max_depth: usize
+    pub max_depth: usize,
+    pub obs_perms: Vec<Vec<usize>>,
+    pub act_perms: Vec<Vec<usize>>,
 }
 
 
@@ -119,7 +123,8 @@ impl LinearFunction {
     ) -> Self {
         let lf = LFState::new(num_qubits);
         let success = lf.solved();
-        LinearFunction {lf, depth:1, success, difficulty, gateset, depth_slope, max_depth }
+        let (obs_perms, act_perms) = compute_twists(num_qubits, &gateset);
+        LinearFunction {lf, depth:1, success, difficulty, gateset, depth_slope, max_depth, obs_perms, act_perms }
     }
     pub fn solved(&self) -> bool {
         self.lf.solved()
@@ -204,6 +209,10 @@ impl Env for LinearFunction {
         .filter_map(|(index, &value)| if value { Some(index) } else { None }) // Collect indices where the value is true
         .collect()    
     }
+
+    fn twists(&self) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
+        (self.obs_perms.clone(), self.act_perms.clone())
+    }
 }
 
 
@@ -224,4 +233,270 @@ impl PyLinearFunctionEnv {
         let env = Box::new(env);
         (PyLinearFunctionEnv, PyBaseEnv { env })
     }
-}
\ No newline at end of file
+}
+
+#[derive(Hash, Eq, PartialEq, Clone, Copy)]
+enum GateKind {
+    H,
+    S,
+    Sdg,
+    SX,
+    SXdg,
+    CX,
+    CZ,
+    Swap,
+}
+
+#[derive(Hash, Eq, PartialEq, Clone)]
+struct GateKey {
+    kind: GateKind,
+    qubits: Vec<usize>,
+}
+
+fn gate_kind(gate: &Gate) -> GateKind {
+    match gate {
+        Gate::H(_) => GateKind::H,
+        Gate::S(_) => GateKind::S,
+        Gate::Sdg(_) => GateKind::Sdg,
+        Gate::SX(_) => GateKind::SX,
+        Gate::SXdg(_) => GateKind::SXdg,
+        Gate::CX(_, _) => GateKind::CX,
+        Gate::CZ(_, _) => GateKind::CZ,
+        Gate::SWAP(_, _) => GateKind::Swap,
+    }
+}
+
+fn gate_qubits(gate: &Gate) -> Vec<usize> {
+    match gate {
+        Gate::H(q)
+        | Gate::S(q)
+        | Gate::Sdg(q)
+        | Gate::SX(q)
+        | Gate::SXdg(q) => vec![*q],
+        Gate::CX(q1, q2)
+        | Gate::CZ(q1, q2)
+        | Gate::SWAP(q1, q2) => vec![*q1, *q2],
+    }
+}
+
+fn canonical_key(kind: GateKind, mut qubits: Vec<usize>) -> GateKey {
+    if matches!(kind, GateKind::Swap) {
+        qubits.sort_unstable();
+    }
+    GateKey { kind, qubits }
+}
+
+fn two_qubit_targets(gate: &Gate) -> Option<(usize, usize)> {
+    match gate {
+        Gate::CX(q1, q2) | Gate::CZ(q1, q2) | Gate::SWAP(q1, q2) => Some((*q1, *q2)),
+        _ => None,
+    }
+}
+
+fn identity_obs_perm(num_qubits: usize) -> Vec<usize> {
+    let mut obs_perm = Vec::with_capacity(num_qubits * num_qubits);
+    for row in 0..num_qubits {
+        for col in 0..num_qubits {
+            obs_perm.push(row * num_qubits + col);
+        }
+    }
+    obs_perm
+}
+
+fn compute_twists(num_qubits: usize, gateset: &[Gate]) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
+    if num_qubits == 0 {
+        return (Vec::new(), Vec::new());
+    }
+
+    let mut gate_index: HashMap<GateKey, usize> = HashMap::new();
+    for (idx, gate) in gateset.iter().enumerate() {
+        let kind = gate_kind(gate);
+        let qubits = gate_qubits(gate);
+        let key = canonical_key(kind, qubits);
+        gate_index.insert(key, idx);
+    }
+
+    let mut adjacency = vec![vec![false; num_qubits]; num_qubits];
+    let mut has_edge = false;
+
+    for gate in gateset {
+        if let Some((q1, q2)) = two_qubit_targets(gate) {
+            if q1 != q2 {
+                adjacency[q1][q2] = true;
+                adjacency[q2][q1] = true;
+                has_edge = true;
+            }
+        }
+    }
+
+    if !has_edge {
+        let obs_perm = identity_obs_perm(num_qubits);
+        let act_perm: Vec<usize> = (0..gateset.len()).collect();
+        return (vec![obs_perm], vec![act_perm]);
+    }
+
+    let degrees: Vec<usize> = adjacency
+        .iter()
+        .map(|row| row.iter().filter(|&&edge| edge).count())
+        .collect();
+
+    let signatures: Vec<NodeSignature> = (0..num_qubits)
+        .map(|idx| NodeSignature::new(idx, &degrees, &adjacency))
+        .collect();
+
+    let automorphisms = enumerate_automorphisms(&adjacency, &signatures);
+
+    let mut seen: HashSet<Vec<usize>> = HashSet::new();
+    let mut obs_perms: Vec<Vec<usize>> = Vec::new();
+    let mut act_perms: Vec<Vec<usize>> = Vec::new();
+
+    for mapping in automorphisms {
+        if mapping.len() != num_qubits {
+            continue;
+        }
+        if !seen.insert(mapping.clone()) {
+            continue;
+        }
+
+        let mut obs_perm = vec![0usize; num_qubits * num_qubits];
+        for row in 0..num_qubits {
+            for col in 0..num_qubits {
+                let idx_old = row * num_qubits + col;
+                obs_perm[idx_old] = mapping[row] * num_qubits + mapping[col];
+            }
+        }
+
+        let mut act_perm: Vec<usize> = Vec::with_capacity(gateset.len());
+        let mut valid = true;
+        for gate in gateset {
+            let kind = gate_kind(gate);
+            let mut qubits = gate_qubits(gate);
+            for q in qubits.iter_mut() {
+                *q = mapping[*q];
+            }
+            let key = canonical_key(kind, qubits);
+            if let Some(idx) = gate_index.get(&key) {
+                act_perm.push(*idx);
+            } else {
+                valid = false;
+                break;
+            }
+        }
+
+        if valid {
+            obs_perms.push(obs_perm);
+            act_perms.push(act_perm);
+        }
+    }
+
+    if obs_perms.is_empty() {
+        let obs_perm = identity_obs_perm(num_qubits);
+        let act_perm: Vec<usize> = (0..gateset.len()).collect();
+        obs_perms.push(obs_perm);
+        act_perms.push(act_perm);
+    }
+
+    (obs_perms, act_perms)
+}
+
+#[derive(Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
+struct NodeSignature {
+    degree: usize,
+    neighbor_degrees: Vec<usize>,
+}
+
+impl NodeSignature {
+    fn new(node: usize, degrees: &[usize], adjacency: &[Vec<bool>]) -> Self {
+        let mut neighbor_degrees: Vec<usize> = adjacency[node]
+            .iter()
+            .enumerate()
+            .filter_map(|(idx, &connected)| if connected { Some(degrees[idx]) } else { None })
+            .collect();
+        neighbor_degrees.sort_unstable();
+        Self {
+            degree: degrees[node],
+            neighbor_degrees,
+        }
+    }
+}
+
+fn enumerate_automorphisms(
+    adjacency: &[Vec<bool>],
+    signatures: &[NodeSignature],
+) -> Vec<Vec<usize>> {
+    let n = adjacency.len();
+    let mut nodes_order: Vec<usize> = (0..n).collect();
+    nodes_order.sort_by(|&a, &b| signatures[a].cmp(&signatures[b]));
+
+    let mut perm = vec![usize::MAX; n];
+    let mut used = vec![false; n];
+    let mut results = Vec::new();
+
+    backtrack_automorphisms(
+        0,
+        &nodes_order,
+        signatures,
+        adjacency,
+        &mut perm,
+        &mut used,
+        &mut results,
+    );
+
+    results
+}
+
+fn backtrack_automorphisms(
+    idx: usize,
+    nodes_order: &[usize],
+    signatures: &[NodeSignature],
+    adjacency: &[Vec<bool>],
+    perm: &mut Vec<usize>,
+    used: &mut Vec<bool>,
+    results: &mut Vec<Vec<usize>>,
+) {
+    if idx == nodes_order.len() {
+        results.push(perm.clone());
+        return;
+    }
+
+    let node_from = nodes_order[idx];
+    let target_signature = &signatures[node_from];
+    let n = adjacency.len();
+
+    for node_to in 0..n {
+        if used[node_to] || &signatures[node_to] != target_signature {
+            continue;
+        }
+
+        let mut consistent = true;
+        for prev_idx in 0..idx {
+            let prev_from = nodes_order[prev_idx];
+            let prev_to = perm[prev_from];
+            if prev_to == usize::MAX {
+                continue;
+            }
+            if adjacency[node_from][prev_from] != adjacency[node_to][prev_to] {
+                consistent = false;
+                break;
+            }
+        }
+
+        if !consistent {
+            continue;
+        }
+
+        perm[node_from] = node_to;
+        used[node_to] = true;
+        backtrack_automorphisms(
+            idx + 1,
+            nodes_order,
+            signatures,
+            adjacency,
+            perm,
+            used,
+            results,
+        );
+        used[node_to] = false;
+        perm[node_from] = usize::MAX;
+    }
+}

From 8634e230b9f8cfef2dfc4a972a469d545a76bd08 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Wed, 5 Nov 2025 12:18:07 +0100
Subject: [PATCH 02/20] Perms in cliffords and permutations

---
 rust/src/envs/clifford.rs        |  14 +-
 rust/src/envs/linear_function.rs | 270 +------------------------------
 rust/src/envs/mod.rs             |   3 +-
 rust/src/envs/permutation.rs     |  27 +++-
 4 files changed, 38 insertions(+), 276 deletions(-)

diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index dbc74fb..0b44296 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -16,9 +16,10 @@ use pyo3::prelude::*;
 use rand::distributions::{Distribution, Uniform};
 
 use twisterl::rl::env::Env;
-use twisterl::python_interface::env::{PyBaseEnv, get_env_ref, get_env_mut};
+use twisterl::python_interface::env::PyBaseEnv;
 
 use crate::envs::common::Gate;
+use crate::envs::symmetry::compute_twists_clifford;
 
 
 #[derive(Clone)]
@@ -153,6 +154,8 @@ pub struct Clifford {
     pub gateset: Vec<Gate>,
     pub depth_slope: usize,
     pub max_depth: usize,
+    pub obs_perms: Vec<Vec<usize>>,
+    pub act_perms: Vec<Vec<usize>>,
 }
 
 impl Clifford {
@@ -165,7 +168,8 @@ impl Clifford {
     ) -> Self {
         let cf = CFState::new(num_qubits);
         let success = cf.solved();
-        Clifford { cf, depth: 1, success, difficulty, gateset, depth_slope, max_depth }
+        let (obs_perms, act_perms) = compute_twists_clifford(num_qubits, &gateset);
+        Clifford { cf, depth: 1, success, difficulty, gateset, depth_slope, max_depth, obs_perms, act_perms }
     }
     pub fn solved(&self) -> bool { self.cf.solved() }
 }
@@ -246,6 +250,10 @@ impl Env for Clifford {
             .filter_map(|(i, &v)| if v { Some(i) } else { None })
             .collect()
     }
+
+    fn twists(&self) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
+        (self.obs_perms.clone(), self.act_perms.clone())
+    }
 }
 
 #[pyclass(name="CliffordEnv", extends=PyBaseEnv)]
@@ -265,4 +273,4 @@ impl PyCliffordEnv {
         let env = Box::new(env);
         (PyCliffordEnv, PyBaseEnv { env })
     }
-}
\ No newline at end of file
+}
diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index 1e3fe6b..78c368f 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -20,7 +20,7 @@ use twisterl::python_interface::env::PyBaseEnv;
 
 use crate::envs::common::Gate;
 
-use std::collections::{HashMap, HashSet};
+use crate::envs::symmetry::compute_twists_square;
 
 // Define some internal representation
 #[derive(Clone)]
@@ -123,7 +123,7 @@ impl LinearFunction {
     ) -> Self {
         let lf = LFState::new(num_qubits);
         let success = lf.solved();
-        let (obs_perms, act_perms) = compute_twists(num_qubits, &gateset);
+        let (obs_perms, act_perms) = compute_twists_square(num_qubits, &gateset);
         LinearFunction {lf, depth:1, success, difficulty, gateset, depth_slope, max_depth, obs_perms, act_perms }
     }
     pub fn solved(&self) -> bool {
@@ -234,269 +234,3 @@ impl PyLinearFunctionEnv {
         (PyLinearFunctionEnv, PyBaseEnv { env })
     }
 }
-
-#[derive(Hash, Eq, PartialEq, Clone, Copy)]
-enum GateKind {
-    H,
-    S,
-    Sdg,
-    SX,
-    SXdg,
-    CX,
-    CZ,
-    Swap,
-}
-
-#[derive(Hash, Eq, PartialEq, Clone)]
-struct GateKey {
-    kind: GateKind,
-    qubits: Vec<usize>,
-}
-
-fn gate_kind(gate: &Gate) -> GateKind {
-    match gate {
-        Gate::H(_) => GateKind::H,
-        Gate::S(_) => GateKind::S,
-        Gate::Sdg(_) => GateKind::Sdg,
-        Gate::SX(_) => GateKind::SX,
-        Gate::SXdg(_) => GateKind::SXdg,
-        Gate::CX(_, _) => GateKind::CX,
-        Gate::CZ(_, _) => GateKind::CZ,
-        Gate::SWAP(_, _) => GateKind::Swap,
-    }
-}
-
-fn gate_qubits(gate: &Gate) -> Vec<usize> {
-    match gate {
-        Gate::H(q)
-        | Gate::S(q)
-        | Gate::Sdg(q)
-        | Gate::SX(q)
-        | Gate::SXdg(q) => vec![*q],
-        Gate::CX(q1, q2)
-        | Gate::CZ(q1, q2)
-        | Gate::SWAP(q1, q2) => vec![*q1, *q2],
-    }
-}
-
-fn canonical_key(kind: GateKind, mut qubits: Vec<usize>) -> GateKey {
-    if matches!(kind, GateKind::Swap) {
-        qubits.sort_unstable();
-    }
-    GateKey { kind, qubits }
-}
-
-fn two_qubit_targets(gate: &Gate) -> Option<(usize, usize)> {
-    match gate {
-        Gate::CX(q1, q2) | Gate::CZ(q1, q2) | Gate::SWAP(q1, q2) => Some((*q1, *q2)),
-        _ => None,
-    }
-}
-
-fn identity_obs_perm(num_qubits: usize) -> Vec<usize> {
-    let mut obs_perm = Vec::with_capacity(num_qubits * num_qubits);
-    for row in 0..num_qubits {
-        for col in 0..num_qubits {
-            obs_perm.push(row * num_qubits + col);
-        }
-    }
-    obs_perm
-}
-
-fn compute_twists(num_qubits: usize, gateset: &[Gate]) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
-    if num_qubits == 0 {
-        return (Vec::new(), Vec::new());
-    }
-
-    let mut gate_index: HashMap<GateKey, usize> = HashMap::new();
-    for (idx, gate) in gateset.iter().enumerate() {
-        let kind = gate_kind(gate);
-        let qubits = gate_qubits(gate);
-        let key = canonical_key(kind, qubits);
-        gate_index.insert(key, idx);
-    }
-
-    let mut adjacency = vec![vec![false; num_qubits]; num_qubits];
-    let mut has_edge = false;
-
-    for gate in gateset {
-        if let Some((q1, q2)) = two_qubit_targets(gate) {
-            if q1 != q2 {
-                adjacency[q1][q2] = true;
-                adjacency[q2][q1] = true;
-                has_edge = true;
-            }
-        }
-    }
-
-    if !has_edge {
-        let obs_perm = identity_obs_perm(num_qubits);
-        let act_perm: Vec<usize> = (0..gateset.len()).collect();
-        return (vec![obs_perm], vec![act_perm]);
-    }
-
-    let degrees: Vec<usize> = adjacency
-        .iter()
-        .map(|row| row.iter().filter(|&&edge| edge).count())
-        .collect();
-
-    let signatures: Vec<NodeSignature> = (0..num_qubits)
-        .map(|idx| NodeSignature::new(idx, &degrees, &adjacency))
-        .collect();
-
-    let automorphisms = enumerate_automorphisms(&adjacency, &signatures);
-
-    let mut seen: HashSet<Vec<usize>> = HashSet::new();
-    let mut obs_perms: Vec<Vec<usize>> = Vec::new();
-    let mut act_perms: Vec<Vec<usize>> = Vec::new();
-
-    for mapping in automorphisms {
-        if mapping.len() != num_qubits {
-            continue;
-        }
-        if !seen.insert(mapping.clone()) {
-            continue;
-        }
-
-        let mut obs_perm = vec![0usize; num_qubits * num_qubits];
-        for row in 0..num_qubits {
-            for col in 0..num_qubits {
-                let idx_old = row * num_qubits + col;
-                obs_perm[idx_old] = mapping[row] * num_qubits + mapping[col];
-            }
-        }
-
-        let mut act_perm: Vec<usize> = Vec::with_capacity(gateset.len());
-        let mut valid = true;
-        for gate in gateset {
-            let kind = gate_kind(gate);
-            let mut qubits = gate_qubits(gate);
-            for q in qubits.iter_mut() {
-                *q = mapping[*q];
-            }
-            let key = canonical_key(kind, qubits);
-            if let Some(idx) = gate_index.get(&key) {
-                act_perm.push(*idx);
-            } else {
-                valid = false;
-                break;
-            }
-        }
-
-        if valid {
-            obs_perms.push(obs_perm);
-            act_perms.push(act_perm);
-        }
-    }
-
-    if obs_perms.is_empty() {
-        let obs_perm = identity_obs_perm(num_qubits);
-        let act_perm: Vec<usize> = (0..gateset.len()).collect();
-        obs_perms.push(obs_perm);
-        act_perms.push(act_perm);
-    }
-
-    (obs_perms, act_perms)
-}
-
-#[derive(Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
-struct NodeSignature {
-    degree: usize,
-    neighbor_degrees: Vec<usize>,
-}
-
-impl NodeSignature {
-    fn new(node: usize, degrees: &[usize], adjacency: &[Vec<bool>]) -> Self {
-        let mut neighbor_degrees: Vec<usize> = adjacency[node]
-            .iter()
-            .enumerate()
-            .filter_map(|(idx, &connected)| if connected { Some(degrees[idx]) } else { None })
-            .collect();
-        neighbor_degrees.sort_unstable();
-        Self {
-            degree: degrees[node],
-            neighbor_degrees,
-        }
-    }
-}
-
-fn enumerate_automorphisms(
-    adjacency: &[Vec<bool>],
-    signatures: &[NodeSignature],
-) -> Vec<Vec<usize>> {
-    let n = adjacency.len();
-    let mut nodes_order: Vec<usize> = (0..n).collect();
-    nodes_order.sort_by(|&a, &b| signatures[a].cmp(&signatures[b]));
-
-    let mut perm = vec![usize::MAX; n];
-    let mut used = vec![false; n];
-    let mut results = Vec::new();
-
-    backtrack_automorphisms(
-        0,
-        &nodes_order,
-        signatures,
-        adjacency,
-        &mut perm,
-        &mut used,
-        &mut results,
-    );
-
-    results
-}
-
-fn backtrack_automorphisms(
-    idx: usize,
-    nodes_order: &[usize],
-    signatures: &[NodeSignature],
-    adjacency: &[Vec<bool>],
-    perm: &mut Vec<usize>,
-    used: &mut Vec<bool>,
-    results: &mut Vec<Vec<usize>>,
-) {
-    if idx == nodes_order.len() {
-        results.push(perm.clone());
-        return;
-    }
-
-    let node_from = nodes_order[idx];
-    let target_signature = &signatures[node_from];
-    let n = adjacency.len();
-
-    for node_to in 0..n {
-        if used[node_to] || &signatures[node_to] != target_signature {
-            continue;
-        }
-
-        let mut consistent = true;
-        for prev_idx in 0..idx {
-            let prev_from = nodes_order[prev_idx];
-            let prev_to = perm[prev_from];
-            if prev_to == usize::MAX {
-                continue;
-            }
-            if adjacency[node_from][prev_from] != adjacency[node_to][prev_to] {
-                consistent = false;
-                break;
-            }
-        }
-
-        if !consistent {
-            continue;
-        }
-
-        perm[node_from] = node_to;
-        used[node_to] = true;
-        backtrack_automorphisms(
-            idx + 1,
-            nodes_order,
-            signatures,
-            adjacency,
-            perm,
-            used,
-            results,
-        );
-        used[node_to] = false;
-        perm[node_from] = usize::MAX;
-    }
-}
diff --git a/rust/src/envs/mod.rs b/rust/src/envs/mod.rs
index 0218394..7beab6a 100644
--- a/rust/src/envs/mod.rs
+++ b/rust/src/envs/mod.rs
@@ -14,4 +14,5 @@ that they have been altered from the originals.
 pub mod clifford;
 pub mod linear_function;
 pub mod permutation;
-pub mod common;
\ No newline at end of file
+pub mod common;
+pub mod symmetry;
diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index 5f58ef5..0771b5d 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -16,9 +16,10 @@ use pyo3::prelude::*;
 use rand::distributions::{Distribution, Uniform};
 
 use twisterl::rl::env::Env;
-use twisterl::python_interface::env::{PyBaseEnv, get_env_ref, get_env_mut};
+use twisterl::python_interface::env::PyBaseEnv;
 
 use crate::envs::common::Gate;
+use crate::envs::symmetry::compute_twists_square;
 
 
 // This is the Env definition
@@ -32,7 +33,9 @@ pub struct Permutation {
     pub difficulty: usize,
     pub gateset: Vec<Gate>,
     pub depth_slope: usize,
-    pub max_depth: usize
+    pub max_depth: usize,
+    pub obs_perms: Vec<Vec<usize>>,
+    pub act_perms: Vec<Vec<usize>>,
 }
 
 
@@ -44,7 +47,19 @@ impl Permutation {
         depth_slope: usize,
         max_depth: usize,
     ) -> Self {
-        Permutation {state:(0..num_qubits).collect(), depth:1, success:true, num_qubits:num_qubits, difficulty:difficulty, gateset:gateset, depth_slope:depth_slope, max_depth:max_depth}
+        let (obs_perms, act_perms) = compute_twists_square(num_qubits, &gateset);
+        Permutation {
+            state:(0..num_qubits).collect(),
+            depth:1,
+            success:true,
+            num_qubits,
+            difficulty,
+            gateset,
+            depth_slope,
+            max_depth,
+            obs_perms,
+            act_perms,
+        }
     }
 
     pub fn solved(&self) -> bool {
@@ -132,6 +147,10 @@ impl Env for Permutation {
     fn observe(&self,) -> Vec<usize> {
         self.state.iter().enumerate().map(|(i, v)| i * self.num_qubits + v ).collect()  
     }
+
+    fn twists(&self) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
+        (self.obs_perms.clone(), self.act_perms.clone())
+    }
 }
 
 
@@ -152,4 +171,4 @@ impl PyPermutationEnv {
         let env = Box::new(env);
         (PyPermutationEnv, PyBaseEnv { env })
     }
-}
\ No newline at end of file
+}

From f16f26edc6b2ce9781b47a9226d6f3782f3a296a Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Wed, 5 Nov 2025 12:35:01 +0100
Subject: [PATCH 03/20] Added missing file

---
 rust/src/envs/symmetry.rs | 328 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 328 insertions(+)
 create mode 100644 rust/src/envs/symmetry.rs

diff --git a/rust/src/envs/symmetry.rs b/rust/src/envs/symmetry.rs
new file mode 100644
index 0000000..d32ab27
--- /dev/null
+++ b/rust/src/envs/symmetry.rs
@@ -0,0 +1,328 @@
+// -*- coding: utf-8 -*-
+/* 
+(C) Copyright 2025 IBM. All Rights Reserved.
+
+This code is licensed under the Apache License, Version 2.0. You may
+obtain a copy of this license in the LICENSE.txt file in the root directory
+of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+
+Any modifications or derivative works of this code must retain this
+copyright notice, and modified files need to carry a notice indicating
+that they have been altered from the originals.
+*/
+
+use std::collections::{HashMap, HashSet};
+
+use crate::envs::common::Gate;
+
+#[derive(Hash, Eq, PartialEq, Clone, Copy)]
+enum GateKind {
+    H,
+    S,
+    Sdg,
+    SX,
+    SXdg,
+    CX,
+    CZ,
+    Swap,
+}
+
+#[derive(Hash, Eq, PartialEq, Clone)]
+struct GateKey {
+    kind: GateKind,
+    qubits: Vec<usize>,
+}
+
+fn gate_kind(gate: &Gate) -> GateKind {
+    match gate {
+        Gate::H(_) => GateKind::H,
+        Gate::S(_) => GateKind::S,
+        Gate::Sdg(_) => GateKind::Sdg,
+        Gate::SX(_) => GateKind::SX,
+        Gate::SXdg(_) => GateKind::SXdg,
+        Gate::CX(_, _) => GateKind::CX,
+        Gate::CZ(_, _) => GateKind::CZ,
+        Gate::SWAP(_, _) => GateKind::Swap,
+    }
+}
+
+fn gate_qubits(gate: &Gate) -> Vec<usize> {
+    match gate {
+        Gate::H(q)
+        | Gate::S(q)
+        | Gate::Sdg(q)
+        | Gate::SX(q)
+        | Gate::SXdg(q) => vec![*q],
+        Gate::CX(q1, q2)
+        | Gate::CZ(q1, q2)
+        | Gate::SWAP(q1, q2) => vec![*q1, *q2],
+    }
+}
+
+fn canonical_key(kind: GateKind, mut qubits: Vec<usize>) -> GateKey {
+    if matches!(kind, GateKind::Swap) {
+        qubits.sort_unstable();
+    }
+    GateKey { kind, qubits }
+}
+
+fn two_qubit_targets(gate: &Gate) -> Option<(usize, usize)> {
+    match gate {
+        Gate::CX(q1, q2) | Gate::CZ(q1, q2) | Gate::SWAP(q1, q2) => Some((*q1, *q2)),
+        _ => None,
+    }
+}
+
+fn identity_perm(num_qubits: usize) -> Vec<usize> {
+    (0..num_qubits).collect()
+}
+
+#[derive(Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
+struct NodeSignature {
+    degree: usize,
+    neighbor_degrees: Vec<usize>,
+}
+
+impl NodeSignature {
+    fn new(node: usize, degrees: &[usize], adjacency: &[Vec<bool>]) -> Self {
+        let mut neighbor_degrees: Vec<usize> = adjacency[node]
+            .iter()
+            .enumerate()
+            .filter_map(|(idx, &connected)| if connected { Some(degrees[idx]) } else { None })
+            .collect();
+        neighbor_degrees.sort_unstable();
+        Self {
+            degree: degrees[node],
+            neighbor_degrees,
+        }
+    }
+}
+
+fn enumerate_automorphisms(
+    adjacency: &[Vec<bool>],
+    has_edge: bool,
+    signatures: &[NodeSignature],
+) -> Vec<Vec<usize>> {
+    let n = adjacency.len();
+    if n == 0 {
+        return vec![Vec::new()];
+    }
+
+    if !has_edge {
+        return vec![identity_perm(n)];
+    }
+
+    let mut nodes_order: Vec<usize> = (0..n).collect();
+    nodes_order.sort_by(|&a, &b| signatures[a].cmp(&signatures[b]));
+
+    let mut perm = vec![usize::MAX; n];
+    let mut used = vec![false; n];
+    let mut results = Vec::new();
+
+    backtrack_automorphisms(
+        0,
+        &nodes_order,
+        signatures,
+        adjacency,
+        &mut perm,
+        &mut used,
+        &mut results,
+    );
+
+    if results.is_empty() {
+        results.push(identity_perm(n));
+    }
+
+    results
+}
+
+fn backtrack_automorphisms(
+    idx: usize,
+    nodes_order: &[usize],
+    signatures: &[NodeSignature],
+    adjacency: &[Vec<bool>],
+    perm: &mut Vec<usize>,
+    used: &mut Vec<bool>,
+    results: &mut Vec<Vec<usize>>,
+) {
+    if idx == nodes_order.len() {
+        results.push(perm.clone());
+        return;
+    }
+
+    let node_from = nodes_order[idx];
+    let target_signature = &signatures[node_from];
+    let n = adjacency.len();
+
+    for node_to in 0..n {
+        if used[node_to] || &signatures[node_to] != target_signature {
+            continue;
+        }
+
+        let mut consistent = true;
+        for prev_idx in 0..idx {
+            let prev_from = nodes_order[prev_idx];
+            let prev_to = perm[prev_from];
+            if prev_to == usize::MAX {
+                continue;
+            }
+            if adjacency[node_from][prev_from] != adjacency[node_to][prev_to] {
+                consistent = false;
+                break;
+            }
+        }
+
+        if !consistent {
+            continue;
+        }
+
+        perm[node_from] = node_to;
+        used[node_to] = true;
+        backtrack_automorphisms(
+            idx + 1,
+            nodes_order,
+            signatures,
+            adjacency,
+            perm,
+            used,
+            results,
+        );
+        used[node_to] = false;
+        perm[node_from] = usize::MAX;
+    }
+}
+
+fn build_action_perm(
+    gateset: &[Gate],
+    gate_index: &HashMap<GateKey, usize>,
+    perm: &[usize],
+) -> Option<Vec<usize>> {
+    let mut act_perm: Vec<usize> = Vec::with_capacity(gateset.len());
+
+    for gate in gateset {
+        let kind = gate_kind(gate);
+        let mut qubits = gate_qubits(gate);
+        for q in qubits.iter_mut() {
+            if *q >= perm.len() {
+                return None;
+            }
+            *q = perm[*q];
+        }
+        let key = canonical_key(kind, qubits);
+        if let Some(idx) = gate_index.get(&key) {
+            act_perm.push(*idx);
+        } else {
+            return None;
+        }
+    }
+
+    Some(act_perm)
+}
+
+fn compute_twists_with_builder<F>(
+    num_qubits: usize,
+    gateset: &[Gate],
+    mut build_obs_perm: F,
+) -> (Vec<Vec<usize>>, Vec<Vec<usize>>)
+where
+    F: FnMut(&[usize]) -> Vec<usize>,
+{
+    if num_qubits == 0 {
+        return (Vec::new(), Vec::new());
+    }
+
+    let mut gate_index: HashMap<GateKey, usize> = HashMap::new();
+    for (idx, gate) in gateset.iter().enumerate() {
+        let kind = gate_kind(gate);
+        let qubits = gate_qubits(gate);
+        let key = canonical_key(kind, qubits);
+        gate_index.insert(key, idx);
+    }
+
+    let mut adjacency = vec![vec![false; num_qubits]; num_qubits];
+    let mut has_edge = false;
+
+    for gate in gateset {
+        if let Some((q1, q2)) = two_qubit_targets(gate) {
+            if q1 != q2 {
+                adjacency[q1][q2] = true;
+                adjacency[q2][q1] = true;
+                has_edge = true;
+            }
+        }
+    }
+
+    let degrees: Vec<usize> = adjacency
+        .iter()
+        .map(|row| row.iter().filter(|&&edge| edge).count())
+        .collect();
+    let signatures: Vec<NodeSignature> = (0..num_qubits)
+        .map(|idx| NodeSignature::new(idx, &degrees, &adjacency))
+        .collect();
+
+    let automorphisms = enumerate_automorphisms(&adjacency, has_edge, &signatures);
+
+    let mut seen: HashSet<Vec<usize>> = HashSet::new();
+    let mut obs_perms: Vec<Vec<usize>> = Vec::new();
+    let mut act_perms: Vec<Vec<usize>> = Vec::new();
+
+    for perm in automorphisms {
+        if !seen.insert(perm.clone()) {
+            continue;
+        }
+        if let Some(act_perm) = build_action_perm(gateset, &gate_index, &perm) {
+            obs_perms.push(build_obs_perm(&perm));
+            act_perms.push(act_perm);
+        }
+    }
+
+    if obs_perms.is_empty() {
+        let identity = identity_perm(num_qubits);
+        if let Some(act_perm) = build_action_perm(gateset, &gate_index, &identity) {
+            obs_perms.push(build_obs_perm(&identity));
+            act_perms.push(act_perm);
+        }
+    }
+
+    (obs_perms, act_perms)
+}
+
+fn obs_perm_square(num_qubits: usize, perm: &[usize]) -> Vec<usize> {
+    let mut obs_perm = vec![0usize; num_qubits * num_qubits];
+    for row in 0..num_qubits {
+        for col in 0..num_qubits {
+            let idx_old = row * num_qubits + col;
+            obs_perm[idx_old] = perm[row] * num_qubits + perm[col];
+        }
+    }
+    obs_perm
+}
+
+fn obs_perm_clifford(num_qubits: usize, perm: &[usize]) -> Vec<usize> {
+    let dim = 2 * num_qubits;
+    let mut obs_perm = vec![0usize; dim * dim];
+    for row in 0..dim {
+        let mapped_row = if row < num_qubits {
+            perm[row]
+        } else {
+            num_qubits + perm[row - num_qubits]
+        };
+        for col in 0..dim {
+            let mapped_col = if col < num_qubits {
+                perm[col]
+            } else {
+                num_qubits + perm[col - num_qubits]
+            };
+            obs_perm[row * dim + col] = mapped_row * dim + mapped_col;
+        }
+    }
+    obs_perm
+}
+
+pub fn compute_twists_square(num_qubits: usize, gateset: &[Gate]) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
+    compute_twists_with_builder(num_qubits, gateset, |perm| obs_perm_square(num_qubits, perm))
+}
+
+pub fn compute_twists_clifford(num_qubits: usize, gateset: &[Gate]) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
+    compute_twists_with_builder(num_qubits, gateset, |perm| obs_perm_clifford(num_qubits, perm))
+}

From 763c44fbdfc174a9f069ac59b8562fa8ebb30261 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Thu, 6 Nov 2025 12:51:06 +0100
Subject: [PATCH 04/20] Add gate metrics to lf

---
 rust/Cargo.lock                  |   6 +-
 rust/Cargo.toml                  |   2 +-
 rust/src/envs/clifford.rs        |   4 +
 rust/src/envs/linear_function.rs | 262 +++++++++++++++++++++++++++++--
 rust/src/envs/permutation.rs     |   4 +
 5 files changed, 260 insertions(+), 18 deletions(-)

diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index f725571..187e4c2 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -351,7 +351,7 @@ dependencies = [
  "pyo3",
  "rand",
  "rayon",
- "twisterl-rs",
+ "twisterl",
 ]
 
 [[package]]
@@ -465,10 +465,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
 
 [[package]]
-name = "twisterl-rs"
+name = "twisterl"
 version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25a14d819575b07f13110e7ebfe96f7be712a567bb0126e22a624aaf30fda9e3"
 dependencies = [
  "anyhow",
  "dyn-clone",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index a283d00..8350317 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -14,7 +14,7 @@ nalgebra = "0.33.0"
 rand = "0.8.4"
 rayon = "1.1.0"
 petgraph = "0.6.5"
-twisterl = {package = "twisterl-rs", version = "0.1.0", features = ["python_bindings"]}
+twisterl = {path = "../../../rust/", features = ["python_bindings"]}
 
 [profile.release]
 opt-level = 3
diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index 0b44296..83e9d00 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -242,6 +242,10 @@ impl Env for Clifford {
         }
     }
 
+    fn success(&self) -> bool {
+        self.success
+    }
+
     fn observe(&self) -> Vec<usize> {
         self.cf
             .data
diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index 78c368f..50ecfde 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -21,6 +21,7 @@ use twisterl::python_interface::env::PyBaseEnv;
 use crate::envs::common::Gate;
 
 use crate::envs::symmetry::compute_twists_square;
+use std::collections::{HashMap, HashSet};
 
 // Define some internal representation
 #[derive(Clone)]
@@ -110,6 +111,10 @@ pub struct LinearFunction {
     pub max_depth: usize,
     pub obs_perms: Vec<Vec<usize>>,
     pub act_perms: Vec<Vec<usize>>,
+    metrics: MetricsTracker,
+    metrics_values: MetricsCounts,
+    metrics_weights: MetricsWeights,
+    reward_value: f32,
 }
 
 
@@ -120,11 +125,28 @@ impl LinearFunction {
         gateset: Vec<Gate>,
         depth_slope: usize,
         max_depth: usize,
+        metrics_weights: MetricsWeights,
     ) -> Self {
         let lf = LFState::new(num_qubits);
         let success = lf.solved();
         let (obs_perms, act_perms) = compute_twists_square(num_qubits, &gateset);
-        LinearFunction {lf, depth:1, success, difficulty, gateset, depth_slope, max_depth, obs_perms, act_perms }
+        let metrics = MetricsTracker::new(num_qubits);
+        let metrics_values = metrics.snapshot();
+        LinearFunction {
+            lf,
+            depth: 1,
+            success,
+            difficulty,
+            gateset,
+            depth_slope,
+            max_depth,
+            obs_perms,
+            act_perms,
+            metrics,
+            metrics_values,
+            metrics_weights,
+            reward_value: if success { 1.0 } else { 0.0 },
+        }
     }
     pub fn solved(&self) -> bool {
         self.lf.solved()
@@ -157,6 +179,9 @@ impl Env for LinearFunction {
         self.lf.data = state.iter().map(|&x| x>0).collect();
         self.depth = self.max_depth;
         self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        //self.reward_value = if self.success { 1.0 } else { 0.0 };
     }
 
     fn reset(&mut self) {
@@ -164,6 +189,9 @@ impl Env for LinearFunction {
         self.lf = LFState::new(self.lf.size);
         self.depth = self.max_depth;
         self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
 
         let mut rng = rand::thread_rng();
         let action_range = Uniform::new(0, self.num_actions());
@@ -175,16 +203,33 @@ impl Env for LinearFunction {
         }
         self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
         self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
     }
 
     fn step(&mut self, action: usize)  {
-        match self.gateset[action] {
-            Gate::CX(q1, q2) => self.lf.cx(q1, q2),
-            Gate::SWAP(q1, q2) => self.lf.swap(q1, q2),
-            _ => {}
-        }        
+        let mut penalty = 0.0f32;
+
+        if action < self.gateset.len() {
+            let gate = &self.gateset[action];
+            let previous = self.metrics_values.clone();
+            self.metrics.apply_gate(gate);
+            let new_metrics = self.metrics.snapshot();
+            penalty = new_metrics.weighted_delta(&previous, &self.metrics_weights);
+            self.metrics_values = new_metrics;
+
+            match gate {
+                &Gate::CX(q1, q2) => self.lf.cx(q1, q2),
+                &Gate::SWAP(q1, q2) => self.lf.swap(q1, q2),
+                _ => {}
+            }
+        }
+
         self.depth = self.depth.saturating_sub(1); // Prevent underflow
         self.success = self.solved();
+        let achieved = if self.success { 1.0 } else { 0.0 };
+        self.reward_value = achieved - penalty;
     }
     
     fn masks(&self) -> Vec<bool> {
@@ -196,11 +241,11 @@ impl Env for LinearFunction {
     }
 
     fn reward(&self) -> f32 {
-        if self.success {
-            1.0
-        } else {
-            if self.depth == 0 { -0.5 } else { -0.5/(self.max_depth as f32) }
-        }
+        self.reward_value
+    }
+
+    fn success(&self) -> bool {
+        self.success
     }
 
     fn observe(&self,) -> Vec<usize> {
@@ -215,6 +260,188 @@ impl Env for LinearFunction {
     }
 }
 
+#[derive(Clone)]
+struct MetricsTracker {
+    num_qubits: usize,
+    n_cnots: usize,
+    n_gates: usize,
+    cnot_layers: HashSet<usize>,
+    layers: HashSet<usize>,
+    last_gates: Vec<isize>,
+    last_cxs: Vec<isize>,
+}
+
+impl MetricsTracker {
+    fn new(num_qubits: usize) -> Self {
+        Self {
+            num_qubits,
+            n_cnots: 0,
+            n_gates: 0,
+            cnot_layers: HashSet::new(),
+            layers: HashSet::new(),
+            last_gates: vec![-1; num_qubits],
+            last_cxs: vec![-1; num_qubits],
+        }
+    }
+
+    fn reset(&mut self) {
+        self.n_cnots = 0;
+        self.n_gates = 0;
+        self.cnot_layers.clear();
+        self.layers.clear();
+        for val in self.last_gates.iter_mut() {
+            *val = -1;
+        }
+        for val in self.last_cxs.iter_mut() {
+            *val = -1;
+        }
+    }
+
+    fn snapshot(&self) -> MetricsCounts {
+        MetricsCounts {
+            n_cnots: self.n_cnots,
+            n_layers_cnots: self.cnot_layers.len(),
+            n_layers: self.layers.len(),
+            n_gates: self.n_gates,
+        }
+    }
+
+    fn apply_gate(&mut self, gate: &Gate) {
+        match gate {
+            Gate::CX(c, t) => self.cx(*c, *t),
+            Gate::SWAP(c, t) => {
+                self.cx(*c, *t);
+                self.cx(*t, *c);
+                self.cx(*c, *t);
+            }
+            Gate::CZ(c, t) => {
+                self.single_qubit(*t);
+                self.cx(*c, *t);
+                self.single_qubit(*t);
+            }
+            Gate::H(q) | Gate::S(q) | Gate::Sdg(q) | Gate::SX(q) | Gate::SXdg(q) => {
+                self.single_qubit(*q);
+            }
+        }
+    }
+
+    fn single_qubit(&mut self, q: usize) {
+        if q >= self.num_qubits {
+            return;
+        }
+        self.n_gates += 1;
+        let gate_layer = self.last_gates[q] + 1;
+        self.last_gates[q] = gate_layer;
+        if gate_layer >= 0 {
+            self.layers.insert(gate_layer as usize);
+        }
+    }
+
+    fn cx(&mut self, control: usize, target: usize) {
+        if control >= self.num_qubits || target >= self.num_qubits {
+            return;
+        }
+        self.n_cnots += 1;
+        self.n_gates += 1;
+
+        let gate_layer = (self.last_gates[control].max(self.last_gates[target])) + 1;
+        self.last_gates[control] = gate_layer;
+        self.last_gates[target] = gate_layer;
+
+        if gate_layer >= 0 {
+            self.layers.insert(gate_layer as usize);
+        }
+
+        let cx_layer = (self.last_cxs[control].max(self.last_cxs[target])) + 1;
+        self.last_cxs[control] = cx_layer;
+        self.last_cxs[target] = cx_layer;
+
+        if cx_layer >= 0 {
+            self.cnot_layers.insert(cx_layer as usize);
+        }
+    }
+}
+
+#[derive(Clone)]
+struct MetricsCounts {
+    n_cnots: usize,
+    n_layers_cnots: usize,
+    n_layers: usize,
+    n_gates: usize,
+}
+
+impl MetricsCounts {
+    fn weighted_delta(&self, previous: &Self, weights: &MetricsWeights) -> f32 {
+        let delta_cnots = self.n_cnots.saturating_sub(previous.n_cnots) as f32;
+        let delta_layers_cnots =
+            self.n_layers_cnots.saturating_sub(previous.n_layers_cnots) as f32;
+        let delta_layers = self.n_layers.saturating_sub(previous.n_layers) as f32;
+        let delta_gates = self.n_gates.saturating_sub(previous.n_gates) as f32;
+
+        weights.n_cnots * delta_cnots
+            + weights.n_layers_cnots * delta_layers_cnots
+            + weights.n_layers * delta_layers
+            + weights.n_gates * delta_gates
+    }
+}
+
+#[derive(Clone)]
+pub struct MetricsWeights {
+    n_cnots: f32,
+    n_layers_cnots: f32,
+    n_layers: f32,
+    n_gates: f32,
+}
+
+impl Default for MetricsWeights {
+    fn default() -> Self {
+        Self {
+            n_cnots: 0.01,
+            n_layers_cnots: 0.0,
+            n_layers: 0.0,
+            n_gates: 0.0001,
+        }
+    }
+}
+
+impl MetricsWeights {
+    fn from_hashmap(map: Option<HashMap<String, f32>>) -> Self {
+        let mut weights = Self::default();
+        if let Some(values) = map {
+            for (key, value) in values {
+                match key.as_str() {
+                    "n_cnots" => weights.n_cnots = value,
+                    "n_layers_cnots" => weights.n_layers_cnots = value,
+                    "n_layers" => weights.n_layers = value,
+                    "n_gates" => weights.n_gates = value,
+                    _ => {}
+                }
+            }
+        }
+        weights
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn cx_gate_is_self_inverse() {
+        let gateset = vec![Gate::CX(0, 1)];
+        let metrics_weights = MetricsWeights::default();
+        let mut env = LinearFunction::new(2, 1, gateset, 2, 8, metrics_weights);
+        env.depth = env.max_depth;
+
+        env.step(0);
+        assert!(!env.solved());
+
+        env.step(0);
+        assert!(env.solved());
+        assert!(env.reward() <= 1.0);
+    }
+}
+
 
 #[pyclass(name="LinearFunctionEnv", extends=PyBaseEnv)]
 pub struct PyLinearFunctionEnv;
@@ -227,9 +454,18 @@ impl PyLinearFunctionEnv {
         difficulty: usize,
         gateset: Vec<Gate>,
         depth_slope: usize,
-        max_depth: usize
+        max_depth: usize,
+        metrics_weights: Option<HashMap<String, f32>>
     ) -> (Self, PyBaseEnv) {
-        let env = LinearFunction::new(num_qubits, difficulty, gateset, depth_slope, max_depth);
+        let weights = MetricsWeights::from_hashmap(metrics_weights);
+        let env = LinearFunction::new(
+            num_qubits,
+            difficulty,
+            gateset,
+            depth_slope,
+            max_depth,
+            weights,
+        );
         let env = Box::new(env);
         (PyLinearFunctionEnv, PyBaseEnv { env })
     }
diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index 0771b5d..e29fd4f 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -144,6 +144,10 @@ impl Env for Permutation {
         }
     }
 
+    fn success(&self) -> bool {
+        self.success
+    }
+    
     fn observe(&self,) -> Vec<usize> {
         self.state.iter().enumerate().map(|(i, v)| i * self.num_qubits + v ).collect()  
     }

From 6f91d95e551463b7ea7ee2a9fa34a5a285df2aab Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Tue, 11 Nov 2025 13:55:29 +0100
Subject: [PATCH 05/20] Metrics in cliffords and permutations

---
 rust/src/envs/clifford.rs        |  82 ++++++++++----
 rust/src/envs/linear_function.rs | 165 +--------------------------
 rust/src/envs/metrics.rs         | 184 +++++++++++++++++++++++++++++++
 rust/src/envs/mod.rs             |   1 +
 rust/src/envs/permutation.rs     |  60 +++++++---
 5 files changed, 296 insertions(+), 196 deletions(-)
 create mode 100644 rust/src/envs/metrics.rs

diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index 83e9d00..c43e741 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -19,7 +19,9 @@ use twisterl::rl::env::Env;
 use twisterl::python_interface::env::PyBaseEnv;
 
 use crate::envs::common::Gate;
+use crate::envs::metrics::{MetricsCounts, MetricsTracker, MetricsWeights};
 use crate::envs::symmetry::compute_twists_clifford;
+use std::collections::HashMap;
 
 
 #[derive(Clone)]
@@ -156,6 +158,10 @@ pub struct Clifford {
     pub max_depth: usize,
     pub obs_perms: Vec<Vec<usize>>,
     pub act_perms: Vec<Vec<usize>>,
+    metrics: MetricsTracker,
+    metrics_values: MetricsCounts,
+    metrics_weights: MetricsWeights,
+    reward_value: f32,
 }
 
 impl Clifford {
@@ -165,11 +171,28 @@ impl Clifford {
         gateset: Vec<Gate>,
         depth_slope: usize,
         max_depth: usize,
+        metrics_weights: MetricsWeights,
     ) -> Self {
         let cf = CFState::new(num_qubits);
         let success = cf.solved();
         let (obs_perms, act_perms) = compute_twists_clifford(num_qubits, &gateset);
-        Clifford { cf, depth: 1, success, difficulty, gateset, depth_slope, max_depth, obs_perms, act_perms }
+        let metrics = MetricsTracker::new(num_qubits);
+        let metrics_values = metrics.snapshot();
+        Clifford {
+            cf,
+            depth: 1,
+            success,
+            difficulty,
+            gateset,
+            depth_slope,
+            max_depth,
+            obs_perms,
+            act_perms,
+            metrics,
+            metrics_values,
+            metrics_weights,
+            reward_value: if success { 1.0 } else { 0.0 },
+        }
     }
     pub fn solved(&self) -> bool { self.cf.solved() }
 }
@@ -193,12 +216,18 @@ impl Env for Clifford {
         self.cf.data = state.iter().map(|&x| x > 0).collect();
         self.depth = self.max_depth;
         self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
     }
 
     fn reset(&mut self) {
         self.cf = CFState::new(self.cf.n);
         self.depth = self.max_depth;
         self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
 
         let mut rng = rand::thread_rng();
         let action_range = Uniform::new(0, self.num_actions());
@@ -209,21 +238,38 @@ impl Env for Clifford {
         }
         self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
         self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
     }
 
     fn step(&mut self, action: usize) {
-        match self.gateset[action] {
-            Gate::H(q)      => self.cf.h(q),
-            Gate::S(q)      => self.cf.s(q),
-            Gate::Sdg(q)    => self.cf.sdg(q),   // identical to S modulo global phase (ignored)
-            Gate::SX(q)     => self.cf.sx(q),
-            Gate::SXdg(q)   => self.cf.sxdg(q),  // identical to SX modulo global phase (ignored)
-            Gate::CX(c, t)  => self.cf.cx(c, t),
-            Gate::CZ(a, b)  => self.cf.cz(a, b),
-            Gate::SWAP(a,b) => self.cf.swap(a, b),
+        let mut penalty = 0.0f32;
+
+        if action < self.gateset.len() {
+            let gate = &self.gateset[action];
+            let previous = self.metrics_values.clone();
+            self.metrics.apply_gate(gate);
+            let new_metrics = self.metrics.snapshot();
+            penalty = new_metrics.weighted_delta(&previous, &self.metrics_weights);
+            self.metrics_values = new_metrics;
+
+            match gate {
+                Gate::H(q) => self.cf.h(*q),
+                Gate::S(q) => self.cf.s(*q),
+                Gate::Sdg(q) => self.cf.sdg(*q), // identical to S modulo global phase (ignored)
+                Gate::SX(q) => self.cf.sx(*q),
+                Gate::SXdg(q) => self.cf.sxdg(*q), // identical to SX modulo global phase (ignored)
+                Gate::CX(c, t) => self.cf.cx(*c, *t),
+                Gate::CZ(a, b) => self.cf.cz(*a, *b),
+                Gate::SWAP(a, b) => self.cf.swap(*a, *b),
+            }
         }
+
         self.depth = self.depth.saturating_sub(1);
         self.success = self.solved();
+        let achieved = if self.success { 1.0 } else { 0.0 };
+        self.reward_value = achieved - penalty;
     }
 
     fn masks(&self) -> Vec<bool> {
@@ -232,15 +278,7 @@ impl Env for Clifford {
 
     fn is_final(&self) -> bool { self.depth == 0 || self.success }
 
-    fn reward(&self) -> f32 {
-        if self.success {
-            1.0
-        } else if self.depth == 0 {
-            -0.5
-        } else {
-            -0.5 / (self.max_depth as f32)
-        }
-    }
+    fn reward(&self) -> f32 { self.reward_value }
 
     fn success(&self) -> bool {
         self.success
@@ -271,9 +309,11 @@ impl PyCliffordEnv {
         difficulty: usize,
         gateset: Vec<Gate>,
         depth_slope: usize,
-        max_depth: usize
+        max_depth: usize,
+        metrics_weights: Option<HashMap<String, f32>>,
     ) -> (Self, PyBaseEnv) {
-        let env = Clifford::new(num_qubits, difficulty, gateset, depth_slope, max_depth);
+        let weights = MetricsWeights::from_hashmap(metrics_weights);
+        let env = Clifford::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights);
         let env = Box::new(env);
         (PyCliffordEnv, PyBaseEnv { env })
     }
diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index 50ecfde..548bc60 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -20,8 +20,9 @@ use twisterl::python_interface::env::PyBaseEnv;
 
 use crate::envs::common::Gate;
 
+use crate::envs::metrics::{MetricsCounts, MetricsTracker, MetricsWeights};
 use crate::envs::symmetry::compute_twists_square;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 
 // Define some internal representation
 #[derive(Clone)]
@@ -260,167 +261,7 @@ impl Env for LinearFunction {
     }
 }
 
-#[derive(Clone)]
-struct MetricsTracker {
-    num_qubits: usize,
-    n_cnots: usize,
-    n_gates: usize,
-    cnot_layers: HashSet<usize>,
-    layers: HashSet<usize>,
-    last_gates: Vec<isize>,
-    last_cxs: Vec<isize>,
-}
-
-impl MetricsTracker {
-    fn new(num_qubits: usize) -> Self {
-        Self {
-            num_qubits,
-            n_cnots: 0,
-            n_gates: 0,
-            cnot_layers: HashSet::new(),
-            layers: HashSet::new(),
-            last_gates: vec![-1; num_qubits],
-            last_cxs: vec![-1; num_qubits],
-        }
-    }
-
-    fn reset(&mut self) {
-        self.n_cnots = 0;
-        self.n_gates = 0;
-        self.cnot_layers.clear();
-        self.layers.clear();
-        for val in self.last_gates.iter_mut() {
-            *val = -1;
-        }
-        for val in self.last_cxs.iter_mut() {
-            *val = -1;
-        }
-    }
-
-    fn snapshot(&self) -> MetricsCounts {
-        MetricsCounts {
-            n_cnots: self.n_cnots,
-            n_layers_cnots: self.cnot_layers.len(),
-            n_layers: self.layers.len(),
-            n_gates: self.n_gates,
-        }
-    }
-
-    fn apply_gate(&mut self, gate: &Gate) {
-        match gate {
-            Gate::CX(c, t) => self.cx(*c, *t),
-            Gate::SWAP(c, t) => {
-                self.cx(*c, *t);
-                self.cx(*t, *c);
-                self.cx(*c, *t);
-            }
-            Gate::CZ(c, t) => {
-                self.single_qubit(*t);
-                self.cx(*c, *t);
-                self.single_qubit(*t);
-            }
-            Gate::H(q) | Gate::S(q) | Gate::Sdg(q) | Gate::SX(q) | Gate::SXdg(q) => {
-                self.single_qubit(*q);
-            }
-        }
-    }
-
-    fn single_qubit(&mut self, q: usize) {
-        if q >= self.num_qubits {
-            return;
-        }
-        self.n_gates += 1;
-        let gate_layer = self.last_gates[q] + 1;
-        self.last_gates[q] = gate_layer;
-        if gate_layer >= 0 {
-            self.layers.insert(gate_layer as usize);
-        }
-    }
-
-    fn cx(&mut self, control: usize, target: usize) {
-        if control >= self.num_qubits || target >= self.num_qubits {
-            return;
-        }
-        self.n_cnots += 1;
-        self.n_gates += 1;
-
-        let gate_layer = (self.last_gates[control].max(self.last_gates[target])) + 1;
-        self.last_gates[control] = gate_layer;
-        self.last_gates[target] = gate_layer;
-
-        if gate_layer >= 0 {
-            self.layers.insert(gate_layer as usize);
-        }
-
-        let cx_layer = (self.last_cxs[control].max(self.last_cxs[target])) + 1;
-        self.last_cxs[control] = cx_layer;
-        self.last_cxs[target] = cx_layer;
-
-        if cx_layer >= 0 {
-            self.cnot_layers.insert(cx_layer as usize);
-        }
-    }
-}
-
-#[derive(Clone)]
-struct MetricsCounts {
-    n_cnots: usize,
-    n_layers_cnots: usize,
-    n_layers: usize,
-    n_gates: usize,
-}
-
-impl MetricsCounts {
-    fn weighted_delta(&self, previous: &Self, weights: &MetricsWeights) -> f32 {
-        let delta_cnots = self.n_cnots.saturating_sub(previous.n_cnots) as f32;
-        let delta_layers_cnots =
-            self.n_layers_cnots.saturating_sub(previous.n_layers_cnots) as f32;
-        let delta_layers = self.n_layers.saturating_sub(previous.n_layers) as f32;
-        let delta_gates = self.n_gates.saturating_sub(previous.n_gates) as f32;
-
-        weights.n_cnots * delta_cnots
-            + weights.n_layers_cnots * delta_layers_cnots
-            + weights.n_layers * delta_layers
-            + weights.n_gates * delta_gates
-    }
-}
-
-#[derive(Clone)]
-pub struct MetricsWeights {
-    n_cnots: f32,
-    n_layers_cnots: f32,
-    n_layers: f32,
-    n_gates: f32,
-}
-
-impl Default for MetricsWeights {
-    fn default() -> Self {
-        Self {
-            n_cnots: 0.01,
-            n_layers_cnots: 0.0,
-            n_layers: 0.0,
-            n_gates: 0.0001,
-        }
-    }
-}
-
-impl MetricsWeights {
-    fn from_hashmap(map: Option<HashMap<String, f32>>) -> Self {
-        let mut weights = Self::default();
-        if let Some(values) = map {
-            for (key, value) in values {
-                match key.as_str() {
-                    "n_cnots" => weights.n_cnots = value,
-                    "n_layers_cnots" => weights.n_layers_cnots = value,
-                    "n_layers" => weights.n_layers = value,
-                    "n_gates" => weights.n_gates = value,
-                    _ => {}
-                }
-            }
-        }
-        weights
-    }
-}
+// metrics implementation shared via crate::envs::metrics
 
 #[cfg(test)]
 mod tests {
diff --git a/rust/src/envs/metrics.rs b/rust/src/envs/metrics.rs
new file mode 100644
index 0000000..929696d
--- /dev/null
+++ b/rust/src/envs/metrics.rs
@@ -0,0 +1,184 @@
+// -*- coding: utf-8 -*-
+/*
+(C) Copyright 2025 IBM. All Rights Reserved.
+
+This code is licensed under the Apache License, Version 2.0. You may
+obtain a copy of this license in the LICENSE.txt file in the root directory
+of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+
+Any modifications or derivative works of this code must retain this
+copyright notice, and modified files need to carry a notice indicating
+that they have been altered from the originals.
+*/
+
+use std::collections::{HashMap, HashSet};
+
+use crate::envs::common::Gate;
+
+#[derive(Clone)]
+pub struct MetricsTracker { // running gate/layer bookkeeping; `snapshot()` exposes the totals
+    num_qubits: usize, // gates naming a qubit index >= this are ignored
+    n_cnots: usize, // CX gates recorded, including those from SWAP/CZ expansion
+    n_gates: usize, // every recorded CX and single-qubit gate
+    cnot_layers: HashSet<usize>, // layer indices seen when layering CX gates only
+    layers: HashSet<usize>, // layer indices seen when layering all gates
+    last_gates: Vec<isize>, // per-qubit layer cursor over all gates; starts at -1
+    last_cxs: Vec<isize>, // per-qubit layer cursor over CX gates; starts at -1
+}
+
+impl MetricsTracker {
+    /// Creates a tracker for `num_qubits` qubits with all counts at zero.
+    pub fn new(num_qubits: usize) -> Self {
+        Self {
+            num_qubits,
+            n_cnots: 0,
+            n_gates: 0,
+            cnot_layers: HashSet::new(),
+            layers: HashSet::new(),
+            last_gates: vec![-1; num_qubits],
+            last_cxs: vec![-1; num_qubits],
+        }
+    }
+
+    /// Clears every count and all layer bookkeeping; `num_qubits` is kept.
+    pub fn reset(&mut self) {
+        self.n_cnots = 0;
+        self.n_gates = 0;
+        self.cnot_layers.clear();
+        self.layers.clear();
+        self.last_gates.fill(-1);
+        self.last_cxs.fill(-1);
+    }
+
+    /// Returns the current totals as a standalone `MetricsCounts`.
+    pub fn snapshot(&self) -> MetricsCounts {
+        MetricsCounts {
+            n_cnots: self.n_cnots,
+            n_layers_cnots: self.cnot_layers.len(),
+            n_layers: self.layers.len(),
+            n_gates: self.n_gates,
+        }
+    }
+
+    /// Records `gate`. SWAP counts as three alternating CX gates; CZ as one CX
+    /// with a single-qubit gate on the target before and after it.
+    pub fn apply_gate(&mut self, gate: &Gate) {
+        match gate {
+            Gate::CX(c, t) => self.cx(*c, *t),
+            Gate::SWAP(c, t) => {
+                self.cx(*c, *t);
+                self.cx(*t, *c);
+                self.cx(*c, *t);
+            }
+            Gate::CZ(c, t) => {
+                self.single_qubit(*t);
+                self.cx(*c, *t);
+                self.single_qubit(*t);
+            }
+            Gate::H(q) | Gate::S(q) | Gate::Sdg(q) | Gate::SX(q) | Gate::SXdg(q) => {
+                self.single_qubit(*q);
+            }
+        }
+    }
+
+    /// Records a single-qubit gate on `target`; out-of-range qubits are ignored.
+    fn single_qubit(&mut self, target: usize) {
+        if target >= self.num_qubits {
+            return;
+        }
+        self.n_gates += 1;
+        // The gate lands one layer past the last one used on this qubit; register
+        // that new layer (as `cx` does) so a qubit's first gate counts in `n_layers`.
+        let gate_layer = self.last_gates[target] + 1;
+        self.last_gates[target] = gate_layer;
+        if gate_layer >= 0 {
+            self.layers.insert(gate_layer as usize);
+        }
+    }
+
+    /// Records a CX; a gate with equal or out-of-range qubits is ignored.
+    fn cx(&mut self, control: usize, target: usize) {
+        let n = self.num_qubits;
+        if control == target || control >= n || target >= n {
+            return;
+        }
+        self.n_cnots += 1;
+        self.n_gates += 1;
+
+        // All-gate depth: one layer past the deeper of the two qubits.
+        let gate_layer = self.last_gates[control].max(self.last_gates[target]) + 1;
+        self.last_gates[control] = gate_layer;
+        self.last_gates[target] = gate_layer;
+        if gate_layer >= 0 {
+            self.layers.insert(gate_layer as usize);
+        }
+        // Same rule restricted to CX gates.
+        let cx_layer = self.last_cxs[control].max(self.last_cxs[target]) + 1;
+        self.last_cxs[control] = cx_layer;
+        self.last_cxs[target] = cx_layer;
+        if cx_layer >= 0 {
+            self.cnot_layers.insert(cx_layer as usize);
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct MetricsCounts { // totals copied out of a `MetricsTracker` by `snapshot()`
+    n_cnots: usize, // CX gates recorded
+    n_layers_cnots: usize, // number of distinct CX-only layer indices
+    n_layers: usize, // number of distinct all-gate layer indices
+    n_gates: usize, // all gates recorded
+}
+
+impl MetricsCounts {
+    /// Weighted sum of how much each count grew from `previous` to `self`.
+    ///
+    /// Growth is clamped at zero per metric (`saturating_sub`), so a count that
+    /// shrank contributes nothing.
+    pub fn weighted_delta(&self, previous: &Self, weights: &MetricsWeights) -> f32 {
+        // Non-negative increase of one counter, as a float.
+        let grew = |now: usize, before: usize| now.saturating_sub(before) as f32;
+        weights.n_cnots * grew(self.n_cnots, previous.n_cnots)
+            + weights.n_layers_cnots * grew(self.n_layers_cnots, previous.n_layers_cnots)
+            + weights.n_layers * grew(self.n_layers, previous.n_layers)
+            + weights.n_gates * grew(self.n_gates, previous.n_gates)
+    }
+}
+
+#[derive(Clone)]
+pub struct MetricsWeights { // per-unit weights consumed by `MetricsCounts::weighted_delta`
+    pub n_cnots: f32, // weight per added CX
+    pub n_layers_cnots: f32, // weight per added CX-only layer
+    pub n_layers: f32, // weight per added layer (all gates)
+    pub n_gates: f32, // weight per added gate
+}
+
+impl Default for MetricsWeights {
+    /// Default weights.
+    ///
+    /// Each CX adds 0.01 and every counted gate (CX included) adds 0.0001; both
+    /// layer-count weights are zero.
+    fn default() -> Self {
+        let (per_cx, per_gate, per_layer) = (0.01, 0.0001, 0.0);
+        Self { n_cnots: per_cx, n_layers_cnots: per_layer, n_layers: per_layer, n_gates: per_gate }
+    }
+}
+
+impl MetricsWeights {
+    /// Defaults overridden by the recognized keys of `map`; unknown keys and `None` change nothing.
+    pub fn from_hashmap(map: Option<HashMap<String, f32>>) -> Self {
+        let mut weights = Self::default();
+        for (name, weight) in map.into_iter().flatten() {
+            let slot = match name.as_str() {
+                "n_cnots" => &mut weights.n_cnots,
+                "n_layers_cnots" => &mut weights.n_layers_cnots,
+                "n_layers" => &mut weights.n_layers,
+                "n_gates" => &mut weights.n_gates,
+                _ => continue,
+            };
+            *slot = weight;
+        }
+        weights
+    }
+}
+
diff --git a/rust/src/envs/mod.rs b/rust/src/envs/mod.rs
index 7beab6a..7a92e30 100644
--- a/rust/src/envs/mod.rs
+++ b/rust/src/envs/mod.rs
@@ -16,3 +16,4 @@ pub mod linear_function;
 pub mod permutation;
 pub mod common;
 pub mod symmetry;
+pub mod metrics;
diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index e29fd4f..b85ebdd 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -19,7 +19,9 @@ use twisterl::rl::env::Env;
 use twisterl::python_interface::env::PyBaseEnv;
 
 use crate::envs::common::Gate;
+use crate::envs::metrics::{MetricsCounts, MetricsTracker, MetricsWeights};
 use crate::envs::symmetry::compute_twists_square;
+use std::collections::HashMap;
 
 
 // This is the Env definition
@@ -36,6 +38,10 @@ pub struct Permutation {
     pub max_depth: usize,
     pub obs_perms: Vec<Vec<usize>>,
     pub act_perms: Vec<Vec<usize>>,
+    metrics: MetricsTracker,
+    metrics_values: MetricsCounts,
+    metrics_weights: MetricsWeights,
+    reward_value: f32,
 }
 
 
@@ -46,12 +52,16 @@ impl Permutation {
         gateset: Vec<Gate>,
         depth_slope: usize,
         max_depth: usize,
+        metrics_weights: MetricsWeights,
     ) -> Self {
         let (obs_perms, act_perms) = compute_twists_square(num_qubits, &gateset);
+        let metrics = MetricsTracker::new(num_qubits);
+        let metrics_values = metrics.snapshot();
+        let success = true;
         Permutation {
             state:(0..num_qubits).collect(),
             depth:1,
-            success:true,
+            success,
             num_qubits,
             difficulty,
             gateset,
@@ -59,6 +69,10 @@ impl Permutation {
             max_depth,
             obs_perms,
             act_perms,
+            metrics,
+            metrics_values,
+            metrics_weights,
+            reward_value: 1.0,
         }
     }
 
@@ -101,11 +115,19 @@ impl Env for Permutation {
 
         self.depth = self.max_depth;  
         self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
     }
 
     fn reset(&mut self) {
         // Reset the state to the target
         self.state = (0..self.num_qubits).collect();
+        self.depth = self.max_depth;
+        self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
 
         let mut rng = rand::thread_rng();
         let action_range = Uniform::new(0, self.num_actions());
@@ -117,15 +139,31 @@ impl Env for Permutation {
         }
         self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
         self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
     }
 
     fn step(&mut self, action: usize)  {
-        match self.gateset[action] {
-            Gate::SWAP(q1, q2) => (self.state[q2], self.state[q1]) = (self.state[q1], self.state[q2]),
-            _ => {}
+        let mut penalty = 0.0f32;
+
+        if action < self.gateset.len() {
+            let gate = &self.gateset[action];
+            let previous = self.metrics_values.clone();
+            self.metrics.apply_gate(gate);
+            let new_metrics = self.metrics.snapshot();
+            penalty = new_metrics.weighted_delta(&previous, &self.metrics_weights);
+            self.metrics_values = new_metrics;
+
+            match gate {
+                Gate::SWAP(q1, q2) => (self.state[*q2], self.state[*q1]) = (self.state[*q1], self.state[*q2]),
+                _ => {}
+            }
         }
         self.depth = self.depth.saturating_sub(1); // Prevent underflow
         self.success = self.solved();
+        let achieved = if self.success { 1.0 } else { 0.0 };
+        self.reward_value = achieved - penalty;
     }
     
     fn masks(&self) -> Vec<bool> {
@@ -136,13 +174,7 @@ impl Env for Permutation {
         self.depth == 0 || self.success
     }
 
-    fn reward(&self) -> f32 {
-        if self.success {
-            1.0
-        } else {
-            if self.depth == 0 { -0.5 } else { -0.5/(self.max_depth as f32) }
-        }
-    }
+    fn reward(&self) -> f32 { self.reward_value }
 
     fn success(&self) -> bool {
         self.success
@@ -169,9 +201,11 @@ impl PyPermutationEnv {
         difficulty: usize,
         gateset: Vec<Gate>,
         depth_slope: usize,
-        max_depth: usize
+        max_depth: usize,
+        metrics_weights: Option<HashMap<String, f32>>,
     ) -> (Self, PyBaseEnv) {
-        let env = Permutation::new(num_qubits, difficulty, gateset, depth_slope, max_depth);
+        let weights = MetricsWeights::from_hashmap(metrics_weights);
+        let env = Permutation::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights);
         let env = Box::new(env);
         (PyPermutationEnv, PyBaseEnv { env })
     }

From c312e0f2ee7aea0a22c11a4ec0905d190d206944 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Fri, 14 Nov 2025 17:01:26 +0100
Subject: [PATCH 06/20] Test inverts in cliffords

---
 rust/src/envs/clifford.rs | 101 ++++++++++++++++++++++++++++++++------
 1 file changed, 86 insertions(+), 15 deletions(-)

diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index c43e741..450dcbf 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -14,6 +14,7 @@ that they have been altered from the originals.
 use pyo3::prelude::*;
 
 use rand::distributions::{Distribution, Uniform};
+use rand::Rng;
 
 use twisterl::rl::env::Env;
 use twisterl::python_interface::env::PyBaseEnv;
@@ -142,6 +143,35 @@ impl CFState {
         }
         true
     }
+
+    /// Gauss-Jordan-style inverse via `swap_rows`/`row_xor`; panics if a column has no pivot.
+    fn inverse(&self) -> Self {
+        let n = self.dim();
+        let (mut work, mut result) = (self.clone(), CFState::new(self.n));
+        for pivot_col in 0..n {
+            if !work.get(pivot_col, pivot_col) {
+                // Pull a lower row with a set bit in this column onto the diagonal.
+                let swap_with = ((pivot_col + 1)..n)
+                    .find(|&r| work.get(r, pivot_col))
+                    .expect("CFState is singular; cannot invert");
+                work.swap_rows(pivot_col, swap_with);
+                result.swap_rows(pivot_col, swap_with);
+            }
+            // Clear this column in every other row, mirroring each step on `result`.
+            for r in (0..n).filter(|&r| r != pivot_col) {
+                if work.get(r, pivot_col) {
+                    work.row_xor(r, pivot_col);
+                    result.row_xor(r, pivot_col);
+                }
+            }
+        }
+        debug_assert!(work.solved(), "CFState inverse computation failed");
+        result
+    }
+
+    fn invert(&mut self) {
+        *self = self.inverse(); // overwrite this state with the result of `inverse()`
+    }
 }
 
 // -------- Env: Clifford synthesis over the symplectic tableau (phase ignored) --------
@@ -162,6 +192,8 @@ pub struct Clifford {
     metrics_values: MetricsCounts,
     metrics_weights: MetricsWeights,
     reward_value: f32,
+    add_inverts: bool,
+    inverted: bool,
 }
 
 impl Clifford {
@@ -172,6 +204,7 @@ impl Clifford {
         depth_slope: usize,
         max_depth: usize,
         metrics_weights: MetricsWeights,
+        add_inverts: bool,
     ) -> Self {
         let cf = CFState::new(num_qubits);
         let success = cf.solved();
@@ -192,9 +225,34 @@ impl Clifford {
             metrics_values,
             metrics_weights,
             reward_value: if success { 1.0 } else { 0.0 },
+            add_inverts,
+            inverted: false,
         }
     }
     pub fn solved(&self) -> bool { self.cf.solved() }
+
+    fn apply_gate_to_state(&mut self, gate: &Gate) {
+        match *gate { // updates the tableau `self.cf` only; metrics are not touched here
+            Gate::H(q) => self.cf.h(q),
+            Gate::S(q) => self.cf.s(q),
+            Gate::Sdg(q) => self.cf.sdg(q), // same as S up to a global phase, which is ignored
+            Gate::SX(q) => self.cf.sx(q),
+            Gate::SXdg(q) => self.cf.sxdg(q), // same as SX up to a global phase, which is ignored
+            Gate::CX(c, t) => self.cf.cx(c, t),
+            Gate::CZ(a, b) => self.cf.cz(a, b),
+            Gate::SWAP(a, b) => self.cf.swap(a, b),
+        }
+    }
+
+    /// With `add_inverts` set, inverts `self.cf` with probability 0.5 and toggles `inverted`.
+    fn maybe_random_invert(&mut self) {
+        // `&&` short-circuits, so no random draw happens while inversions are disabled.
+        let flip = self.add_inverts && rand::thread_rng().gen_bool(0.5);
+        if flip {
+            self.cf.invert();
+            self.inverted = !self.inverted;
+        }
+    }
 }
 
 impl Env for Clifford {
@@ -219,6 +277,7 @@ impl Env for Clifford {
         self.metrics.reset();
         self.metrics_values = self.metrics.snapshot();
         self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
     }
 
     fn reset(&mut self) {
@@ -234,39 +293,33 @@ impl Env for Clifford {
 
         for _ in 0..self.difficulty {
             let action = action_range.sample(&mut rng);
-            self.step(action);
+            if let Some(gate) = self.gateset.get(action).cloned() {
+                self.apply_gate_to_state(&gate);
+            }
         }
         self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
         self.success = self.solved();
         self.metrics.reset();
         self.metrics_values = self.metrics.snapshot();
         self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
     }
 
     fn step(&mut self, action: usize) {
         let mut penalty = 0.0f32;
 
-        if action < self.gateset.len() {
-            let gate = &self.gateset[action];
+        if let Some(gate) = self.gateset.get(action).cloned() {
             let previous = self.metrics_values.clone();
-            self.metrics.apply_gate(gate);
+            self.metrics.apply_gate(&gate);
             let new_metrics = self.metrics.snapshot();
             penalty = new_metrics.weighted_delta(&previous, &self.metrics_weights);
             self.metrics_values = new_metrics;
 
-            match gate {
-                Gate::H(q) => self.cf.h(*q),
-                Gate::S(q) => self.cf.s(*q),
-                Gate::Sdg(q) => self.cf.sdg(*q), // identical to S modulo global phase (ignored)
-                Gate::SX(q) => self.cf.sx(*q),
-                Gate::SXdg(q) => self.cf.sxdg(*q), // identical to SX modulo global phase (ignored)
-                Gate::CX(c, t) => self.cf.cx(*c, *t),
-                Gate::CZ(a, b) => self.cf.cz(*a, *b),
-                Gate::SWAP(a, b) => self.cf.swap(*a, *b),
-            }
+            self.apply_gate_to_state(&gate);
         }
 
         self.depth = self.depth.saturating_sub(1);
+        self.maybe_random_invert();
         self.success = self.solved();
         let achieved = if self.success { 1.0 } else { 0.0 };
         self.reward_value = achieved - penalty;
@@ -304,6 +357,15 @@ pub struct PyCliffordEnv;
 #[pymethods]
 impl PyCliffordEnv {
     #[new]
+    #[pyo3(signature = (
+        num_qubits,
+        difficulty,
+        gateset,
+        depth_slope,
+        max_depth,
+        metrics_weights=None,
+        add_inverts=None,
+    ))]
     pub fn new(
         num_qubits: usize,
         difficulty: usize,
@@ -311,9 +373,18 @@ impl PyCliffordEnv {
         depth_slope: usize,
         max_depth: usize,
         metrics_weights: Option<HashMap<String, f32>>,
+        add_inverts: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
-        let env = Clifford::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights);
+        let env = Clifford::new(
+            num_qubits,
+            difficulty,
+            gateset,
+            depth_slope,
+            max_depth,
+            weights,
+            add_inverts.unwrap_or(true),
+        );
         let env = Box::new(env);
         (PyCliffordEnv, PyBaseEnv { env })
     }

From b3929ac4627adbefa9b9fcea5a05a6a6080c9ed2 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Fri, 21 Nov 2025 11:08:28 +0100
Subject: [PATCH 07/20] Metrics in envs

---
 src/qiskit_gym/envs/synthesis.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/qiskit_gym/envs/synthesis.py b/src/qiskit_gym/envs/synthesis.py
index 19e7ad6..dff488f 100644
--- a/src/qiskit_gym/envs/synthesis.py
+++ b/src/qiskit_gym/envs/synthesis.py
@@ -43,6 +43,7 @@ def from_coupling_map(
         difficulty: int = 1,
         depth_slope: int = 2,
         max_depth: int = 128,
+        metrics_weights: dict[str, float] | None = None,
     ):
         if basis_gates is None:
             basis_gates = tuple(cls.allowed_gates)
@@ -72,6 +73,7 @@ def from_coupling_map(
             "gateset": gateset,
             "depth_slope": depth_slope,
             "max_depth": max_depth,
+            "metrics_weights": metrics_weights,
         }
         return cls(**config)
 
@@ -106,6 +108,7 @@ def __init__(
         difficulty: int = 1,
         depth_slope: int = 2,
         max_depth: int = 128,
+        metrics_weights: dict[str, float] | None = None,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -113,6 +116,7 @@ def __init__(
             "gateset": gateset,
             "depth_slope": depth_slope,
             "max_depth": max_depth,
+            "metrics_weights": metrics_weights,
         })
 
     def get_state(self, input: QuantumCircuit | Clifford):
@@ -138,6 +142,7 @@ def __init__(
         difficulty: int = 1,
         depth_slope: int = 2,
         max_depth: int = 128,
+        metrics_weights: dict[str, float] | None = None,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -145,6 +150,7 @@ def __init__(
             "gateset": gateset,
             "depth_slope": depth_slope,
             "max_depth": max_depth,
+            "metrics_weights": metrics_weights,
         })
     
     def get_state(self, input: QuantumCircuit | LinearFunction):
@@ -172,6 +178,7 @@ def __init__(
         difficulty: int = 1,
         depth_slope: int = 2,
         max_depth: int = 128,
+        metrics_weights: dict[str, float] | None = None,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -179,6 +186,7 @@ def __init__(
             "gateset": gateset,
             "depth_slope": depth_slope,
             "max_depth": max_depth,
+            "metrics_weights": metrics_weights,
         })
 
     def get_state(self, input: QuantumCircuit | PermutationGate | Iterable[int]):

From aaf52f3fdafe5f040adf8863e719db648b698136 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Thu, 27 Nov 2025 09:51:59 +0100
Subject: [PATCH 08/20] inv in perms

---
 rust/src/envs/permutation.rs     | 30 +++++++++++++++++++++++++++++-
 src/qiskit_gym/envs/synthesis.py |  4 ++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index b85ebdd..f5063c3 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -14,6 +14,7 @@ that they have been altered from the originals.
 use pyo3::prelude::*;
 
 use rand::distributions::{Distribution, Uniform};
+use rand::Rng;
 
 use twisterl::rl::env::Env;
 use twisterl::python_interface::env::PyBaseEnv;
@@ -38,6 +39,7 @@ pub struct Permutation {
     pub max_depth: usize,
     pub obs_perms: Vec<Vec<usize>>,
     pub act_perms: Vec<Vec<usize>>,
+    pub add_inverts: bool,
     metrics: MetricsTracker,
     metrics_values: MetricsCounts,
     metrics_weights: MetricsWeights,
@@ -53,8 +55,11 @@ impl Permutation {
         depth_slope: usize,
         max_depth: usize,
         metrics_weights: MetricsWeights,
+        add_inverts: bool,
     ) -> Self {
+        println!("Computing perms");
         let (obs_perms, act_perms) = compute_twists_square(num_qubits, &gateset);
+        println!("Computing perms finshed");
         let metrics = MetricsTracker::new(num_qubits);
         let metrics_values = metrics.snapshot();
         let success = true;
@@ -69,6 +74,7 @@ impl Permutation {
             max_depth,
             obs_perms,
             act_perms,
+            add_inverts,
             metrics,
             metrics_values,
             metrics_weights,
@@ -76,6 +82,16 @@ impl Permutation {
         }
     }
 
+    /// Returns the inverse of the permutation `perm`: `perm[i] = v` becomes `inv[v] = i`, so
+    /// `perm[inv[i]] == i` for all `i`. Panics if any entry is not below `perm.len()`.
+    fn invert_perm(perm: &[usize]) -> Vec<usize> {
+        let mut inverse = vec![0usize; perm.len()];
+        perm.iter()
+            .enumerate()
+            .for_each(|(pos, &image)| inverse[image] = pos);
+        inverse
+    }
+
     pub fn solved(&self) -> bool {
         for i in 0..self.state.len() {
             if self.state[i] != i {return false}
@@ -160,6 +176,15 @@ impl Env for Permutation {
                 _ => {}
             }
         }
+
+        // Randomly invert the permutation with 50% probability during training
+        if self.add_inverts {
+            let mut rng = rand::thread_rng();
+            if rng.gen::<f32>() > 0.5 {
+                self.state = Self::invert_perm(&self.state);
+            }
+        }
+
         self.depth = self.depth.saturating_sub(1); // Prevent underflow
         self.success = self.solved();
         let achieved = if self.success { 1.0 } else { 0.0 };
@@ -196,6 +221,7 @@ pub struct PyPermutationEnv;
 #[pymethods]
 impl PyPermutationEnv {
     #[new]
+    #[pyo3(signature = (num_qubits, difficulty, gateset, depth_slope, max_depth, metrics_weights=None, add_inverts=None))]
     pub fn new(
         num_qubits: usize,
         difficulty: usize,
@@ -203,9 +229,11 @@ impl PyPermutationEnv {
         depth_slope: usize,
         max_depth: usize,
         metrics_weights: Option<HashMap<String, f32>>,
+        add_inverts: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
-        let env = Permutation::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights);
+        let add_inverts = add_inverts.unwrap_or(false);
+        let env = Permutation::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights, add_inverts);
         let env = Box::new(env);
         (PyPermutationEnv, PyBaseEnv { env })
     }
diff --git a/src/qiskit_gym/envs/synthesis.py b/src/qiskit_gym/envs/synthesis.py
index dff488f..78710cc 100644
--- a/src/qiskit_gym/envs/synthesis.py
+++ b/src/qiskit_gym/envs/synthesis.py
@@ -44,6 +44,7 @@ def from_coupling_map(
         depth_slope: int = 2,
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
+        add_inverts: bool = False,
     ):
         if basis_gates is None:
             basis_gates = tuple(cls.allowed_gates)
@@ -74,6 +75,7 @@ def from_coupling_map(
             "depth_slope": depth_slope,
             "max_depth": max_depth,
             "metrics_weights": metrics_weights,
+            "add_inverts": add_inverts,
         }
         return cls(**config)
 
@@ -179,6 +181,7 @@ def __init__(
         depth_slope: int = 2,
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
+        add_inverts: bool = False,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -187,6 +190,7 @@ def __init__(
             "depth_slope": depth_slope,
             "max_depth": max_depth,
             "metrics_weights": metrics_weights,
+            "add_inverts": add_inverts,
         })
 
     def get_state(self, input: QuantumCircuit | PermutationGate | Iterable[int]):

From 737b690682cafcad34f25ef713e60eb8e862ad80 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Fri, 28 Nov 2025 10:03:33 +0100
Subject: [PATCH 09/20] Perms optional

---
 rust/src/envs/clifford.rs        | 15 ++++++++++++++-
 rust/src/envs/linear_function.rs | 27 ++++++++++++++++++++++++---
 rust/src/envs/permutation.rs     | 22 +++++++++++++++++-----
 src/qiskit_gym/envs/synthesis.py | 24 ++++++++++++++++++++++--
 4 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index 450dcbf..317553c 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -194,6 +194,7 @@ pub struct Clifford {
     reward_value: f32,
     add_inverts: bool,
     inverted: bool,
+    add_perms: bool,
 }
 
 impl Clifford {
@@ -205,10 +206,18 @@ impl Clifford {
         max_depth: usize,
         metrics_weights: MetricsWeights,
         add_inverts: bool,
+        add_perms: bool,
     ) -> Self {
         let cf = CFState::new(num_qubits);
         let success = cf.solved();
-        let (obs_perms, act_perms) = compute_twists_clifford(num_qubits, &gateset);
+
+        // Only compute symmetries if enabled
+        let (obs_perms, act_perms) = if add_perms {
+            compute_twists_clifford(num_qubits, &gateset)
+        } else {
+            (Vec::new(), Vec::new())
+        };
+
         let metrics = MetricsTracker::new(num_qubits);
         let metrics_values = metrics.snapshot();
         Clifford {
@@ -227,6 +236,7 @@ impl Clifford {
             reward_value: if success { 1.0 } else { 0.0 },
             add_inverts,
             inverted: false,
+            add_perms,
         }
     }
     pub fn solved(&self) -> bool { self.cf.solved() }
@@ -365,6 +375,7 @@ impl PyCliffordEnv {
         max_depth,
         metrics_weights=None,
         add_inverts=None,
+        add_perms=None,
     ))]
     pub fn new(
         num_qubits: usize,
@@ -374,6 +385,7 @@ impl PyCliffordEnv {
         max_depth: usize,
         metrics_weights: Option<HashMap<String, f32>>,
         add_inverts: Option<bool>,
+        add_perms: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
         let env = Clifford::new(
@@ -384,6 +396,7 @@ impl PyCliffordEnv {
             max_depth,
             weights,
             add_inverts.unwrap_or(true),
+            add_perms.unwrap_or(true),
         );
         let env = Box::new(env);
         (PyCliffordEnv, PyBaseEnv { env })
diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index 548bc60..c3c250d 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -116,6 +116,7 @@ pub struct LinearFunction {
     metrics_values: MetricsCounts,
     metrics_weights: MetricsWeights,
     reward_value: f32,
+    add_perms: bool,
 }
 
 
@@ -127,10 +128,18 @@ impl LinearFunction {
         depth_slope: usize,
         max_depth: usize,
         metrics_weights: MetricsWeights,
+        add_perms: bool,
     ) -> Self {
         let lf = LFState::new(num_qubits);
         let success = lf.solved();
-        let (obs_perms, act_perms) = compute_twists_square(num_qubits, &gateset);
+
+        // Only compute symmetries if enabled
+        let (obs_perms, act_perms) = if add_perms {
+            compute_twists_square(num_qubits, &gateset)
+        } else {
+            (Vec::new(), Vec::new())
+        };
+
         let metrics = MetricsTracker::new(num_qubits);
         let metrics_values = metrics.snapshot();
         LinearFunction {
@@ -147,6 +156,7 @@ impl LinearFunction {
             metrics_values,
             metrics_weights,
             reward_value: if success { 1.0 } else { 0.0 },
+            add_perms,
         }
     }
     pub fn solved(&self) -> bool {
@@ -271,7 +281,7 @@ mod tests {
     fn cx_gate_is_self_inverse() {
         let gateset = vec![Gate::CX(0, 1)];
         let metrics_weights = MetricsWeights::default();
-        let mut env = LinearFunction::new(2, 1, gateset, 2, 8, metrics_weights);
+        let mut env = LinearFunction::new(2, 1, gateset, 2, 8, metrics_weights, true);
         env.depth = env.max_depth;
 
         env.step(0);
@@ -290,13 +300,23 @@ pub struct PyLinearFunctionEnv;
 #[pymethods]
 impl PyLinearFunctionEnv {
     #[new]
+    #[pyo3(signature = (
+        num_qubits,
+        difficulty,
+        gateset,
+        depth_slope,
+        max_depth,
+        metrics_weights=None,
+        add_perms=None,
+    ))]
     pub fn new(
         num_qubits: usize,
         difficulty: usize,
         gateset: Vec<Gate>,
         depth_slope: usize,
         max_depth: usize,
-        metrics_weights: Option<HashMap<String, f32>>
+        metrics_weights: Option<HashMap<String, f32>>,
+        add_perms: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
         let env = LinearFunction::new(
@@ -306,6 +326,7 @@ impl PyLinearFunctionEnv {
             depth_slope,
             max_depth,
             weights,
+            add_perms.unwrap_or(true),
         );
         let env = Box::new(env);
         (PyLinearFunctionEnv, PyBaseEnv { env })
diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index f5063c3..5a879e9 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -44,6 +44,7 @@ pub struct Permutation {
     metrics_values: MetricsCounts,
     metrics_weights: MetricsWeights,
     reward_value: f32,
+    add_perms: bool,
 }
 
 
@@ -56,10 +57,18 @@ impl Permutation {
         max_depth: usize,
         metrics_weights: MetricsWeights,
         add_inverts: bool,
+        add_perms: bool,
     ) -> Self {
-        println!("Computing perms");
-        let (obs_perms, act_perms) = compute_twists_square(num_qubits, &gateset);
-        println!("Computing perms finshed");
+        // Only compute symmetries if enabled
+        let (obs_perms, act_perms) = if add_perms {
+            println!("Computing perms");
+            let result = compute_twists_square(num_qubits, &gateset);
+            println!("Computing perms finished");
+            result
+        } else {
+            (Vec::new(), Vec::new())
+        };
+
         let metrics = MetricsTracker::new(num_qubits);
         let metrics_values = metrics.snapshot();
         let success = true;
@@ -79,6 +88,7 @@ impl Permutation {
             metrics_values,
             metrics_weights,
             reward_value: 1.0,
+            add_perms,
         }
     }
 
@@ -221,7 +231,7 @@ pub struct PyPermutationEnv;
 #[pymethods]
 impl PyPermutationEnv {
     #[new]
-    #[pyo3(signature = (num_qubits, difficulty, gateset, depth_slope, max_depth, metrics_weights=None, add_inverts=None))]
+    #[pyo3(signature = (num_qubits, difficulty, gateset, depth_slope, max_depth, metrics_weights=None, add_inverts=None, add_perms=None))]
     pub fn new(
         num_qubits: usize,
         difficulty: usize,
@@ -230,10 +240,12 @@ impl PyPermutationEnv {
         max_depth: usize,
         metrics_weights: Option<HashMap<String, f32>>,
         add_inverts: Option<bool>,
+        add_perms: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
         let add_inverts = add_inverts.unwrap_or(false);
-        let env = Permutation::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights, add_inverts);
+        let add_perms = add_perms.unwrap_or(true);
+        let env = Permutation::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights, add_inverts, add_perms);
         let env = Box::new(env);
         (PyPermutationEnv, PyBaseEnv { env })
     }
diff --git a/src/qiskit_gym/envs/synthesis.py b/src/qiskit_gym/envs/synthesis.py
index 78710cc..417c1e6 100644
--- a/src/qiskit_gym/envs/synthesis.py
+++ b/src/qiskit_gym/envs/synthesis.py
@@ -45,6 +45,7 @@ def from_coupling_map(
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
         add_inverts: bool = False,
+        add_perms: bool = True,
     ):
         if basis_gates is None:
             basis_gates = tuple(cls.allowed_gates)
@@ -76,12 +77,23 @@ def from_coupling_map(
             "max_depth": max_depth,
             "metrics_weights": metrics_weights,
             "add_inverts": add_inverts,
+            "add_perms": add_perms,
         }
-        return cls(**config)
+        # Filter config to only include parameters accepted by the class __init__
+        import inspect
+        sig = inspect.signature(cls.__init__)
+        valid_params = set(sig.parameters.keys()) - {'self'}
+        filtered_config = {k: v for k, v in config.items() if k in valid_params}
+        return cls(**filtered_config)
 
     @classmethod
     def from_json(cls, env_config):
-        return cls(**env_config)
+        # Keep only the keys that cls.__init__ names as parameters; other keys are silently dropped
+        import inspect
+        sig = inspect.signature(cls.__init__)
+        valid_params = set(sig.parameters.keys()) - {'self'}
+        filtered_config = {k: v for k, v in env_config.items() if k in valid_params}
+        return cls(**filtered_config)
 
     @classmethod
     @abstractmethod
@@ -111,6 +123,8 @@ def __init__(
         depth_slope: int = 2,
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
+        add_inverts: bool = True,
+        add_perms: bool = True,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -119,6 +133,8 @@ def __init__(
             "depth_slope": depth_slope,
             "max_depth": max_depth,
             "metrics_weights": metrics_weights,
+            "add_inverts": add_inverts,
+            "add_perms": add_perms,
         })
 
     def get_state(self, input: QuantumCircuit | Clifford):
@@ -145,6 +161,7 @@ def __init__(
         depth_slope: int = 2,
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
+        add_perms: bool = True,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -153,6 +170,7 @@ def __init__(
             "depth_slope": depth_slope,
             "max_depth": max_depth,
             "metrics_weights": metrics_weights,
+            "add_perms": add_perms,
         })
     
     def get_state(self, input: QuantumCircuit | LinearFunction):
@@ -182,6 +200,7 @@ def __init__(
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
         add_inverts: bool = False,
+        add_perms: bool = True,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -191,6 +210,7 @@ def __init__(
             "max_depth": max_depth,
             "metrics_weights": metrics_weights,
             "add_inverts": add_inverts,
+            "add_perms": add_perms,
         })
 
     def get_state(self, input: QuantumCircuit | PermutationGate | Iterable[int]):

From e997e1109ff12916ffde7aa1654a491cd7c20ae9 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Fri, 28 Nov 2025 13:02:32 +0100
Subject: [PATCH 10/20] Linfunc inversions in rust, not python

---
 rust/src/envs/linear_function.rs | 90 +++++++++++++++++++++++++++++++-
 src/qiskit_gym/envs/synthesis.py |  8 +--
 2 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index c3c250d..59c7b84 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -14,6 +14,7 @@ that they have been altered from the originals.
 use pyo3::prelude::*;
 
 use rand::distributions::{Distribution, Uniform};
+use rand::Rng;
 
 use twisterl::rl::env::Env;
 use twisterl::python_interface::env::PyBaseEnv;
@@ -97,6 +98,56 @@ impl LFState {
         }
         true
     }
+
+    /// XORs row `src` into row `dest` in place; identical indices are a no-op.
+    fn row_xor(&mut self, dest: usize, src: usize) {
+        if dest != src {
+            for c in 0..self.size {
+                let bit = self.data[self.index(src, c)];
+                let target = self.index(dest, c);
+                self.data[target] ^= bit;
+            }
+        }
+    }
+
+    /// Exchanges rows `r1` and `r2` entry by entry; does nothing when they coincide.
+    fn swap_rows(&mut self, r1: usize, r2: usize) {
+        if r1 != r2 {
+            for c in 0..self.size {
+                // Resolve both positions in `data` first, then swap the two entries.
+                let (a, b) = (self.index(r1, c), self.index(r2, c));
+                self.data.swap(a, b);
+            }
+        }
+    }
+
+    /// Gauss-Jordan-style inverse via `swap_rows`/`row_xor`; panics if a column has no pivot.
+    fn inverse(&self) -> Self {
+        let n = self.size;
+        let (mut work, mut result) = (self.clone(), LFState::new(n));
+        for pivot_col in 0..n {
+            if !work.get(pivot_col, pivot_col) {
+                let swap_with = ((pivot_col + 1)..n)
+                    .find(|&r| work.get(r, pivot_col))
+                    .expect("LFState is singular; cannot invert");
+                work.swap_rows(pivot_col, swap_with);
+                result.swap_rows(pivot_col, swap_with);
+            }
+            // Clear this column in every other row, mirroring each step on `result`.
+            for r in (0..n).filter(|&r| r != pivot_col) {
+                if work.get(r, pivot_col) {
+                    work.row_xor(r, pivot_col);
+                    result.row_xor(r, pivot_col);
+                }
+            }
+        }
+        debug_assert!(work.solved(), "LFState inverse computation failed");
+        result
+    }
+
+    fn invert(&mut self) {
+        *self = self.inverse(); // overwrite this state with the result of `inverse()`
+    }
 }
 
 // This is the Env definition
@@ -116,6 +167,8 @@ pub struct LinearFunction {
     metrics_values: MetricsCounts,
     metrics_weights: MetricsWeights,
     reward_value: f32,
+    add_inverts: bool,
+    inverted: bool,
     add_perms: bool,
 }
 
@@ -128,6 +181,7 @@ impl LinearFunction {
         depth_slope: usize,
         max_depth: usize,
         metrics_weights: MetricsWeights,
+        add_inverts: bool,
         add_perms: bool,
     ) -> Self {
         let lf = LFState::new(num_qubits);
@@ -156,6 +210,8 @@ impl LinearFunction {
             metrics_values,
             metrics_weights,
             reward_value: if success { 1.0 } else { 0.0 },
+            add_inverts,
+            inverted: false,
             add_perms,
         }
     }
@@ -163,6 +219,16 @@ impl LinearFunction {
         self.lf.solved()
     }
 
+    /// With `add_inverts` set, inverts `self.lf` with probability 0.5 and toggles `inverted`.
+    fn maybe_random_invert(&mut self) {
+        // `&&` short-circuits, so no random draw happens while inversions are disabled.
+        let flip = self.add_inverts && rand::thread_rng().gen_bool(0.5);
+        if flip {
+            self.lf.invert();
+            self.inverted = !self.inverted;
+        }
+    }
+
 }
 
 // This implements the necessary functions for the environment
@@ -188,11 +254,13 @@ impl Env for LinearFunction {
 
     fn set_state(&mut self, state: Vec<i64>) {
         self.lf.data = state.iter().map(|&x| x>0).collect();
+        self.lf.invert();
         self.depth = self.max_depth;
         self.success = self.solved();
         self.metrics.reset();
         self.metrics_values = self.metrics.snapshot();
         //self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
     }
 
     fn reset(&mut self) {
@@ -203,6 +271,7 @@ impl Env for LinearFunction {
         self.metrics.reset();
         self.metrics_values = self.metrics.snapshot();
         self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
 
         let mut rng = rand::thread_rng();
         let action_range = Uniform::new(0, self.num_actions());
@@ -217,6 +286,7 @@ impl Env for LinearFunction {
         self.metrics.reset();
         self.metrics_values = self.metrics.snapshot();
         self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
     }
 
     fn step(&mut self, action: usize)  {
@@ -238,6 +308,7 @@ impl Env for LinearFunction {
         }
 
         self.depth = self.depth.saturating_sub(1); // Prevent underflow
+        self.maybe_random_invert();
         self.success = self.solved();
         let achieved = if self.success { 1.0 } else { 0.0 };
         self.reward_value = achieved - penalty;
@@ -281,7 +352,7 @@ mod tests {
     fn cx_gate_is_self_inverse() {
         let gateset = vec![Gate::CX(0, 1)];
         let metrics_weights = MetricsWeights::default();
-        let mut env = LinearFunction::new(2, 1, gateset, 2, 8, metrics_weights, true);
+        let mut env = LinearFunction::new(2, 1, gateset, 2, 8, metrics_weights, true, true);
         env.depth = env.max_depth;
 
         env.step(0);
@@ -291,6 +362,20 @@ mod tests {
         assert!(env.solved());
         assert!(env.reward() <= 1.0);
     }
+
+    #[test]
+    fn lfstate_inversion_roundtrip() {
+        let mut state = LFState::new(3);
+        state.cx(0, 1);
+        state.swap(1, 2);
+
+        let original = state.data.clone();
+        state.invert();
+        state.invert();
+
+        assert_eq!(state.data, original, "double inversion should restore the matrix");
+        assert!(!state.solved());
+    }
 }
 
 
@@ -307,6 +392,7 @@ impl PyLinearFunctionEnv {
         depth_slope,
         max_depth,
         metrics_weights=None,
+        add_inverts=None,
         add_perms=None,
     ))]
     pub fn new(
@@ -316,6 +402,7 @@ impl PyLinearFunctionEnv {
         depth_slope: usize,
         max_depth: usize,
         metrics_weights: Option<HashMap<String, f32>>,
+        add_inverts: Option<bool>,
         add_perms: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
@@ -326,6 +413,7 @@ impl PyLinearFunctionEnv {
             depth_slope,
             max_depth,
             weights,
+            add_inverts.unwrap_or(true),
             add_perms.unwrap_or(true),
         );
         let env = Box::new(env);
diff --git a/src/qiskit_gym/envs/synthesis.py b/src/qiskit_gym/envs/synthesis.py
index 417c1e6..1b28a4d 100644
--- a/src/qiskit_gym/envs/synthesis.py
+++ b/src/qiskit_gym/envs/synthesis.py
@@ -161,6 +161,7 @@ def __init__(
         depth_slope: int = 2,
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
+        add_inverts: bool = True,
         add_perms: bool = True,
     ):
         super().__init__(**{
@@ -170,14 +171,15 @@ def __init__(
             "depth_slope": depth_slope,
             "max_depth": max_depth,
             "metrics_weights": metrics_weights,
+            "add_inverts": add_inverts,
             "add_perms": add_perms,
         })
     
     def get_state(self, input: QuantumCircuit | LinearFunction):
         if isinstance(input, QuantumCircuit):
-            input = LinearFunction(input.inverse())
-        elif isinstance(input, LinearFunction):
-            input = LinearFunction(Clifford(input).adjoint())
+            input = LinearFunction(input)
+        elif not isinstance(input, LinearFunction):
+            input = LinearFunction(Clifford(input))
         return np.array(input.linear).flatten().astype(int).tolist()
 
 

From f40bf6886e1688f99c3f35f45312405af1436201 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Fri, 28 Nov 2025 15:39:29 +0100
Subject: [PATCH 11/20] Inverts default to true for permutations

---
 src/qiskit_gym/envs/synthesis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/qiskit_gym/envs/synthesis.py b/src/qiskit_gym/envs/synthesis.py
index 1b28a4d..3946da8 100644
--- a/src/qiskit_gym/envs/synthesis.py
+++ b/src/qiskit_gym/envs/synthesis.py
@@ -201,7 +201,7 @@ def __init__(
         depth_slope: int = 2,
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
-        add_inverts: bool = False,
+        add_inverts: bool = True,
         add_perms: bool = True,
     ):
         super().__init__(**{

From 073d9810d698f81f05096b5f535d551494f1bae6 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Wed, 3 Dec 2025 15:59:14 +0100
Subject: [PATCH 12/20] Update twisterl versions

---
 pyproject.toml  | 2 +-
 rust/Cargo.lock | 4 +++-
 rust/Cargo.toml | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d5fe42d..684fe73 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ dynamic = ["version"]
 dependencies = [
   "qiskit>=2.1",
   "gymnasium",
-  "twisterl",
+  "twisterl~=0.3.0",
 ]
 
 
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 187e4c2..373031f 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -466,7 +466,9 @@ checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
 
 [[package]]
 name = "twisterl"
-version = "0.1.0"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca21419515e226009756a6e9fd094530f9e8915191f1f6499de949080bcef7e9"
 dependencies = [
  "anyhow",
  "dyn-clone",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 8350317..52d3067 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -14,7 +14,7 @@ nalgebra = "0.33.0"
 rand = "0.8.4"
 rayon = "1.1.0"
 petgraph = "0.6.5"
-twisterl = {path = "../../../rust/", features = ["python_bindings"]}
+twisterl = {version = "~0.3.0", features = ["python_bindings"]}
 
 [profile.release]
 opt-level = 3

From e7c4a5ff8108763bb0a843e84b8f5005ef8c223a Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Thu, 4 Dec 2025 10:53:42 +0100
Subject: [PATCH 13/20] Fix a few issues

---
 rust/src/envs/clifford.rs        |  8 --------
 rust/src/envs/linear_function.rs |  5 +----
 rust/src/envs/metrics.rs         |  8 ++++----
 rust/src/envs/permutation.rs     | 27 +++++++++++++-------------
 rust/src/envs/symmetry.rs        | 33 +++++++++++++++++++++++++++++++-
 src/qiskit_gym/envs/synthesis.py |  2 +-
 6 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index 317553c..c7c9680 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -54,12 +54,6 @@ impl CFState {
         self.data[self.index(row, col)]
     }
 
-    #[inline]
-    fn set(&mut self, row: usize, col: usize, val: bool) {
-        let idx = self.index(row, col);
-        self.data[idx] = val;
-    }
-
     // Row ops over GF(2)
     fn row_xor(&mut self, dest: usize, src: usize) {
         if dest == src { return; }
@@ -194,7 +188,6 @@ pub struct Clifford {
     reward_value: f32,
     add_inverts: bool,
     inverted: bool,
-    add_perms: bool,
 }
 
 impl Clifford {
@@ -236,7 +229,6 @@ impl Clifford {
             reward_value: if success { 1.0 } else { 0.0 },
             add_inverts,
             inverted: false,
-            add_perms,
         }
     }
     pub fn solved(&self) -> bool { self.cf.solved() }
diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index 59c7b84..5b6e88d 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -169,7 +169,6 @@ pub struct LinearFunction {
     reward_value: f32,
     add_inverts: bool,
     inverted: bool,
-    add_perms: bool,
 }
 
 
@@ -212,7 +211,6 @@ impl LinearFunction {
             reward_value: if success { 1.0 } else { 0.0 },
             add_inverts,
             inverted: false,
-            add_perms,
         }
     }
     pub fn solved(&self) -> bool {
@@ -254,12 +252,11 @@ impl Env for LinearFunction {
 
     fn set_state(&mut self, state: Vec<i64>) {
         self.lf.data = state.iter().map(|&x| x>0).collect();
-        self.lf.invert();
         self.depth = self.max_depth;
         self.success = self.solved();
         self.metrics.reset();
         self.metrics_values = self.metrics.snapshot();
-        //self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
         self.inverted = false;
     }
 
diff --git a/rust/src/envs/metrics.rs b/rust/src/envs/metrics.rs
index 929696d..4eb8fc5 100644
--- a/rust/src/envs/metrics.rs
+++ b/rust/src/envs/metrics.rs
@@ -86,11 +86,12 @@ impl MetricsTracker {
         }
 
         self.n_gates += 1;
+        let gate_layer = self.last_gates[target] + 1;
+        self.last_gates[target] = gate_layer;
 
-        if self.last_gates[target] >= 0 {
-            self.layers.insert(self.last_gates[target] as usize);
+        if gate_layer >= 0 {
+            self.layers.insert(gate_layer as usize);
         }
-        self.last_gates[target] += 1;
     }
 
     fn cx(&mut self, control: usize, target: usize) {
@@ -181,4 +182,3 @@ impl MetricsWeights {
         weights
     }
 }
-
diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index 5a879e9..8d028c4 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -44,7 +44,6 @@ pub struct Permutation {
     metrics_values: MetricsCounts,
     metrics_weights: MetricsWeights,
     reward_value: f32,
-    add_perms: bool,
 }
 
 
@@ -61,10 +60,7 @@ impl Permutation {
     ) -> Self {
         // Only compute symmetries if enabled
         let (obs_perms, act_perms) = if add_perms {
-            println!("Computing perms");
-            let result = compute_twists_square(num_qubits, &gateset);
-            println!("Computing perms finished");
-            result
+            compute_twists_square(num_qubits, &gateset)
         } else {
             (Vec::new(), Vec::new())
         };
@@ -88,7 +84,6 @@ impl Permutation {
             metrics_values,
             metrics_weights,
             reward_value: 1.0,
-            add_perms,
         }
     }
 
@@ -102,6 +97,18 @@ impl Permutation {
         inv
     }
 
+    /// Randomly invert the permutation with 50% probability when enabled.
+    fn maybe_random_invert(&mut self) {
+        if !self.add_inverts {
+            return;
+        }
+
+        let mut rng = rand::thread_rng();
+        if rng.gen_bool(0.5) {
+            self.state = Self::invert_perm(&self.state);
+        }
+    }
+
     pub fn solved(&self) -> bool {
         for i in 0..self.state.len() {
             if self.state[i] != i {return false}
@@ -187,13 +194,7 @@ impl Env for Permutation {
             }
         }
 
-        // Randomly invert the permutation with 50% probability during training
-        if self.add_inverts {
-            let mut rng = rand::thread_rng();
-            if rng.gen::<f32>() > 0.5 {
-                self.state = Self::invert_perm(&self.state);
-            }
-        }
+        self.maybe_random_invert();
 
         self.depth = self.depth.saturating_sub(1); // Prevent underflow
         self.success = self.solved();
diff --git a/rust/src/envs/symmetry.rs b/rust/src/envs/symmetry.rs
index d32ab27..f298394 100644
--- a/rust/src/envs/symmetry.rs
+++ b/rust/src/envs/symmetry.rs
@@ -77,6 +77,37 @@ fn identity_perm(num_qubits: usize) -> Vec<usize> {
     (0..num_qubits).collect()
 }
 
+fn all_permutations(num_qubits: usize) -> Vec<Vec<usize>> {
+    let mut perm: Vec<usize> = (0..num_qubits).collect();
+    let mut results = Vec::new();
+
+    fn heap_permute(k: usize, perm: &mut Vec<usize>, results: &mut Vec<Vec<usize>>) {
+        if k == 1 {
+            results.push(perm.clone());
+            return;
+        }
+
+        heap_permute(k - 1, perm, results);
+
+        for i in 0..(k - 1) {
+            if k % 2 == 0 {
+                perm.swap(i, k - 1);
+            } else {
+                perm.swap(0, k - 1);
+            }
+            heap_permute(k - 1, perm, results);
+        }
+    }
+
+    if num_qubits == 0 {
+        results.push(Vec::new());
+    } else {
+        heap_permute(num_qubits, &mut perm, &mut results);
+    }
+
+    results
+}
+
 #[derive(Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
 struct NodeSignature {
     degree: usize,
@@ -109,7 +140,7 @@ fn enumerate_automorphisms(
     }
 
     if !has_edge {
-        return vec![identity_perm(n)];
+        return all_permutations(n);
     }
 
     let mut nodes_order: Vec<usize> = (0..n).collect();
diff --git a/src/qiskit_gym/envs/synthesis.py b/src/qiskit_gym/envs/synthesis.py
index 3946da8..9f040f2 100644
--- a/src/qiskit_gym/envs/synthesis.py
+++ b/src/qiskit_gym/envs/synthesis.py
@@ -44,7 +44,7 @@ def from_coupling_map(
         depth_slope: int = 2,
         max_depth: int = 128,
         metrics_weights: dict[str, float] | None = None,
-        add_inverts: bool = False,
+        add_inverts: bool = True,
         add_perms: bool = True,
     ):
         if basis_gates is None:

From c79045f6416494933605cf73b5aa8a642422d0d5 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Thu, 4 Dec 2025 15:19:58 +0100
Subject: [PATCH 14/20] Fix permutations/twist generation performance

---
 rust/src/envs/symmetry.rs | 152 ++++++++++++--------------------------
 1 file changed, 48 insertions(+), 104 deletions(-)

diff --git a/rust/src/envs/symmetry.rs b/rust/src/envs/symmetry.rs
index f298394..069cb1d 100644
--- a/rust/src/envs/symmetry.rs
+++ b/rust/src/envs/symmetry.rs
@@ -14,6 +14,10 @@ that they have been altered from the originals.
 use std::collections::{HashMap, HashSet};
 
 use crate::envs::common::Gate;
+use petgraph::algo::isomorphism::subgraph_isomorphisms_iter;
+use petgraph::graph::Graph;
+use petgraph::visit::NodeIndexable;
+use petgraph::Directed;
 
 #[derive(Hash, Eq, PartialEq, Clone, Copy)]
 enum GateKind {
@@ -108,32 +112,7 @@ fn all_permutations(num_qubits: usize) -> Vec<Vec<usize>> {
     results
 }
 
-#[derive(Clone, Eq, PartialEq, Hash, PartialOrd, Ord)]
-struct NodeSignature {
-    degree: usize,
-    neighbor_degrees: Vec<usize>,
-}
-
-impl NodeSignature {
-    fn new(node: usize, degrees: &[usize], adjacency: &[Vec<bool>]) -> Self {
-        let mut neighbor_degrees: Vec<usize> = adjacency[node]
-            .iter()
-            .enumerate()
-            .filter_map(|(idx, &connected)| if connected { Some(degrees[idx]) } else { None })
-            .collect();
-        neighbor_degrees.sort_unstable();
-        Self {
-            degree: degrees[node],
-            neighbor_degrees,
-        }
-    }
-}
-
-fn enumerate_automorphisms(
-    adjacency: &[Vec<bool>],
-    has_edge: bool,
-    signatures: &[NodeSignature],
-) -> Vec<Vec<usize>> {
+fn compute_automorphisms(adjacency: &[Vec<bool>], has_edge: bool) -> Vec<Vec<usize>> {
     let n = adjacency.len();
     if n == 0 {
         return vec![Vec::new()];
@@ -143,84 +122,57 @@ fn enumerate_automorphisms(
         return all_permutations(n);
     }
 
-    let mut nodes_order: Vec<usize> = (0..n).collect();
-    nodes_order.sort_by(|&a, &b| signatures[a].cmp(&signatures[b]));
-
-    let mut perm = vec![usize::MAX; n];
-    let mut used = vec![false; n];
-    let mut results = Vec::new();
-
-    backtrack_automorphisms(
-        0,
-        &nodes_order,
-        signatures,
-        adjacency,
-        &mut perm,
-        &mut used,
-        &mut results,
-    );
-
-    if results.is_empty() {
-        results.push(identity_perm(n));
-    }
-
-    results
-}
-
-fn backtrack_automorphisms(
-    idx: usize,
-    nodes_order: &[usize],
-    signatures: &[NodeSignature],
-    adjacency: &[Vec<bool>],
-    perm: &mut Vec<usize>,
-    used: &mut Vec<bool>,
-    results: &mut Vec<Vec<usize>>,
-) {
-    if idx == nodes_order.len() {
-        results.push(perm.clone());
-        return;
+    // Build a directed graph with symmetric edges and use petgraph's VF2 enumerator.
+    let mut graph = Graph::<usize, (), Directed>::new();
+    let mut nodes = Vec::with_capacity(n);
+    for node in 0..n {
+        nodes.push(graph.add_node(node));
     }
-
-    let node_from = nodes_order[idx];
-    let target_signature = &signatures[node_from];
-    let n = adjacency.len();
-
-    for node_to in 0..n {
-        if used[node_to] || &signatures[node_to] != target_signature {
-            continue;
+    for i in 0..n {
+        for j in (i + 1)..n {
+            if adjacency[i][j] {
+                graph.add_edge(nodes[i], nodes[j], ());
+                graph.add_edge(nodes[j], nodes[i], ());
+            }
         }
+    }
 
-        let mut consistent = true;
-        for prev_idx in 0..idx {
-            let prev_from = nodes_order[prev_idx];
-            let prev_to = perm[prev_from];
-            if prev_to == usize::MAX {
+    let mut results: Vec<Vec<usize>> = Vec::new();
+    let mut node_match = |_: &usize, _: &usize| true;
+    let mut edge_match = |_: &(), _: &()| true;
+
+    // Use &&graph so G0/G1 are `&Graph`, which implement the required traits for VF2.
+    let graph_ref = &graph;
+    if let Some(iter) =
+        subgraph_isomorphisms_iter(&graph_ref, &graph_ref, &mut node_match, &mut edge_match)
+    {
+        for mapping in iter {
+            if mapping.len() != n {
                 continue;
             }
-            if adjacency[node_from][prev_from] != adjacency[node_to][prev_to] {
-                consistent = false;
-                break;
+            // mapping indices are compact node indices; translate back to node labels
+            let mut perm = vec![usize::MAX; n];
+            for (from_idx, to_idx) in mapping.into_iter().enumerate() {
+                let from_node = graph.from_index(from_idx);
+                let to_node = graph.from_index(to_idx);
+                let from_label = graph.node_weight(from_node).copied().unwrap_or(0);
+                let to_label = graph.node_weight(to_node).copied().unwrap_or(0);
+                perm[from_label] = to_label;
             }
+            if perm.iter().any(|&v| v == usize::MAX) {
+                continue;
+            }
+            results.push(perm);
         }
+    }
 
-        if !consistent {
-            continue;
-        }
-
-        perm[node_from] = node_to;
-        used[node_to] = true;
-        backtrack_automorphisms(
-            idx + 1,
-            nodes_order,
-            signatures,
-            adjacency,
-            perm,
-            used,
-            results,
-        );
-        used[node_to] = false;
-        perm[node_from] = usize::MAX;
+    if results.is_empty() {
+        results.push(identity_perm(n));
     }
+
+    results.sort();
+    results.dedup();
+    results
 }
 
 fn build_action_perm(
@@ -283,15 +235,7 @@ where
         }
     }
 
-    let degrees: Vec<usize> = adjacency
-        .iter()
-        .map(|row| row.iter().filter(|&&edge| edge).count())
-        .collect();
-    let signatures: Vec<NodeSignature> = (0..num_qubits)
-        .map(|idx| NodeSignature::new(idx, &degrees, &adjacency))
-        .collect();
-
-    let automorphisms = enumerate_automorphisms(&adjacency, has_edge, &signatures);
+    let automorphisms = compute_automorphisms(&adjacency, has_edge);
 
     let mut seen: HashSet<Vec<usize>> = HashSet::new();
     let mut obs_perms: Vec<Vec<usize>> = Vec::new();

From ad63f3cb3da06936834f6dd8a8b160a3df7cb105 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Thu, 4 Dec 2025 15:21:39 +0100
Subject: [PATCH 15/20] Recover deleted code

---
 rust/src/envs/clifford.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index c7c9680..61964d0 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -54,6 +54,12 @@ impl CFState {
         self.data[self.index(row, col)]
     }
 
+    #[inline]
+    fn set(&mut self, row: usize, col: usize, val: bool) {
+        let idx = self.index(row, col);
+        self.data[idx] = val;
+    }
+
     // Row ops over GF(2)
     fn row_xor(&mut self, dest: usize, src: usize) {
         if dest == src { return; }

From 87dff2b6efe3f5f95bac41247118a5b1fbbd2eb0 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Fri, 5 Dec 2025 08:48:07 +0100
Subject: [PATCH 16/20] Perms add_inverts default to true

---
 rust/src/envs/permutation.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index 8d028c4..bde9d89 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -64,7 +64,7 @@ impl Permutation {
         } else {
             (Vec::new(), Vec::new())
         };
-
+        
         let metrics = MetricsTracker::new(num_qubits);
         let metrics_values = metrics.snapshot();
         let success = true;
@@ -244,7 +244,7 @@ impl PyPermutationEnv {
         add_perms: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
-        let add_inverts = add_inverts.unwrap_or(false);
+        let add_inverts = add_inverts.unwrap_or(true);
         let add_perms = add_perms.unwrap_or(true);
         let env = Permutation::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights, add_inverts, add_perms);
         let env = Box::new(env);

From e0e956dd6c0e80c6777845209e211f7f6ff0cdde Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Mon, 22 Dec 2025 10:35:30 +0100
Subject: [PATCH 17/20] Fix inverse solution tracking. Address some comments.

---
 pyproject.toml                   |   2 +-
 rust/Cargo.lock                  |   4 +-
 rust/Cargo.toml                  |   2 +-
 rust/src/envs/clifford.rs        |  33 +++++++--
 rust/src/envs/linear_function.rs | 121 ++++++++++++++++++-------------
 rust/src/envs/permutation.rs     |  72 ++++++++++++++----
 src/qiskit_gym/envs/synthesis.py |  43 +++++++++--
 src/qiskit_gym/rl/synthesis.py   |   4 +-
 8 files changed, 203 insertions(+), 78 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 684fe73..da82e3b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ dynamic = ["version"]
 dependencies = [
   "qiskit>=2.1",
   "gymnasium",
-  "twisterl~=0.3.0",
+  "twisterl~=0.4.1",
 ]
 
 
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 373031f..672d221 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -466,9 +466,9 @@ checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
 
 [[package]]
 name = "twisterl"
-version = "0.3.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca21419515e226009756a6e9fd094530f9e8915191f1f6499de949080bcef7e9"
+checksum = "2e20f49e0f02e09d1ddee49bde56e1ea4642427580012458cf98d6d87fd0c15c"
 dependencies = [
  "anyhow",
  "dyn-clone",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 52d3067..b73406b 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -14,7 +14,7 @@ nalgebra = "0.33.0"
 rand = "0.8.4"
 rayon = "1.1.0"
 petgraph = "0.6.5"
-twisterl = {version = "~0.3.0", features = ["python_bindings"]}
+twisterl = {version = "~0.4.1", features = ["python_bindings"]}
 
 [profile.release]
 opt-level = 3
diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index 61964d0..23da12f 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -193,6 +193,9 @@ pub struct Clifford {
     metrics_weights: MetricsWeights,
     reward_value: f32,
     add_inverts: bool,
+    track_solution: bool,
+    solution: Vec<usize>,
+    solution_inv: Vec<usize>,
     inverted: bool,
 }
 
@@ -206,6 +209,7 @@ impl Clifford {
         metrics_weights: MetricsWeights,
         add_inverts: bool,
         add_perms: bool,
+        track_solution: bool,
     ) -> Self {
         let cf = CFState::new(num_qubits);
         let success = cf.solved();
@@ -234,6 +238,9 @@ impl Clifford {
             metrics_weights,
             reward_value: if success { 1.0 } else { 0.0 },
             add_inverts,
+            track_solution,
+            solution: Vec::new(),
+            solution_inv: Vec::new(),
             inverted: false,
         }
     }
@@ -290,12 +297,6 @@ impl Env for Clifford {
 
     fn reset(&mut self) {
         self.cf = CFState::new(self.cf.n);
-        self.depth = self.max_depth;
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-
         let mut rng = rand::thread_rng();
         let action_range = Uniform::new(0, self.num_actions());
 
@@ -326,6 +327,14 @@ impl Env for Clifford {
             self.apply_gate_to_state(&gate);
         }
 
+        if self.track_solution {
+            if self.inverted {
+                self.solution_inv.push(action);
+            } else {
+                self.solution.push(action);
+            }
+        }
+
         self.depth = self.depth.saturating_sub(1);
         self.maybe_random_invert();
         self.success = self.solved();
@@ -357,6 +366,15 @@ impl Env for Clifford {
     fn twists(&self) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
         (self.obs_perms.clone(), self.act_perms.clone())
     }
+
+    fn track_solution(&self) -> bool { self.track_solution }
+
+    fn solution(&self) -> Vec<usize> {
+        let mut out = Vec::with_capacity(self.solution.len() + self.solution_inv.len());
+        out.extend_from_slice(&self.solution);
+        out.extend(self.solution_inv.iter().rev().copied());
+        out
+    }
 }
 
 #[pyclass(name="CliffordEnv", extends=PyBaseEnv)]
@@ -374,6 +392,7 @@ impl PyCliffordEnv {
         metrics_weights=None,
         add_inverts=None,
         add_perms=None,
+        track_solution=None,
     ))]
     pub fn new(
         num_qubits: usize,
@@ -384,6 +403,7 @@ impl PyCliffordEnv {
         metrics_weights: Option<HashMap<String, f32>>,
         add_inverts: Option<bool>,
         add_perms: Option<bool>,
+        track_solution: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
         let env = Clifford::new(
@@ -395,6 +415,7 @@ impl PyCliffordEnv {
             weights,
             add_inverts.unwrap_or(true),
             add_perms.unwrap_or(true),
+            track_solution.unwrap_or(true),
         );
         let env = Box::new(env);
         (PyCliffordEnv, PyBaseEnv { env })
diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index 5b6e88d..f1f020f 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -168,6 +168,9 @@ pub struct LinearFunction {
     metrics_weights: MetricsWeights,
     reward_value: f32,
     add_inverts: bool,
+    track_solution: bool,
+    solution: Vec<usize>,
+    solution_inv: Vec<usize>,
     inverted: bool,
 }
 
@@ -182,6 +185,7 @@ impl LinearFunction {
         metrics_weights: MetricsWeights,
         add_inverts: bool,
         add_perms: bool,
+        track_solution: bool,
     ) -> Self {
         let lf = LFState::new(num_qubits);
         let success = lf.solved();
@@ -210,6 +214,9 @@ impl LinearFunction {
             metrics_weights,
             reward_value: if success { 1.0 } else { 0.0 },
             add_inverts,
+            track_solution,
+            solution: Vec::new(),
+            solution_inv: Vec::new(),
             inverted: false,
         }
     }
@@ -227,6 +234,14 @@ impl LinearFunction {
         }
     }
 
+    fn apply_gate_to_state(&mut self, gate: &Gate) {
+        match gate {
+            &Gate::CX(q1, q2) => self.lf.cx(q1, q2),
+            &Gate::SWAP(q1, q2) => self.lf.swap(q1, q2),
+            _ => {}
+        }
+    }
+
 }
 
 // This implements the necessary functions for the environment
@@ -263,20 +278,15 @@ impl Env for LinearFunction {
     fn reset(&mut self) {
         // Create an identity matrix for the initial 'lf' state
         self.lf = LFState::new(self.lf.size);
-        self.depth = self.max_depth;
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-        self.inverted = false;
-
         let mut rng = rand::thread_rng();
         let action_range = Uniform::new(0, self.num_actions());
 
         // Apply random actions based on the difficulty
         for _ in 0..self.difficulty {
             let action = action_range.sample(&mut rng);
-            self.step(action);
+            if let Some(gate) = self.gateset.get(action).cloned() {
+                self.apply_gate_to_state(&gate);
+            }
         }
         self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
         self.success = self.solved();
@@ -289,18 +299,21 @@ impl Env for LinearFunction {
     fn step(&mut self, action: usize)  {
         let mut penalty = 0.0f32;
 
-        if action < self.gateset.len() {
-            let gate = &self.gateset[action];
+         if let Some(gate) = self.gateset.get(action).cloned() {
             let previous = self.metrics_values.clone();
-            self.metrics.apply_gate(gate);
+            self.metrics.apply_gate(&gate);
             let new_metrics = self.metrics.snapshot();
             penalty = new_metrics.weighted_delta(&previous, &self.metrics_weights);
             self.metrics_values = new_metrics;
 
-            match gate {
-                &Gate::CX(q1, q2) => self.lf.cx(q1, q2),
-                &Gate::SWAP(q1, q2) => self.lf.swap(q1, q2),
-                _ => {}
+            self.apply_gate_to_state(&gate);
+        }
+
+        if self.track_solution {
+           if self.inverted {
+               self.solution_inv.push(action);
+            } else {
+                self.solution.push(action);
             }
         }
 
@@ -337,45 +350,17 @@ impl Env for LinearFunction {
     fn twists(&self) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
         (self.obs_perms.clone(), self.act_perms.clone())
     }
-}
-
-// metrics implementation shared via crate::envs::metrics
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn cx_gate_is_self_inverse() {
-        let gateset = vec![Gate::CX(0, 1)];
-        let metrics_weights = MetricsWeights::default();
-        let mut env = LinearFunction::new(2, 1, gateset, 2, 8, metrics_weights, true, true);
-        env.depth = env.max_depth;
 
-        env.step(0);
-        assert!(!env.solved());
+    fn track_solution(&self) -> bool { self.track_solution }
 
-        env.step(0);
-        assert!(env.solved());
-        assert!(env.reward() <= 1.0);
-    }
-
-    #[test]
-    fn lfstate_inversion_roundtrip() {
-        let mut state = LFState::new(3);
-        state.cx(0, 1);
-        state.swap(1, 2);
-
-        let original = state.data.clone();
-        state.invert();
-        state.invert();
-
-        assert_eq!(state.data, original, "double inversion should restore the matrix");
-        assert!(!state.solved());
+    fn solution(&self) -> Vec<usize> {
+        let mut out = Vec::with_capacity(self.solution.len() + self.solution_inv.len());
+        out.extend_from_slice(&self.solution);
+        out.extend(self.solution_inv.iter().rev().copied());
+        out
     }
 }
 
-
 #[pyclass(name="LinearFunctionEnv", extends=PyBaseEnv)]
 pub struct PyLinearFunctionEnv;
 
@@ -391,6 +376,7 @@ impl PyLinearFunctionEnv {
         metrics_weights=None,
         add_inverts=None,
         add_perms=None,
+        track_solution=None,
     ))]
     pub fn new(
         num_qubits: usize,
@@ -401,6 +387,7 @@ impl PyLinearFunctionEnv {
         metrics_weights: Option<HashMap<String, f32>>,
         add_inverts: Option<bool>,
         add_perms: Option<bool>,
+        track_solution: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
         let env = LinearFunction::new(
@@ -412,8 +399,44 @@ impl PyLinearFunctionEnv {
             weights,
             add_inverts.unwrap_or(true),
             add_perms.unwrap_or(true),
+            track_solution.unwrap_or(true)
         );
         let env = Box::new(env);
         (PyLinearFunctionEnv, PyBaseEnv { env })
     }
 }
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn cx_gate_is_self_inverse() {
+        let gateset = vec![Gate::CX(0, 1)];
+        let metrics_weights = MetricsWeights::default();
+        let mut env = LinearFunction::new(2, 1, gateset, 2, 8, metrics_weights, true, true, true);
+        env.depth = env.max_depth;
+
+        env.step(0);
+        assert!(!env.solved());
+
+        env.step(0);
+        assert!(env.solved());
+        assert!(env.reward() <= 1.0);
+    }
+
+    #[test]
+    fn lfstate_inversion_roundtrip() {
+        let mut state = LFState::new(3);
+        state.cx(0, 1);
+        state.swap(1, 2);
+
+        let original = state.data.clone();
+        state.invert();
+        state.invert();
+
+        assert_eq!(state.data, original, "double inversion should restore the matrix");
+        assert!(!state.solved());
+    }
+}
diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index bde9d89..e168f56 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -39,11 +39,15 @@ pub struct Permutation {
     pub max_depth: usize,
     pub obs_perms: Vec<Vec<usize>>,
     pub act_perms: Vec<Vec<usize>>,
-    pub add_inverts: bool,
     metrics: MetricsTracker,
     metrics_values: MetricsCounts,
     metrics_weights: MetricsWeights,
     reward_value: f32,
+    pub add_inverts: bool,
+    track_solution: bool,
+    solution: Vec<usize>,
+    solution_inv: Vec<usize>,    
+    inverted: bool,
 }
 
 
@@ -57,6 +61,7 @@ impl Permutation {
         metrics_weights: MetricsWeights,
         add_inverts: bool,
         add_perms: bool,
+        track_solution: bool,
     ) -> Self {
         // Only compute symmetries if enabled
         let (obs_perms, act_perms) = if add_perms {
@@ -79,11 +84,15 @@ impl Permutation {
             max_depth,
             obs_perms,
             act_perms,
-            add_inverts,
             metrics,
             metrics_values,
             metrics_weights,
             reward_value: 1.0,
+            add_inverts,
+            track_solution,
+            solution: Vec::new(),
+            solution_inv: Vec::new(),
+            inverted: false,
         }
     }
 
@@ -106,6 +115,7 @@ impl Permutation {
         let mut rng = rand::thread_rng();
         if rng.gen_bool(0.5) {
             self.state = Self::invert_perm(&self.state);
+            self.inverted = !self.inverted;
         }
     }
 
@@ -151,30 +161,30 @@ impl Env for Permutation {
         self.metrics.reset();
         self.metrics_values = self.metrics.snapshot();
         self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
     }
 
     fn reset(&mut self) {
         // Reset the state to the target
         self.state = (0..self.num_qubits).collect();
-        self.depth = self.max_depth;
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-
         let mut rng = rand::thread_rng();
         let action_range = Uniform::new(0, self.num_actions());
 
         // Apply random actions based on the difficulty
         for _ in 0..self.difficulty {
             let action = action_range.sample(&mut rng);
-            self.step(action);
+            let gate = &self.gateset[action];
+            match gate {
+                Gate::SWAP(q1, q2) => (self.state[*q2], self.state[*q1]) = (self.state[*q1], self.state[*q2]),
+                _ => {}
+            }
         }
         self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
         self.success = self.solved();
         self.metrics.reset();
         self.metrics_values = self.metrics.snapshot();
         self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
     }
 
     fn step(&mut self, action: usize)  {
@@ -192,6 +202,14 @@ impl Env for Permutation {
                 Gate::SWAP(q1, q2) => (self.state[*q2], self.state[*q1]) = (self.state[*q1], self.state[*q2]),
                 _ => {}
             }
+
+            if self.track_solution {
+               if self.inverted {
+                   self.solution_inv.push(action);
+                } else {
+                    self.solution.push(action);
+                }
+            }
         }
 
         self.maybe_random_invert();
@@ -223,6 +241,15 @@ impl Env for Permutation {
     fn twists(&self) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
         (self.obs_perms.clone(), self.act_perms.clone())
     }
+
+    fn track_solution(&self) -> bool { self.track_solution }
+
+    /// Recorded actions: the forward list, then the inverted-phase list in reverse order.
+    fn solution(&self) -> Vec<usize> {
+        let forward = self.solution.iter();
+        let backward = self.solution_inv.iter().rev();
+        forward.chain(backward).copied().collect()
+    }
 }
 
 
@@ -232,7 +259,17 @@ pub struct PyPermutationEnv;
 #[pymethods]
 impl PyPermutationEnv {
     #[new]
-    #[pyo3(signature = (num_qubits, difficulty, gateset, depth_slope, max_depth, metrics_weights=None, add_inverts=None, add_perms=None))]
+    #[pyo3(signature = (
+        num_qubits,
+        difficulty,
+        gateset,
+        depth_slope,
+        max_depth,
+        metrics_weights=None,
+        add_inverts=None,
+        add_perms=None,
+        track_solution=None,
+    ))]
     pub fn new(
         num_qubits: usize,
         difficulty: usize,
@@ -242,11 +279,20 @@ impl PyPermutationEnv {
         metrics_weights: Option<HashMap<String, f32>>,
         add_inverts: Option<bool>,
         add_perms: Option<bool>,
+        track_solution: Option<bool>,
     ) -> (Self, PyBaseEnv) {
         let weights = MetricsWeights::from_hashmap(metrics_weights);
-        let add_inverts = add_inverts.unwrap_or(true);
-        let add_perms = add_perms.unwrap_or(true);
-        let env = Permutation::new(num_qubits, difficulty, gateset, depth_slope, max_depth, weights, add_inverts, add_perms);
+        let env = Permutation::new(
+            num_qubits,
+            difficulty,
+            gateset,
+            depth_slope,
+            max_depth,
+            weights,
+            add_inverts.unwrap_or(true),
+            add_perms.unwrap_or(true),
+            track_solution.unwrap_or(true)
+        );
         let env = Box::new(env);
         (PyPermutationEnv, PyBaseEnv { env })
     }
diff --git a/src/qiskit_gym/envs/synthesis.py b/src/qiskit_gym/envs/synthesis.py
index 9f040f2..a6fa98f 100644
--- a/src/qiskit_gym/envs/synthesis.py
+++ b/src/qiskit_gym/envs/synthesis.py
@@ -100,6 +100,8 @@ def from_json(cls, env_config):
     def get_state(cls, input):
         pass
 
+    def post_process_synthesis(self, synth_circuit: QuantumCircuit, input_state):
+        return synth_circuit
 
 # ---------------------------------------
 # ------------- Env classes -------------
@@ -110,6 +112,22 @@ def get_state(cls, input):
 
 CliffordEnv = gym_adapter(qiskit_gym_rs.CliffordEnv)
 
+def _solve_phases(clifford_cpy):
+    """Return a circuit holding at most one Pauli gate per qubit, picked from the phase bits."""
+    num_qubits = clifford_cpy.num_qubits
+    out = QuantumCircuit(num_qubits)
+
+    # (destab, stab) phase bits pick the gate: both -> Y, stab only -> X, destab only -> Z, neither -> none.
+    for qubit in range(num_qubits):
+        stab = clifford_cpy.stab_phase[qubit]
+        destab = clifford_cpy.destab_phase[qubit]
+        if destab and stab:
+            out.y(qubit)
+        elif not destab and stab:
+            out.x(qubit)
+        elif destab and not stab:
+            out.z(qubit)
+    return out
 
 class CliffordGym(CliffordEnv, BaseSynthesisEnv):
     cls_name = "CliffordEnv"
@@ -125,6 +143,7 @@ def __init__(
         metrics_weights: dict[str, float] | None = None,
         add_inverts: bool = True,
         add_perms: bool = True,
+        track_solution: bool = True,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -135,12 +154,21 @@ def __init__(
             "metrics_weights": metrics_weights,
             "add_inverts": add_inverts,
             "add_perms": add_perms,
+            "track_solution": track_solution,
         })
 
     def get_state(self, input: QuantumCircuit | Clifford):
         if isinstance(input, QuantumCircuit):
             input = Clifford(input)
         return input.adjoint().tableau[:, :-1].T.flatten().astype(int).tolist()
+    
+    def post_process_synthesis(self, synth_circuit: QuantumCircuit, input):
+        synth_circuit = synth_circuit.inverse()
+        if isinstance(input, QuantumCircuit):
+            input = Clifford(input)
+        dcliff = Clifford(synth_circuit).compose(input)
+        out = _solve_phases(dcliff).compose(synth_circuit).inverse()
+        return out
 
 
 # ------------- Linear Function -------------
@@ -163,6 +191,7 @@ def __init__(
         metrics_weights: dict[str, float] | None = None,
         add_inverts: bool = True,
         add_perms: bool = True,
+        track_solution: bool = True,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -173,15 +202,15 @@ def __init__(
             "metrics_weights": metrics_weights,
             "add_inverts": add_inverts,
             "add_perms": add_perms,
+            "track_solution": track_solution,
         })
     
     def get_state(self, input: QuantumCircuit | LinearFunction):
-        if isinstance(input, QuantumCircuit):
-            input = LinearFunction(input)
-        elif not isinstance(input, LinearFunction):
-            input = LinearFunction(Clifford(input))
+        # This returns the inverse linear function to get the right
+        # synthesized circuit at output, instead of its inverse.
+        input = LinearFunction(Clifford(input).adjoint())
         return np.array(input.linear).flatten().astype(int).tolist()
-
+        
 
 # ------------- Permutation -------------
 from qiskit.circuit.library.generalized_gates import PermutationGate
@@ -203,6 +232,7 @@ def __init__(
         metrics_weights: dict[str, float] | None = None,
         add_inverts: bool = True,
         add_perms: bool = True,
+        track_solution: bool = True,
     ):
         super().__init__(**{
             "num_qubits": num_qubits,
@@ -213,6 +243,7 @@ def __init__(
             "metrics_weights": metrics_weights,
             "add_inverts": add_inverts,
             "add_perms": add_perms,
+            "track_solution": track_solution,
         })
 
     def get_state(self, input: QuantumCircuit | PermutationGate | Iterable[int]):
@@ -221,6 +252,8 @@ def get_state(self, input: QuantumCircuit | PermutationGate | Iterable[int]):
         elif isinstance(input, PermutationGate):
             input = input.pattern
 
+        # This returns the inverse permutation to get the right 
+        # synthesized circuit at output, instead of its inverse.
         return np.argsort(np.array(input)).astype(int).tolist()
 
 
diff --git a/src/qiskit_gym/rl/synthesis.py b/src/qiskit_gym/rl/synthesis.py
index 5703f18..461ac38 100644
--- a/src/qiskit_gym/rl/synthesis.py
+++ b/src/qiskit_gym/rl/synthesis.py
@@ -125,10 +125,12 @@ def synth(
             state, deterministic, num_searches, num_mcts_searches, C, max_expand_depth
         )
         if actions is not None:
-            return gate_list_to_circuit(
+            synth_circuit = gate_list_to_circuit(
                 [self.env_config["gateset"][a] for a in actions],
                 num_qubits=self.env.config["num_qubits"],
             )
+            synth_circuit = self.env.post_process_synthesis(synth_circuit, input)
+            return synth_circuit
 
     def learn(self, initial_difficulty=1, num_iterations=int(1e10), tb_path=None):
         if tb_path is not None:

From 4bdbb13988af2e160d1630314ba67669011a8ac1 Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Mon, 22 Dec 2025 12:04:50 +0100
Subject: [PATCH 18/20] Remove low value tests

---
 rust/src/envs/linear_function.rs | 35 --------------------------------
 1 file changed, 35 deletions(-)

diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index f1f020f..26bb137 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -405,38 +405,3 @@ impl PyLinearFunctionEnv {
         (PyLinearFunctionEnv, PyBaseEnv { env })
     }
 }
-
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn cx_gate_is_self_inverse() {
-        let gateset = vec![Gate::CX(0, 1)];
-        let metrics_weights = MetricsWeights::default();
-        let mut env = LinearFunction::new(2, 1, gateset, 2, 8, metrics_weights, true, true, true);
-        env.depth = env.max_depth;
-
-        env.step(0);
-        assert!(!env.solved());
-
-        env.step(0);
-        assert!(env.solved());
-        assert!(env.reward() <= 1.0);
-    }
-
-    #[test]
-    fn lfstate_inversion_roundtrip() {
-        let mut state = LFState::new(3);
-        state.cx(0, 1);
-        state.swap(1, 2);
-
-        let original = state.data.clone();
-        state.invert();
-        state.invert();
-
-        assert_eq!(state.data, original, "double inversion should restore the matrix");
-        assert!(!state.solved());
-    }
-}

From db7e72f5802bb732322a600ff8c34b5240b48a0e Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Tue, 23 Dec 2025 12:27:09 +0100
Subject: [PATCH 19/20] Delegate model load to twisterl

---
 src/qiskit_gym/rl/synthesis.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/qiskit_gym/rl/synthesis.py b/src/qiskit_gym/rl/synthesis.py
index 461ac38..42adb99 100644
--- a/src/qiskit_gym/rl/synthesis.py
+++ b/src/qiskit_gym/rl/synthesis.py
@@ -17,7 +17,7 @@
 import torch
 from torch.utils.tensorboard import SummaryWriter
 
-from twisterl.utils import dynamic_import
+from twisterl.utils import dynamic_import, load_checkpoint
 from qiskit_gym.rl.configs import (
     AlphaZeroConfig,
     PPOConfig,
@@ -103,9 +103,7 @@ def init_algorithm(self, model_path=None):
             act_perms=act_perms,
         )
         if model_path is not None:
-            model.load_state_dict(
-                torch.load(open(model_path, "rb"), map_location=torch.device("cpu"))
-            )
+            model.load_state_dict(load_checkpoint(model_path))
 
         return self.algorithm_cls(
             self.env._raw_env, model, self.rl_config.to_json(), None

From 32542543d4f0441e01d2fc6dd772c5eac2ac379d Mon Sep 17 00:00:00 2001
From: victor-villar <villar@ibm.com>
Date: Wed, 7 Jan 2026 13:19:28 +0100
Subject: [PATCH 20/20] Address comments. Solution reset and reward info

---
 README.md                        |  8 +++++++-
 rust/src/envs/clifford.rs        | 24 ++++++++++++++----------
 rust/src/envs/linear_function.rs | 25 ++++++++++++++-----------
 rust/src/envs/permutation.rs     | 24 ++++++++++++++----------
 4 files changed, 49 insertions(+), 32 deletions(-)

diff --git a/README.md b/README.md
index c9620b6..83b9b6a 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,12 @@ random_permutation = np.random.permutation(9)
 optimized_circuit = rls.synth(random_permutation, num_searches=1000)
 ```
 
+## 🏅 Reward and Gate Penalties (at a glance)
+- Each step returns `reward = (1.0 if solved else 0.0) - penalty`.
+- `penalty` is the weighted increase in cost metrics after the chosen gate: CNOT count, CNOT layers, total layers, and total gates.
+- Default weights (`MetricsWeights`) are `n_cnots=0.01`, `n_layers_cnots=0.0`, `n_layers=0.0`, `n_gates=0.0001`; configure per env via `metrics_weights`.
+- Metrics accumulate over the episode; once the target is solved, the positive reward is offset by the penalties from any extra cost incurred.
+
 ## 🤝 Contributing
 
 We welcome contributions! Whether you're adding new synthesis problems, improving RL algorithms, or enhancing documentation - every contribution helps advance quantum computing research.
@@ -100,4 +106,4 @@ Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE.txt) for d
 
 - Kremer, D., Villar, V., Paik, H., Duran, I., Faro, I., & Cruz-Benito, J. (2024). Practical and efficient quantum circuit synthesis and transpiling with reinforcement learning. arXiv preprint [arXiv:2405.13196](https://arxiv.org/abs/2405.13196).
 
-- Dubal, A., Kremer, D., Martiel, S., Villar, V., Wang, D., & Cruz-Benito, J. (2025). Pauli Network Circuit Synthesis with Reinforcement Learning. arXiv preprint [arXiv:2503.14448](https://arxiv.org/abs/2503.14448). 
\ No newline at end of file
+- Dubal, A., Kremer, D., Martiel, S., Villar, V., Wang, D., & Cruz-Benito, J. (2025). Pauli Network Circuit Synthesis with Reinforcement Learning. arXiv preprint [arXiv:2503.14448](https://arxiv.org/abs/2503.14448). 
diff --git a/rust/src/envs/clifford.rs b/rust/src/envs/clifford.rs
index 23da12f..a798745 100644
--- a/rust/src/envs/clifford.rs
+++ b/rust/src/envs/clifford.rs
@@ -268,6 +268,18 @@ impl Clifford {
             self.inverted = !self.inverted;
         }
     }
+
+    /// Shared tail of `set_state`/`reset`: refresh success, metrics and reward; drop episode state.
+    fn reset_internals(&mut self) {
+        self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
+        // `clear` is a no-op on an empty Vec and keeps capacity, so no guard or realloc is needed.
+        self.solution_inv.clear();
+        self.solution.clear();
+    }
 }
 
 impl Env for Clifford {
@@ -288,11 +300,7 @@ impl Env for Clifford {
         // Expecting a flattened 2N x 2N boolean matrix encoded as i64s (>0 => true)
         self.cf.data = state.iter().map(|&x| x > 0).collect();
         self.depth = self.max_depth;
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-        self.inverted = false;
+        self.reset_internals();
     }
 
     fn reset(&mut self) {
@@ -307,11 +315,7 @@ impl Env for Clifford {
             }
         }
         self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-        self.inverted = false;
+        self.reset_internals();
     }
 
     fn step(&mut self, action: usize) {
diff --git a/rust/src/envs/linear_function.rs b/rust/src/envs/linear_function.rs
index 26bb137..7cf7eb0 100644
--- a/rust/src/envs/linear_function.rs
+++ b/rust/src/envs/linear_function.rs
@@ -242,6 +242,17 @@ impl LinearFunction {
         }
     }
 
+    /// Shared tail of `set_state`/`reset`: refresh success, metrics and reward; drop episode state.
+    fn reset_internals(&mut self) {
+        self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
+        // `clear` is a no-op on an empty Vec and keeps capacity, so no guard or realloc is needed.
+        self.solution_inv.clear();
+        self.solution.clear();
+    }
 }
 
 // This implements the necessary functions for the environment
@@ -268,11 +279,7 @@ impl Env for LinearFunction {
     fn set_state(&mut self, state: Vec<i64>) {
         self.lf.data = state.iter().map(|&x| x>0).collect();
         self.depth = self.max_depth;
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-        self.inverted = false;
+        self.reset_internals();
     }
 
     fn reset(&mut self) {
@@ -288,12 +295,8 @@ impl Env for LinearFunction {
                 self.apply_gate_to_state(&gate);
             }
         }
-        self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-        self.inverted = false;
+        self.depth = (self.depth_slope * self.difficulty).min(self.max_depth); 
+        self.reset_internals();
     }
 
     fn step(&mut self, action: usize)  {
diff --git a/rust/src/envs/permutation.rs b/rust/src/envs/permutation.rs
index e168f56..ae70e8f 100644
--- a/rust/src/envs/permutation.rs
+++ b/rust/src/envs/permutation.rs
@@ -130,6 +130,18 @@ impl Permutation {
     pub fn get_state(&self) -> Vec<usize> {
         self.state.clone()
     }
+
+    /// Shared tail of `set_state`/`reset`: refresh success, metrics and reward; drop episode state.
+    fn reset_internals(&mut self) {
+        self.success = self.solved();
+        self.metrics.reset();
+        self.metrics_values = self.metrics.snapshot();
+        self.reward_value = if self.success { 1.0 } else { 0.0 };
+        self.inverted = false;
+        // `clear` is a no-op on an empty Vec and keeps capacity, so no guard or realloc is needed.
+        self.solution_inv.clear();
+        self.solution.clear();
+    }
 }
 
 // This implements the necessary functions for the environment
@@ -157,11 +169,7 @@ impl Env for Permutation {
         self.state = state.iter().map(|&x| x as usize).collect();
 
         self.depth = self.max_depth;  
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-        self.inverted = false;
+        self.reset_internals();
     }
 
     fn reset(&mut self) {
@@ -180,11 +188,7 @@ impl Env for Permutation {
             }
         }
         self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
-        self.success = self.solved();
-        self.metrics.reset();
-        self.metrics_values = self.metrics.snapshot();
-        self.reward_value = if self.success { 1.0 } else { 0.0 };
-        self.inverted = false;
+        self.reset_internals();
     }
 
     fn step(&mut self, action: usize)  {