Skip to content

Commit 98688a5

Browse files
Add metrics-based rewards, symmetry permutations, and random inversions (#8)
* Perms in linear functions * Perms in cliffords and permutations * Added missing file * Add gate metrics to lf * Metrics in cliffords and permutations * Test inverts in cliffords * Metrics in envs * inv in perms * Perms optional * Linfunc inversions in rust, not python * Inverts default to true for permutations * Update twisterl versions * Fix a few issues * Fix permutations/twist generation performance * Recover deleted code * Perms and add_inverts default to true * Fix inverse solution tracking. Address some comments. * Remove low value tests * Delegate model load to twisterl * Address comments. Solution reset and reward info
1 parent 3e19043 commit 98688a5

File tree

12 files changed

+1144
-92
lines changed

12 files changed

+1144
-92
lines changed

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ random_permutation = np.random.permutation(9)
8787
optimized_circuit = rls.synth(random_permutation, num_searches=1000)
8888
```
8989

90+
## 🏅 Reward and Gate Penalties (at a glance)
91+
- Each step returns `reward = (1.0 if solved else 0.0) - penalty`.
92+
- `penalty` is the weighted increase in cost metrics after the chosen gate: CNOT count, CNOT layers, total layers, and total gates.
93+
- Default weights (`MetricsWeights`) are `n_cnots=0.01`, `n_layers_cnots=0.0`, `n_layers=0.0`, `n_gates=0.0001`; configure per env via `metrics_weights`.
94+
- Metrics accumulate over the episode, but each step is penalized only for the increase caused by that step's gate; on the step that solves the target, the `1.0` reward is reduced by that step's penalty.
95+
9096
## 🤝 Contributing
9197

9298
We welcome contributions! Whether you're adding new synthesis problems, improving RL algorithms, or enhancing documentation - every contribution helps advance quantum computing research.
@@ -100,4 +106,4 @@ Licensed under the Apache License, Version 2.0. See [LICENSE](LICENSE.txt) for d
100106

101107
- Kremer, D., Villar, V., Paik, H., Duran, I., Faro, I., & Cruz-Benito, J. (2024). Practical and efficient quantum circuit synthesis and transpiling with reinforcement learning. arXiv preprint [arXiv:2405.13196](https://arxiv.org/abs/2405.13196).
102108

103-
- Dubal, A., Kremer, D., Martiel, S., Villar, V., Wang, D., & Cruz-Benito, J. (2025). Pauli Network Circuit Synthesis with Reinforcement Learning. arXiv preprint [arXiv:2503.14448](https://arxiv.org/abs/2503.14448).
109+
- Dubal, A., Kremer, D., Martiel, S., Villar, V., Wang, D., & Cruz-Benito, J. (2025). Pauli Network Circuit Synthesis with Reinforcement Learning. arXiv preprint [arXiv:2503.14448](https://arxiv.org/abs/2503.14448).

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dynamic = ["version"]
1818
dependencies = [
1919
"qiskit>=2.1",
2020
"gymnasium",
21-
"twisterl",
21+
"twisterl~=0.4.1",
2222
]
2323

2424

rust/Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ nalgebra = "0.33.0"
1414
rand = "0.8.4"
1515
rayon = "1.1.0"
1616
petgraph = "0.6.5"
17-
twisterl = {package = "twisterl-rs", version = "0.1.0", features = ["python_bindings"]}
17+
twisterl = {version = "~0.4.1", features = ["python_bindings"]}
1818

1919
[profile.release]
2020
opt-level = 3

rust/src/envs/clifford.rs

Lines changed: 187 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,15 @@ that they have been altered from the originals.
1414
use pyo3::prelude::*;
1515

1616
use rand::distributions::{Distribution, Uniform};
17+
use rand::Rng;
1718

1819
use twisterl::rl::env::Env;
19-
use twisterl::python_interface::env::{PyBaseEnv, get_env_ref, get_env_mut};
20+
use twisterl::python_interface::env::PyBaseEnv;
2021

2122
use crate::envs::common::Gate;
23+
use crate::envs::metrics::{MetricsCounts, MetricsTracker, MetricsWeights};
24+
use crate::envs::symmetry::compute_twists_clifford;
25+
use std::collections::HashMap;
2226

2327

2428
#[derive(Clone)]
@@ -139,6 +143,35 @@ impl CFState {
139143
}
140144
true
141145
}
146+
147+
/// Returns the inverse of this boolean matrix, computed by Gauss-Jordan style
/// elimination with XOR row operations on a working copy (`mat`).
///
/// Every row swap / row XOR applied to `mat` is mirrored on `inv`, which
/// starts as `CFState::new(self.n)` (presumably the identity — TODO confirm
/// against `CFState::new`). `self` is not modified.
///
/// # Panics
/// Panics with "CFState is singular; cannot invert" when a column has no set
/// entry on the diagonal or in any row below it.
fn inverse(&self) -> Self {
148+
let dim = self.dim();
149+
let mut mat = self.clone();
150+
let mut inv = CFState::new(self.n);
151+
152+
for col in 0..dim {
153+
// Diagonal entry unset: find the first row below with this column set and swap it in.
if !mat.get(col, col) {
154+
let pivot = ((col + 1)..dim).find(|&row| mat.get(row, col));
155+
let pivot = pivot.expect("CFState is singular; cannot invert");
156+
mat.swap_rows(col, pivot);
157+
inv.swap_rows(col, pivot);
158+
}
159+
160+
// Clear this column in every other row (above and below the pivot).
// NOTE(review): assumes `row_xor(a, b)` XORs row `b` into row `a` — confirm.
for row in 0..dim {
161+
if row != col && mat.get(row, col) {
162+
mat.row_xor(row, col);
163+
inv.row_xor(row, col);
164+
}
165+
}
166+
}
167+
168+
// Checked in debug builds only: the working copy must have reduced to the solved state.
debug_assert!(mat.solved(), "CFState inverse computation failed");
169+
inv
170+
}
171+
172+
/// Replaces this state in place with the result of `inverse()`.
/// Panics under the same condition as `inverse()`.
fn invert(&mut self) {
173+
*self = self.inverse();
174+
}
142175
}
143176

144177
// -------- Env: Clifford synthesis over the symplectic tableau (phase ignored) --------
@@ -153,6 +186,17 @@ pub struct Clifford {
153186
pub gateset: Vec<Gate>,
154187
pub depth_slope: usize,
155188
pub max_depth: usize,
189+
pub obs_perms: Vec<Vec<usize>>,
190+
pub act_perms: Vec<Vec<usize>>,
191+
metrics: MetricsTracker,
192+
metrics_values: MetricsCounts,
193+
metrics_weights: MetricsWeights,
194+
reward_value: f32,
195+
add_inverts: bool,
196+
track_solution: bool,
197+
solution: Vec<usize>,
198+
solution_inv: Vec<usize>,
199+
inverted: bool,
156200
}
157201

158202
impl Clifford {
@@ -162,12 +206,80 @@ impl Clifford {
162206
gateset: Vec<Gate>,
163207
depth_slope: usize,
164208
max_depth: usize,
209+
metrics_weights: MetricsWeights,
210+
add_inverts: bool,
211+
add_perms: bool,
212+
track_solution: bool,
165213
) -> Self {
166214
let cf = CFState::new(num_qubits);
167215
let success = cf.solved();
168-
Clifford { cf, depth: 1, success, difficulty, gateset, depth_slope, max_depth }
216+
217+
// Only compute symmetries if enabled
218+
let (obs_perms, act_perms) = if add_perms {
219+
compute_twists_clifford(num_qubits, &gateset)
220+
} else {
221+
(Vec::new(), Vec::new())
222+
};
223+
224+
let metrics = MetricsTracker::new(num_qubits);
225+
let metrics_values = metrics.snapshot();
226+
Clifford {
227+
cf,
228+
depth: 1,
229+
success,
230+
difficulty,
231+
gateset,
232+
depth_slope,
233+
max_depth,
234+
obs_perms,
235+
act_perms,
236+
metrics,
237+
metrics_values,
238+
metrics_weights,
239+
reward_value: if success { 1.0 } else { 0.0 },
240+
add_inverts,
241+
track_solution,
242+
solution: Vec::new(),
243+
solution_inv: Vec::new(),
244+
inverted: false,
245+
}
169246
}
170247
pub fn solved(&self) -> bool { self.cf.solved() }
248+
249+
/// Dispatches `gate` to the matching update method on `self.cf`.
///
/// Only the tableau is touched here: metrics, depth, reward and solution
/// bookkeeping are left unchanged, so callers decide what else to update.
fn apply_gate_to_state(&mut self, gate: &Gate) {
250+
match gate {
251+
Gate::H(q) => self.cf.h(*q),
252+
Gate::S(q) => self.cf.s(*q),
253+
Gate::Sdg(q) => self.cf.sdg(*q), // identical to S modulo global phase (ignored)
254+
Gate::SX(q) => self.cf.sx(*q),
255+
Gate::SXdg(q) => self.cf.sxdg(*q), // identical to SX modulo global phase (ignored)
256+
Gate::CX(c, t) => self.cf.cx(*c, *t),
257+
Gate::CZ(a, b) => self.cf.cz(*a, *b),
258+
Gate::SWAP(a, b) => self.cf.swap(*a, *b),
259+
}
260+
}
261+
262+
/// When `add_inverts` is enabled, replaces the tableau by its inverse with
/// probability 0.5 and flips `self.inverted` to record the change.
/// Does nothing when `add_inverts` is false.
fn maybe_random_invert(&mut self) {
263+
if !self.add_inverts {
264+
return;
265+
}
266+
// Fair coin flip drawn from the thread-local RNG.
if rand::thread_rng().gen_bool(0.5) {
267+
self.cf.invert();
268+
// Toggle rather than set: two inversions leave the flag where it started.
self.inverted = !self.inverted;
269+
}
270+
}
271+
272+
/// Re-derives all per-episode bookkeeping from the current tableau.
///
/// Recomputes `success`, resets the metrics tracker and its cached snapshot,
/// sets the cached reward to 1.0 if the state is already solved (else 0.0),
/// clears the `inverted` flag and, when `track_solution` is set, empties both
/// recorded action lists. `self.cf` and `self.depth` are not modified.
fn reset_internals(&mut self) {
273+
self.success = self.solved();
274+
self.metrics.reset();
275+
self.metrics_values = self.metrics.snapshot();
276+
self.reward_value = if self.success { 1.0 } else { 0.0 };
277+
self.inverted = false;
278+
if self.track_solution {
279+
self.solution_inv = Vec::new();
280+
self.solution = Vec::new();
281+
}
282+
}
171283
}
172284

173285
impl Env for Clifford {
@@ -188,38 +300,50 @@ impl Env for Clifford {
188300
// Expecting a flattened 2N x 2N boolean matrix encoded as i64s (>0 => true)
189301
self.cf.data = state.iter().map(|&x| x > 0).collect();
190302
self.depth = self.max_depth;
191-
self.success = self.solved();
303+
self.reset_internals();
192304
}
193305

194306
// Starts a new episode (comments describe the added `+` lines of this hunk):
// builds a fresh state, scrambles it with `difficulty` gates sampled uniformly
// from the gateset, sets the depth budget to
// `min(depth_slope * difficulty, max_depth)` and then calls `reset_internals()`.
fn reset(&mut self) {
195307
self.cf = CFState::new(self.cf.n);
196-
self.depth = self.max_depth;
197-
self.success = self.solved();
198-
199308
let mut rng = rand::thread_rng();
200309
let action_range = Uniform::new(0, self.num_actions());
201310

202311
for _ in 0..self.difficulty {
203312
let action = action_range.sample(&mut rng);
204-
self.step(action);
313+
// Scramble through `apply_gate_to_state` instead of `step`, so these gates only
// change the tableau and never reach the metrics/solution code inside `step`.
if let Some(gate) = self.gateset.get(action).cloned() {
314+
self.apply_gate_to_state(&gate);
315+
}
205316
}
206317
self.depth = (self.depth_slope * self.difficulty).min(self.max_depth);
207-
self.success = self.solved();
318+
self.reset_internals();
208319
}
209320

210321
// Applies one action (comments describe the added `+` lines of this hunk):
//   1. looks up the gate; if found, updates the metrics tracker, takes the
//      weighted increase over the previous snapshot as `penalty`, and applies
//      the gate to the tableau;
//   2. if `track_solution`, records `action` in `solution_inv` when the state is
//      currently inverted, otherwise in `solution`;
//   3. decrements `depth` (saturating at 0), possibly inverts the state at random,
//      recomputes `success`, and caches
//      `reward_value = (1.0 if solved else 0.0) - penalty`.
//
// NOTE(review): the removed version indexed `self.gateset[action]` and panicked on
// an out-of-range action. The added version treats it as a no-op on the state with
// zero penalty, yet still records the action and still consumes one unit of depth.
fn step(&mut self, action: usize) {
211-
match self.gateset[action] {
212-
Gate::H(q) => self.cf.h(q),
213-
Gate::S(q) => self.cf.s(q),
214-
Gate::Sdg(q) => self.cf.sdg(q), // identical to S modulo global phase (ignored)
215-
Gate::SX(q) => self.cf.sx(q),
216-
Gate::SXdg(q) => self.cf.sxdg(q), // identical to SX modulo global phase (ignored)
217-
Gate::CX(c, t) => self.cf.cx(c, t),
218-
Gate::CZ(a, b) => self.cf.cz(a, b),
219-
Gate::SWAP(a,b) => self.cf.swap(a, b),
322+
let mut penalty = 0.0f32;
323+
324+
if let Some(gate) = self.gateset.get(action).cloned() {
325+
let previous = self.metrics_values.clone();
326+
self.metrics.apply_gate(&gate);
327+
let new_metrics = self.metrics.snapshot();
328+
penalty = new_metrics.weighted_delta(&previous, &self.metrics_weights);
329+
self.metrics_values = new_metrics;
330+
331+
self.apply_gate_to_state(&gate);
332+
}
333+
334+
// `inverted` is read before `maybe_random_invert()` runs below, so the action is
// filed under the orientation the state had when the gate was applied.
if self.track_solution {
335+
if self.inverted {
336+
self.solution_inv.push(action);
337+
} else {
338+
self.solution.push(action);
339+
}
220340
}
341+
221342
self.depth = self.depth.saturating_sub(1);
343+
self.maybe_random_invert();
222344
self.success = self.solved();
345+
let achieved = if self.success { 1.0 } else { 0.0 };
346+
self.reward_value = achieved - penalty;
223347
}
224348

225349
fn masks(&self) -> Vec<bool> {
@@ -228,14 +352,10 @@ impl Env for Clifford {
228352

229353
fn is_final(&self) -> bool { self.depth == 0 || self.success }
230354

231-
fn reward(&self) -> f32 {
232-
if self.success {
233-
1.0
234-
} else if self.depth == 0 {
235-
-0.5
236-
} else {
237-
-0.5 / (self.max_depth as f32)
238-
}
355+
fn reward(&self) -> f32 { self.reward_value }
356+
357+
fn success(&self) -> bool {
358+
self.success
239359
}
240360

241361
fn observe(&self) -> Vec<usize> {
@@ -246,6 +366,19 @@ impl Env for Clifford {
246366
.filter_map(|(i, &v)| if v { Some(i) } else { None })
247367
.collect()
248368
}
369+
370+
/// Returns clones of the stored observation and action permutations as
/// `(obs_perms, act_perms)`. Both are empty when the env was built with
/// `add_perms = false`.
fn twists(&self) -> (Vec<Vec<usize>>, Vec<Vec<usize>>) {
371+
(self.obs_perms.clone(), self.act_perms.clone())
372+
}
373+
374+
fn track_solution(&self) -> bool { self.track_solution }
375+
376+
/// Returns the recorded action sequence: the actions stored in `solution` in
/// the order they were taken, followed by the actions stored in
/// `solution_inv` in reverse order.
///
/// NOTE(review): `solution_inv` holds actions taken while the state was
/// inverted; reversing their order presumably places them at the other end of
/// the circuit. Confirm whether each of those gates must also be replaced by
/// its own inverse (e.g. S vs Sdg) when phases matter to the consumer.
fn solution(&self) -> Vec<usize> {
377+
let mut out = Vec::with_capacity(self.solution.len() + self.solution_inv.len());
378+
out.extend_from_slice(&self.solution);
379+
out.extend(self.solution_inv.iter().rev().copied());
380+
out
381+
}
249382
}
250383

251384
#[pyclass(name="CliffordEnv", extends=PyBaseEnv)]
@@ -254,15 +387,41 @@ pub struct PyCliffordEnv;
254387
#[pymethods]
255388
impl PyCliffordEnv {
256389
// Python constructor for `CliffordEnv` (comments describe the added `+` lines).
// The first five arguments are required. `metrics_weights`, `add_inverts`,
// `add_perms` and `track_solution` are optional keyword arguments defaulting to
// `None`; a `None` flag is treated as `true`, and `metrics_weights` is converted
// with `MetricsWeights::from_hashmap`. The resulting `Clifford` env is boxed and
// handed to the `PyBaseEnv` base class.
#[new]
390+
#[pyo3(signature = (
391+
num_qubits,
392+
difficulty,
393+
gateset,
394+
depth_slope,
395+
max_depth,
396+
metrics_weights=None,
397+
add_inverts=None,
398+
add_perms=None,
399+
track_solution=None,
400+
))]
257401
pub fn new(
258402
num_qubits: usize,
259403
difficulty: usize,
260404
gateset: Vec<Gate>,
261405
depth_slope: usize,
262-
max_depth: usize
406+
max_depth: usize,
407+
metrics_weights: Option<HashMap<String, f32>>,
408+
add_inverts: Option<bool>,
409+
add_perms: Option<bool>,
410+
track_solution: Option<bool>,
263411
) -> (Self, PyBaseEnv) {
264-
let env = Clifford::new(num_qubits, difficulty, gateset, depth_slope, max_depth);
412+
let weights = MetricsWeights::from_hashmap(metrics_weights);
413+
let env = Clifford::new(
414+
num_qubits,
415+
difficulty,
416+
gateset,
417+
depth_slope,
418+
max_depth,
419+
weights,
420+
// Unspecified flags default to enabled.
add_inverts.unwrap_or(true),
421+
add_perms.unwrap_or(true),
422+
track_solution.unwrap_or(true),
423+
);
265424
let env = Box::new(env);
266425
(PyCliffordEnv, PyBaseEnv { env })
267426
}
268-
}
427+
}

0 commit comments

Comments
 (0)