munich-quantum-toolkit · Shaobo-Zhou · Mar 29, 2025 · Apr 10, 2025 · Jul 29, 2025 · Jul 29, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -139,7 +139,7 @@ repos:
       - id: ty-check
         name: ty check
         entry: uv run ty check
-        language: unsupported
+        language: system
         require_serial: true
         types_or: [python, pyi, jupyter]
         exclude: ^(docs/)

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,8 +11,12 @@ This project adheres to [Semantic Versioning], with the exception that minor rel
 
 ### Changed
 
+- ✨ Improve RL reward design by adding intermediate rewards ([#526]) ([**@Shaobo-Zhou**])
 - 🔧 Replace `mypy` with `ty` ([#572]) ([**@denialhaag**])
 - 🐛 Fix instruction duration unit in estimated success probability calculation ([#445]) ([**@Shaobo-Zhou**])
+
+### Removed
+
 - ✨ Remove support for custom names of trained models ([#489]) ([**@bachase**])
 - 🔥 Drop support for x86 macOS systems ([#421]) ([**@denialhaag**])
 
@@ -47,6 +51,7 @@ _📚 Refer to the [GitHub Release Notes](https://github.com/munich-quantum-tool
 <!-- PR links -->
 
 [#572]: https://github.com/munich-quantum-toolkit/predictor/pull/572
+[#526]: https://github.com/munich-quantum-toolkit/predictor/pull/526
 [#489]: https://github.com/munich-quantum-toolkit/predictor/pull/489
 [#445]: https://github.com/munich-quantum-toolkit/predictor/pull/445
 [#421]: https://github.com/munich-quantum-toolkit/predictor/pull/421

diff --git a/src/mqt/predictor/ml/predictor.py b/src/mqt/predictor/ml/predictor.py
@@ -400,7 +400,6 @@ def train_random_forest_model(
             training_data = self._get_prepared_training_data()
         num_cv = min(len(training_data.y_train), 5)
         mdl = GridSearchCV(mdl, tree_param, cv=num_cv, n_jobs=8).fit(training_data.X_train, training_data.y_train)
-
         joblib_dump(mdl, save_mdl_path)
         logger.info("Random Forest model is trained and saved.")
 

diff --git a/src/mqt/predictor/reward.py b/src/mqt/predictor/reward.py
@@ -204,7 +204,7 @@ def estimated_success_probability(qc: QuantumCircuit, device: Target, precision:
                 if first_qubit_idx not in active_qubits:
                     continue
 
-                dt = device.dt  # instruction durations are stored in unit dt
+                dt = device.dt or 1.0  # discrete time unit; fallback to 1.0 if unavailable
                 res *= np.exp(
                     -instruction.duration
                     * dt

diff --git a/src/mqt/predictor/rl/actions.py b/src/mqt/predictor/rl/actions.py
@@ -86,6 +86,7 @@
 
     from bqskit import Circuit
     from pytket._tket.passes import BasePass as tket_BasePass
+    from qiskit.passmanager import PropertySet
     from qiskit.transpiler.basepasses import BasePass as qiskit_BasePass
 
 
@@ -143,7 +144,7 @@ class DeviceDependentAction(Action):
             Callable[..., tuple[Any, ...] | Circuit],
         ]
     )
-    do_while: Callable[[dict[str, Circuit]], bool] | None = None
+    do_while: Callable[[PropertySet], bool] | None = None
 
 
 # Registry of actions

diff --git a/src/mqt/predictor/rl/approx_reward.py b/src/mqt/predictor/rl/approx_reward.py
@@ -0,0 +1,116 @@
+# Copyright (c) 2023 - 2026 Chair for Design Automation, TUM
+# Copyright (c) 2025 - 2026 Munich Quantum Software Company GmbH
+# All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+# Licensed under the MIT License
+
+"""This module provides helper functions to approximate expected fidelity and estimated success probability (ESP) by transpiling a circuit to a device's basis gate set and combining resulting gate counts with calibration-derived per-gate error rates and durations."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+from qiskit import transpile
+
+if TYPE_CHECKING:
+    from qiskit import QuantumCircuit
+    from qiskit.transpiler import Target
+
+BLACKLIST: set[str] = {"measure", "reset", "delay", "barrier"}  # Not modeled here: kept out of the basis and counts
+
+
+def get_basis_gates_from_target(device: Target) -> list[str]:
+    """List the target's operation names in sorted order, leaving out every ``BLACKLIST`` entry."""
+    return sorted(name for name in device.operation_names if name not in BLACKLIST)
+
+
+def estimate_basis_gate_counts(qc: QuantumCircuit, *, basis_gates: list[str]) -> dict[str, int]:
+    """Return how often each gate of ``basis_gates`` occurs in ``qc`` once transpiled to that basis."""
+    # Fixed optimization level and seed: every call uses the same transpiler settings.
+    transpiled = transpile(qc, basis_gates=basis_gates, optimization_level=1, seed_transpiler=42)
+    tally = dict.fromkeys(basis_gates, 0)  # every requested gate gets an entry, even if unused
+    for instruction in transpiled.data:
+        gate_name = instruction.operation.name
+        # Skip blacklisted operations and anything outside the requested basis.
+        if gate_name not in BLACKLIST and gate_name in tally:
+            tally[gate_name] += 1
+    return tally
+
+
+def approx_expected_fidelity(
+    qc: QuantumCircuit,
+    *,
+    device: Target,
+    error_rates: dict[str, float],
+) -> float:
+    """Estimate the expected fidelity of ``qc`` from per-basis-gate error rates.
+
+    The circuit is transpiled to the device basis and the resulting gate counts
+    are fed into a simple product model: Π_g (1 - p_g)^{count_g}.
+
+    Args:
+        qc: Circuit to evaluate.
+        device: Target whose non-blacklisted operations define the basis.
+        error_rates: Mapping ``basis_gate -> error_probability``; missing gates count as error-free.
+
+    Returns:
+        The product above, clamped to [0, 1].
+    """
+    basis_gates = get_basis_gates_from_target(device)
+    fidelity = 1.0
+    for gate, occurrences in estimate_basis_gate_counts(qc, basis_gates=basis_gates).items():
+        success_probability = 1.0 - error_rates.get(gate, 0.0)
+        fidelity *= success_probability**occurrences
+    return float(min(max(fidelity, 0.0), 1.0))
+
+
+def approx_estimated_success_probability(
+    qc: QuantumCircuit,
+    *,
+    device: Target,
+    error_rates: dict[str, float],
+    gate_durations: dict[str, float],
+    tbar: float | None,
+    par_feature: float,
+    liv_feature: float,
+    n_qubits: int,
+) -> float:
+    """Approximate the estimated success probability (ESP) of ``qc`` on ``device``.
+
+    The result is the product of two factors:
+    (1) the gate term Π_g (1 - p_g)^{count_g} over the transpiled basis-gate counts, and
+    (2) an exponential idle/decoherence term driven by an effective circuit duration.
+
+    Args:
+        qc: Circuit to evaluate.
+        device: Target providing the basis gate set.
+        error_rates: Mapping ``basis_gate -> error_probability``; missing gates count as error-free.
+        gate_durations: Mapping ``basis_gate -> duration`` (seconds); missing gates count as zero.
+        tbar: Representative coherence time (seconds). If None or non-positive, the idle term is 1.
+        par_feature: Parallelism feature in [0, 1].
+        liv_feature: Liveness feature in [0, 1].
+        n_qubits: Number of qubits in the circuit (values below 1 are treated as 1).
+
+    Returns:
+        Approximate ESP, clamped to [0, 1].
+    """
+    basis_gates = get_basis_gates_from_target(device)
+    gate_counts = estimate_basis_gate_counts(qc, basis_gates=basis_gates)
+
+    # (1) Gate term: one success factor per counted gate occurrence.
+    gate_fidelity = 1.0
+    for gate, occurrences in gate_counts.items():
+        gate_fidelity *= (1.0 - error_rates.get(gate, 0.0)) ** occurrences
+
+    # (2) Idle term: summed gate time divided by an effective parallelism factor.
+    parallelism = 1.0 + (max(n_qubits, 1) - 1.0) * float(par_feature)
+    duration = sum(gate_counts[g] * gate_durations.get(g, 0.0) for g in basis_gates) / parallelism
+    idle_share = max(0.0, 1.0 - float(liv_feature))
+    if tbar is None or tbar <= 0.0:
+        idle_factor = 1.0
+    else:
+        idle_factor = float(np.exp(-(duration * idle_share) / tbar))
+    return float(max(min(gate_fidelity * idle_factor, 1.0), 0.0))