Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
79451be
rebase
rniczh Jan 8, 2026
7078d0e
Fix dynamic wires with samples
rniczh Jan 6, 2026
ddfdb53
fix
rniczh Jan 6, 2026
9d19fa9
add back the comment
rniczh Jan 6, 2026
7ae4e5e
Compact state vector before measurement
rniczh Jan 7, 2026
b3e806f
Rebase
jzaia18 Jan 6, 2026
d365d2d
update
rniczh Jan 7, 2026
6771882
fix
rniczh Jan 7, 2026
0e11617
no need to collapse released qubit during the
rniczh Jan 7, 2026
635370d
fix compact timing
rniczh Jan 7, 2026
aa98ec9
should use big-endian
rniczh Jan 7, 2026
a8367bd
Add test
rniczh Jan 7, 2026
6e9f177
update test
rniczh Jan 7, 2026
5bac5ae
clean
rniczh Jan 8, 2026
54a87e1
update
rniczh Jan 8, 2026
14a351e
fix
rniczh Jan 8, 2026
e0033de
fix codecoverage
rniczh Jan 8, 2026
90d9300
revert
rniczh Jan 8, 2026
bf6ff01
fix coverage
rniczh Jan 8, 2026
203a53b
Update .github/CHANGELOG.md
rniczh Jan 9, 2026
1ed47be
Update .github/CHANGELOG.md
rniczh Jan 9, 2026
81c4859
remove redundant
rniczh Jan 9, 2026
6eb8caa
formatting
rniczh Jan 9, 2026
a5e2132
compactStateVector -> reducedStateVector
rniczh Jan 9, 2026
43a1b6a
Update pennylane_lightning/core/catalyst/LightningQubitManager.hpp
rniczh Jan 9, 2026
9fc0cc0
formatting
rniczh Jan 9, 2026
abd373d
bumping to rc3
rniczh Jan 9, 2026
60511a8
move normalizeStateVector to Util
rniczh Jan 9, 2026
128dfff
fix util
rniczh Jan 9, 2026
57b3a61
no need to cover the complex with custom allocator
rniczh Jan 9, 2026
0ec104f
revert project.toml
rniczh Jan 9, 2026
fcc5229
ci
rniczh Jan 9, 2026
2b784c7
fix changelog
rniczh Jan 9, 2026
0f53498
change fn name of reducedStateVector
rniczh Jan 9, 2026
6c8cdb2
change to use fn name reduceStateVector for all devices
rniczh Jan 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@

<h3>Bug fixes 🐛</h3>

- Fixed sampling with dynamically allocated and released qubits in Catalyst. The state vector is now reduced before measurements to correctly handle released qubits, ensuring `qml.sample()` and other measurement operations return correct results when using `qml.allocate()` and `qml.release()`.
[(#1321)](https://github.com/PennyLaneAI/pennylane-lightning/pull/1321)

- Corrected an issue in tests where a PennyLane operator was used within a QNode to compute a
matrix, which would lead to wrongful queuing as of PennyLane
pull request [#8131](https://github.com/PennyLaneAI/pennylane/pull/8131).
Expand Down
2 changes: 1 addition & 1 deletion pennylane_lightning/core/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
Version number (major.minor.patch[-label])
"""

__version__ = "0.44.0-rc2"
__version__ = "0.44.0-rc3"
11 changes: 11 additions & 0 deletions pennylane_lightning/core/catalyst/LightningQubitManager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,5 +163,16 @@ class QubitManager final {
this->qubit_id_map.clear();
this->free_device_qubits.clear();
}

/**
 * @brief Rewrite the device ids stored in `qubit_id_map` using a lookup
 * table.
 *
 * Every entry whose current device id appears as a key in
 * `old_to_new_mapping` is updated to the mapped value; entries without a
 * match are left unchanged.
 *
 * NOTE(review): only `qubit_id_map` is updated here. `free_device_qubits`
 * is not remapped by this method -- confirm callers do not rely on it
 * being consistent with the new numbering.
 *
 * @param old_to_new_mapping Table from current device id to replacement
 * device id.
 */
void RemapDeviceIds(const std::unordered_map<DeviceQubitID, DeviceQubitID>
                        &old_to_new_mapping) {
    for (auto &[program_id, device_id] : this->qubit_id_map) {
        const auto remapped = old_to_new_mapping.find(device_id);
        if (remapped == old_to_new_mapping.end()) {
            continue; // no replacement registered for this device id
        }
        device_id = remapped->second;
    }
}
};
} // namespace Catalyst::Runtime::Simulator
82 changes: 82 additions & 0 deletions pennylane_lightning/core/catalyst/tests/Test_LightningDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,85 @@ TEST_CASE("Release Qubits", "[Driver]") {

CHECK(sim->GetNumQubits() == 2);
}

TEST_CASE("Sample after dynamic qubit release", "[Driver]") {
    // C++ counterpart of the following Python program:
    //   @qjit
    //   @qml.qnode(qml.device("lightning.qubit", wires=3, shots=10))
    //   def circuit():
    //       with qml.allocate(2) as qs:
    //           qml.X(qs[1])
    //       return qml.sample(wires=[0, 1])

    auto sim = std::make_unique<LSimulator>();

    // Three "static" wires (0, 1, 2). No gate is applied to them in this
    // test, so they are expected to stay in |0>.
    const std::vector<intptr_t> static_qubits = sim->AllocateQubits(3);

    // Two extra wires standing in for `qml.allocate(2)`: flip the second
    // one, then hand both back to the simulator.
    const std::vector<intptr_t> dynamic_qubits = sim->AllocateQubits(2);
    sim->NamedOperation("PauliX", {}, {dynamic_qubits[1]}, false);
    sim->ReleaseQubits(dynamic_qubits);

    constexpr size_t num_shots = 10;
    constexpr size_t num_wires = 2;
    sim->SetDeviceShots(num_shots);

    // Output buffer viewed as a row-major (num_shots, num_wires) matrix.
    std::vector<double> samples(num_shots * num_wires);
    const size_t sizes[2] = {num_shots, num_wires};
    const size_t strides[2] = {num_wires, 1};
    DataView<double, 2> samples_view(samples.data(), 0, sizes, strides);

    // Sample only the first two static wires.
    sim->PartialSample(samples_view, {static_qubits[0], static_qubits[1]});

    // Every sampled bit must be 0.
    for (const double bit : samples) {
        CHECK(bit == 0.);
    }
}

TEST_CASE("Sample after releasing middle qubit (triggers remap)", "[Driver]") {
    // Intended scenario:
    //   1. Allocate 3 qubits              -> device IDs 0, 1, 2
    //   2. Apply X to the last qubit      -> state |001>
    //   3. Release the middle qubit       -> remaining device IDs 0, 2
    //   4. reduceStateVector compacts IDs -> device ID 2 becomes 1
    //   5. Sampling the last qubit (now device ID 1) must yield 1

    auto sim = std::make_unique<LSimulator>();

    const std::vector<intptr_t> qubits = sim->AllocateQubits(3);

    // Flip the last qubit only.
    sim->NamedOperation("PauliX", {}, {qubits[2]}, false);

    // Releasing the middle qubit leaves a gap in the device IDs, which is
    // what the reduction step is expected to close.
    sim->ReleaseQubit(qubits[1]);

    constexpr size_t num_shots = 10;
    constexpr size_t num_wires = 2;
    sim->SetDeviceShots(num_shots);

    // Output buffer viewed as a row-major (num_shots, num_wires) matrix.
    std::vector<double> samples(num_shots * num_wires);
    const size_t sizes[2] = {num_shots, num_wires};
    const size_t strides[2] = {num_wires, 1};
    DataView<double, 2> samples_view(samples.data(), 0, sizes, strides);

    sim->PartialSample(samples_view, {qubits[0], qubits[2]});

    // Each row is one shot laid out as [qubits[0], qubits[2]] == [0, 1].
    for (size_t row = 0; row < num_shots; ++row) {
        const double *shot = samples.data() + row * num_wires;
        CHECK(shot[0] == 0.); // qubits[0] stays in |0>
        CHECK(shot[1] == 1.); // qubits[2] was flipped to |1>
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <unordered_set>

#include "LightningGPUSimulator.hpp"
#include "Util.hpp"

namespace Catalyst::Runtime::Simulator {

Expand Down Expand Up @@ -82,9 +83,14 @@ auto LightningGPUSimulator::AllocateQubits(std::size_t num_qubits)
}

/**
 * @brief Release a single qubit and flag the state vector for reduction.
 *
 * Fails via RT_FAIL_IF when the qubit manager does not recognise `q`.
 *
 * @param q Program-level qubit id to release.
 */
void LightningGPUSimulator::ReleaseQubit(QubitIdType q) {
    const bool is_known = this->qubit_manager.isValidQubitId(q);
    RT_FAIL_IF(!is_known, "Invalid qubit to release");

    // The state vector is not touched here: the qubit is only marked as
    // released in the manager, and the flag records that a reduction is
    // still pending.
    this->qubit_manager.Release(q);
    this->needs_reduction = true;
}

void LightningGPUSimulator::ReleaseQubits(const std::vector<QubitIdType> &ids) {
Expand All @@ -101,19 +107,95 @@ void LightningGPUSimulator::ReleaseQubits(const std::vector<QubitIdType> &ids) {
if (deallocate_all) {
this->qubit_manager.ReleaseAll();
this->device_sv = std::make_unique<StateVectorT>(0);
this->needs_reduction = false;
return;
}
}

for (auto id : ids) {
this->qubit_manager.Release(id);
this->ReleaseQubit(id);
}
}

/// @brief Number of qubits as reported by the qubit manager.
auto LightningGPUSimulator::GetNumQubits() const -> std::size_t {
    const auto &manager = this->qubit_manager;
    return manager.getNumQubits();
}

/**
 * @brief Build a Measurements object over the current device state vector.
 *
 * reduceStateVector() is invoked first, so any pending reduction is applied
 * before the Measurements object is constructed.
 *
 * @return Measurements object constructed from `*device_sv`.
 */
auto LightningGPUSimulator::getMeasurements()
    -> Pennylane::LightningGPU::Measures::Measurements<StateVectorT> {
    using MeasurementsT =
        Pennylane::LightningGPU::Measures::Measurements<StateVectorT>;

    this->reduceStateVector();
    return MeasurementsT{*this->device_sv};
}

void LightningGPUSimulator::reduceStateVector() {
if (!this->needs_reduction) {
return;
}

// Get active qubits
auto all_qubits = this->qubit_manager.getAllQubitIds();
std::vector<std::pair<size_t, QubitIdType>> wire_id_pairs;

for (auto qid : all_qubits) {
size_t device_wire = this->qubit_manager.getDeviceId(qid);
wire_id_pairs.push_back({device_wire, qid});
}

// Sort by device wire index
std::sort(wire_id_pairs.begin(), wire_id_pairs.end());

// Extract reduced state vector - need to copy from GPU first
auto old_data = this->device_sv->getDataVector();
size_t num_qubits_after = wire_id_pairs.size();
size_t new_size = 1UL << num_qubits_after;

std::vector<std::complex<double>> new_data(new_size);

// state[idx] = |q0 q1 ... q_{n-1}>
// where q_i = (idx >> (n-1-i)) & 1
// So device wire 0 corresponds to the MSB (bit n-1)
size_t old_num_qubits = this->device_sv->getNumQubits();

for (size_t old_idx = 0; old_idx < old_data.size(); old_idx++) {
size_t new_idx = 0;
for (size_t i = 0; i < num_qubits_after; i++) {
size_t old_wire = wire_id_pairs[i].first;
size_t old_bit_pos = old_num_qubits - 1 - old_wire;
size_t new_bit_pos = num_qubits_after - 1 - i;

if ((old_idx >> old_bit_pos) & 1) {
new_idx |= (1UL << new_bit_pos);
}
}

new_data[new_idx] += old_data[old_idx];
}

// Normalize the state vector
Pennylane::Util::normalizeStateVector(new_data);

// Replace the state vector
this->device_sv =
std::make_unique<StateVectorT>(new_data.data(), new_data.size());

// Remap device ids
std::unordered_map<size_t, size_t> old_to_new_device_id;
for (size_t new_idx = 0; new_idx < wire_id_pairs.size(); new_idx++) {
size_t old_device_id = wire_id_pairs[new_idx].first;
size_t new_device_id = new_idx;
if (old_device_id != new_device_id) {
old_to_new_device_id[old_device_id] = new_device_id;
}
}

if (!old_to_new_device_id.empty()) {
this->qubit_manager.RemapDeviceIds(old_to_new_device_id);
}

this->needs_reduction = false;
}

void LightningGPUSimulator::StartTapeRecording() {
RT_FAIL_IF(this->tape_recording, "Cannot re-activate the cache manager");
this->tape_recording = true;
Expand Down Expand Up @@ -264,9 +346,7 @@ auto LightningGPUSimulator::Expval(ObsIdType obsKey) -> double {

auto &&obs = this->obs_manager.getObservable(obsKey);

Pennylane::LightningGPU::Measures::Measurements<StateVectorT> m{
*(this->device_sv)};

auto m = getMeasurements();
m.setSeed(this->generateSeed());

return device_shots ? m.expval(*obs, device_shots, {}) : m.expval(*obs);
Expand All @@ -283,9 +363,7 @@ auto LightningGPUSimulator::Var(ObsIdType obsKey) -> double {

auto &&obs = this->obs_manager.getObservable(obsKey);

Pennylane::LightningGPU::Measures::Measurements<StateVectorT> m{
*(this->device_sv)};

auto m = getMeasurements();
m.setSeed(this->generateSeed());

return device_shots ? m.var(*obs, device_shots) : m.var(*obs);
Expand All @@ -307,9 +385,7 @@ void LightningGPUSimulator::State(DataView<std::complex<double>, 1> &state) {
}

void LightningGPUSimulator::Probs(DataView<double, 1> &probs) {
Pennylane::LightningGPU::Measures::Measurements<StateVectorT> m{
*(this->device_sv)};

auto m = getMeasurements();
m.setSeed(this->generateSeed());

auto &&dv_probs = device_shots ? m.probs(device_shots) : m.probs();
Expand All @@ -328,12 +404,11 @@ void LightningGPUSimulator::PartialProbs(
RT_FAIL_IF(numWires > numQubits, "Invalid number of wires");
RT_FAIL_IF(!isValidQubits(wires), "Invalid given wires to measure");

auto dev_wires = getDeviceWires(wires);
Pennylane::LightningGPU::Measures::Measurements<StateVectorT> m{
*(this->device_sv)};

auto m = getMeasurements();
m.setSeed(this->generateSeed());

auto dev_wires = getDeviceWires(wires);

auto &&dv_probs =
device_shots ? m.probs(dev_wires, device_shots) : m.probs(dev_wires);

Expand All @@ -344,10 +419,7 @@ void LightningGPUSimulator::PartialProbs(
}

std::vector<size_t> LightningGPUSimulator::GenerateSamples(size_t shots) {
// generate_samples is a member function of the Measures class.
Pennylane::LightningGPU::Measures::Measurements<StateVectorT> m{
*(this->device_sv)};

auto m = getMeasurements();
m.setSeed(this->generateSeed());

return m.generate_samples(shots);
Expand Down Expand Up @@ -383,11 +455,11 @@ void LightningGPUSimulator::PartialSample(
RT_FAIL_IF(samples.size() != device_shots * numWires,
"Invalid size for the pre-allocated partial-samples");

// get device wires
auto &&dev_wires = getDeviceWires(wires);

auto li_samples = this->GenerateSamples(device_shots);

// Get device wires
auto &&dev_wires = getDeviceWires(wires);

// The lightning samples are laid out as a single vector of size
// shots*qubits, where each element represents a single bit. The
// corresponding shape is (shots, qubits). Gather the desired bits
Expand Down Expand Up @@ -444,11 +516,11 @@ void LightningGPUSimulator::PartialCounts(
RT_FAIL_IF((eigvals.size() != numElements || counts.size() != numElements),
"Invalid size for the pre-allocated partial-counts");

// get device wires
auto &&dev_wires = getDeviceWires(wires);

auto li_samples = this->GenerateSamples(device_shots);

// Get device wires
auto &&dev_wires = getDeviceWires(wires);

// Fill the eigenvalues with the integer representation of the
// corresponding computational basis bitstring. In the future,
// eigenvalues can also be obtained from an observable, hence the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class LightningGPUSimulator final : public Catalyst::Runtime::QuantumDevice {
std::unique_ptr<StateVectorT> device_sv = std::make_unique<StateVectorT>(0);
LightningGPUObsManager<double> obs_manager{};

// Flag to indicate if state vector needs reduction
bool needs_reduction{false};

/// Ask the qubit manager whether `wire` is a valid qubit id.
inline auto isValidQubit(QubitIdType wire) -> bool {
    const bool is_valid = this->qubit_manager.isValidQubitId(wire);
    return is_valid;
}
Expand Down Expand Up @@ -104,6 +107,13 @@ class LightningGPUSimulator final : public Catalyst::Runtime::QuantumDevice {

auto GenerateSamples(size_t shots) -> std::vector<size_t>;

// Reduce state vector by removing released qubits
void reduceStateVector();

// Helper to get Measurements object with reduced state vector
auto getMeasurements()
-> Pennylane::LightningGPU::Measures::Measurements<StateVectorT>;

public:
explicit LightningGPUSimulator(const std::string &kwargs = "{}") {
auto &&args = Catalyst::Runtime::parse_kwargs(kwargs);
Expand Down
Loading
Loading