Skip to content

Commit 1999f26

Browse files
committed
1 parent 4f6290a commit 1999f26

File tree

1 file changed

+293
-0
lines changed

1 file changed

+293
-0
lines changed
Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
/* Copyright 2020 The TensorFlow Quantum Authors. All Rights Reserved.
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
http://www.apache.org/licenses/LICENSE-2.0
6+
Unless required by applicable law or agreed to in writing, software
7+
distributed under the License is distributed on an "AS IS" BASIS,
8+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
See the License for the specific language governing permissions and
10+
limitations under the License.
11+
==============================================================================*/
12+
13+
#include <memory>
14+
#include <vector>
15+
16+
#include <chrono>
17+
#include <custatevec.h>
18+
19+
#include "../qsim/lib/circuit.h"
20+
#include "../qsim/lib/gate_appl.h"
21+
#include "../qsim/lib/gates_cirq.h"
22+
#include "../qsim/lib/gates_qsim.h"
23+
#include "../qsim/lib/seqfor.h"
24+
#include "../qsim/lib/simulator_custatevec.h"
25+
#include "../qsim/lib/statespace_custatevec.h"
26+
#include "tensorflow/core/framework/op_kernel.h"
27+
#include "tensorflow/core/framework/shape_inference.h"
28+
#include "tensorflow/core/framework/tensor_shape.h"
29+
#include "tensorflow/core/lib/core/error_codes.pb.h"
30+
#include "tensorflow/core/lib/core/status.h"
31+
#include "tensorflow/core/lib/core/threadpool.h"
32+
#include "tensorflow/core/platform/mutex.h"
33+
#include "tensorflow_quantum/core/ops/parse_context.h"
34+
#include "tensorflow_quantum/core/proto/pauli_sum.pb.h"
35+
#include "tensorflow_quantum/core/proto/program.pb.h"
36+
#include "tensorflow_quantum/core/src/util_qsim.h"
37+
38+
namespace tfq {
39+
40+
using ::tensorflow::Status;
41+
using ::tfq::proto::PauliSum;
42+
using ::tfq::proto::Program;
43+
44+
typedef qsim::Cirq::GateCirq<float> QsimGate;
45+
typedef qsim::Circuit<QsimGate> QsimCircuit;
46+
47+
48+
class TfqSimulateExpectationOpCuQuantum : public tensorflow::OpKernel {
49+
public:
50+
explicit TfqSimulateExpectationOpCuQuantum(tensorflow::OpKernelConstruction* context)
51+
: OpKernel(context) { }
52+
53+
void Compute(tensorflow::OpKernelContext* context) override {
54+
// TODO (mbbrough): add more dimension checks for other inputs here.
55+
const int num_inputs = context->num_inputs();
56+
OP_REQUIRES(context, num_inputs == 4,
57+
tensorflow::errors::InvalidArgument(absl::StrCat(
58+
"Expected 4 inputs, got ", num_inputs, " inputs.")));
59+
60+
// Create the output Tensor.
61+
const int output_dim_batch_size = context->input(0).dim_size(0);
62+
const int output_dim_op_size = context->input(3).dim_size(1);
63+
tensorflow::TensorShape output_shape;
64+
output_shape.AddDim(output_dim_batch_size);
65+
output_shape.AddDim(output_dim_op_size);
66+
67+
tensorflow::Tensor* output = nullptr;
68+
tensorflow::AllocatorAttributes alloc_attr;
69+
alloc_attr.set_on_host(true); // why??
70+
alloc_attr.set_gpu_compatible(true);
71+
OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output,
72+
alloc_attr));
73+
auto output_tensor = output->matrix<float>();
74+
// Parse program protos.
75+
std::vector<Program> programs;
76+
std::vector<int> num_qubits;
77+
std::vector<std::vector<PauliSum>> pauli_sums; // why is this a vector of vectors??
78+
OP_REQUIRES_OK(context, GetProgramsAndNumQubits(context, &programs,
79+
&num_qubits, &pauli_sums));
80+
81+
std::vector<SymbolMap> maps;
82+
OP_REQUIRES_OK(context, GetSymbolMaps(context, &maps));
83+
84+
OP_REQUIRES(context, programs.size() == maps.size(),
85+
tensorflow::errors::InvalidArgument(absl::StrCat(
86+
"Number of circuits and symbol_values do not match. Got ",
87+
programs.size(), " circuits and ", maps.size(),
88+
" symbol values.")));
89+
90+
// Construct qsim circuits.
91+
std::vector<QsimCircuit> qsim_circuits(programs.size(), QsimCircuit());
92+
std::vector<std::vector<qsim::GateFused<QsimGate>>> fused_circuits(
93+
programs.size(), std::vector<qsim::GateFused<QsimGate>>({}));
94+
95+
Status parse_status = Status::OK();
96+
auto p_lock = tensorflow::mutex();
97+
auto construct_f = [&](int start, int end) {
98+
for (int i = start; i < end; i++) {
99+
Status local =
100+
QsimCircuitFromProgram(programs[i], maps[i], num_qubits[i],
101+
&qsim_circuits[i], &fused_circuits[i]);
102+
NESTED_FN_STATUS_SYNC(parse_status, local, p_lock);
103+
}
104+
};
105+
106+
const int num_cycles = 1000;
107+
context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor(
108+
programs.size(), num_cycles, construct_f);
109+
OP_REQUIRES_OK(context, parse_status);
110+
111+
int max_num_qubits = 0;
112+
for (const int num : num_qubits) {
113+
max_num_qubits = std::max(max_num_qubits, num);
114+
}
115+
116+
// create handles for simulator
117+
cublasCreate(&cublas_handle_);
118+
custatevecCreate(&custatevec_handle_);
119+
if (max_num_qubits >= 26 || programs.size() == 1) {
120+
ComputeLarge(num_qubits, fused_circuits, pauli_sums, context,
121+
&output_tensor); // HOW TO manage extraWorkspace size?
122+
} else {
123+
ComputeSmall(num_qubits, max_num_qubits, fused_circuits, pauli_sums,
124+
context, &output_tensor);
125+
}
126+
// destroy handles in sync with simulator lifetime
127+
cublasDestroy(cublas_handle_);
128+
custatevecDestroy(custatevec_handle_);
129+
}
130+
131+
private:
132+
cublasHandle_t cublas_handle_;
133+
custatevecHandle_t custatevec_handle_;
134+
135+
// Define the GPU implementation that launches the CUDA kernel.
136+
void ComputeLarge(
137+
const std::vector<int>& num_qubits,
138+
const std::vector<std::vector<qsim::GateFused<QsimGate>>>& fused_circuits,
139+
const std::vector<std::vector<PauliSum>>& pauli_sums,
140+
tensorflow::OpKernelContext* context,
141+
tensorflow::TTypes<float, 1>::Matrix* output_tensor) {
142+
// Instantiate qsim objects.
143+
using Simulator = qsim::SimulatorCuStateVec<float>;
144+
using StateSpace = Simulator::StateSpace;
145+
146+
147+
// Launch the cuda kernel.
148+
// Begin simulation.
149+
int largest_nq = 1;
150+
Simulator sim = Simulator(custatevec_handle_);
151+
StateSpace ss = StateSpace(cublas_handle_, custatevec_handle_);
152+
auto sv = ss.Create(largest_nq);
153+
ss.SetStateZero(sv);
154+
auto scratch = ss.Create(largest_nq);
155+
156+
// Simulate programs one by one. Parallelizing over state vectors
157+
// we no longer parallelize over circuits. Each time we encounter a
158+
// a larger circuit we will grow the Statevector as necessary.
159+
for (int i = 0; i < fused_circuits.size(); i++) {
160+
int nq = num_qubits[i];
161+
162+
if (nq > largest_nq) {
163+
// need to switch to larger statespace.
164+
largest_nq = nq;
165+
sv = ss.Create(largest_nq);
166+
scratch = ss.Create(largest_nq);
167+
}
168+
// TODO: add heuristic here so that we do not always recompute
169+
// the state if there is a possibility that circuit[i] and
170+
// circuit[i + 1] produce the same state.
171+
ss.SetStateZero(sv);
172+
for (int j = 0; j < fused_circuits[i].size(); j++) {
173+
qsim::ApplyFusedGate(sim, fused_circuits[i][j], sv);
174+
}
175+
for (int j = 0; j < pauli_sums[i].size(); j++) {
176+
// (#679) Just ignore empty program
177+
if (fused_circuits[i].size() == 0) {
178+
(*output_tensor)(i, j) = -2.0;
179+
continue;
180+
}
181+
float exp_v = 0.0;
182+
OP_REQUIRES_OK(context,
183+
ComputeExpectationQsim(pauli_sums[i][j], sim, ss, sv,
184+
scratch, &exp_v));
185+
(*output_tensor)(i, j) = exp_v;
186+
}
187+
}
188+
}
189+
190+
void ComputeSmall(
191+
const std::vector<int>& num_qubits, const int max_num_qubits,
192+
const std::vector<std::vector<qsim::GateFused<QsimGate>>>& fused_circuits,
193+
const std::vector<std::vector<PauliSum>>& pauli_sums,
194+
tensorflow::OpKernelContext* context,
195+
tensorflow::TTypes<float, 1>::Matrix* output_tensor) {
196+
using Simulator = qsim::SimulatorCuStateVec<float>;
197+
using StateSpace = Simulator::StateSpace;
198+
199+
const int output_dim_op_size = output_tensor->dimension(1);
200+
201+
Status compute_status = Status::OK();
202+
auto c_lock = tensorflow::mutex();
203+
auto DoWork = [&](int start, int end) {
204+
int old_batch_index = -2;
205+
int cur_batch_index = -1;
206+
int largest_nq = 1;
207+
int cur_op_index;
208+
209+
// Launch custatevec, begin simulation.
210+
auto sim = Simulator(custatevec_handle_);
211+
auto ss = StateSpace(cublas_handle_, custatevec_handle_);
212+
auto sv = ss.Create(largest_nq);
213+
auto scratch = ss.Create(largest_nq);
214+
for (int i = start; i < end; i++) {
215+
cur_batch_index = i / output_dim_op_size;
216+
cur_op_index = i % output_dim_op_size;
217+
218+
const int nq = num_qubits[cur_batch_index];
219+
220+
// (#679) Just ignore empty program
221+
if (fused_circuits[cur_batch_index].size() == 0) {
222+
(*output_tensor)(cur_batch_index, cur_op_index) = -2.0;
223+
continue;
224+
}
225+
226+
if (cur_batch_index != old_batch_index) {
227+
// We've run into a new state vector we must compute.
228+
// Only compute a new state vector when we have to.
229+
if (nq > largest_nq) {
230+
largest_nq = nq;
231+
sv = ss.Create(largest_nq);
232+
scratch = ss.Create(largest_nq);
233+
}
234+
// no need to update scratch_state since ComputeExpectation
235+
// will take care of things for us.
236+
ss.SetStateZero(sv);
237+
for (int j = 0; j < fused_circuits[cur_batch_index].size(); j++) {
238+
qsim::ApplyFusedGate(sim, fused_circuits[cur_batch_index][j], sv);
239+
}
240+
}
241+
242+
float exp_v = 0.0;
243+
NESTED_FN_STATUS_SYNC(
244+
compute_status,
245+
ComputeExpectationQsim(pauli_sums[cur_batch_index][cur_op_index],
246+
sim, ss, sv, scratch, &exp_v),
247+
c_lock);
248+
(*output_tensor)(cur_batch_index, cur_op_index) = exp_v;
249+
old_batch_index = cur_batch_index;
250+
}
251+
};
252+
253+
const int64_t num_cycles =
254+
200 * (int64_t(1) << static_cast<int64_t>(max_num_qubits));
255+
context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor(
256+
fused_circuits.size() * output_dim_op_size, num_cycles, DoWork);
257+
OP_REQUIRES_OK(context, compute_status);
258+
}
259+
};
260+
261+
// Registers the kernel class defined above for the
// "TfqSimulateExpectationOpCuQuantum" op on DEVICE_CPU. The class argument
// must name the kernel type exactly as declared in this file.
REGISTER_KERNEL_BUILDER(
    Name("TfqSimulateExpectationOpCuQuantum").Device(tensorflow::DEVICE_CPU),
    TfqSimulateExpectationOpCuQuantum);
264+
265+
// Op definition: four inputs (programs, symbol_names, symbol_values,
// pauli_sums) and a single float output named "expectations".
//
// The shape function requires the inputs to have ranks 1, 1, 2 and 2
// respectively and declares the output as a matrix whose rows follow
// dimension 0 of `programs` and whose columns follow dimension 1 of
// `pauli_sums`.
REGISTER_OP("TfqSimulateExpectationOpCuQuantum")
    .Input("programs: string")
    .Input("symbol_names: string")
    .Input("symbol_values: float")
    .Input("pauli_sums: string")
    .Output("expectations: float")
    .SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) {
      using ::tensorflow::shape_inference::ShapeHandle;

      // Validate input ranks in declaration order; the first failure wins.
      ShapeHandle programs;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &programs));

      ShapeHandle symbol_names;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &symbol_names));

      ShapeHandle symbol_values;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 2, &symbol_values));

      ShapeHandle pauli_sums;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 2, &pauli_sums));

      // Output is [programs.dim(0), pauli_sums.dim(1)].
      c->set_output(0, c->Matrix(c->Dim(programs, 0), c->Dim(pauli_sums, 1)));

      return tensorflow::Status::OK();
    });
292+
293+
} // namespace tfq

0 commit comments

Comments
 (0)