Skip to content

Commit 75ae209

Browse files
authored
Merge pull request #77 from mit-han-lab/feat-qaoa
[major] add param shift version of qaoa
2 parents 9f9a071 + 33d926c commit 75ae209

File tree

3 files changed

+269
-14
lines changed

3 files changed

+269
-14
lines changed

examples/qaoa/max_cut_backprop.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def mixer(self, beta):
4343
tqf.rx(
4444
self.q_device,
4545
wires=wire,
46-
params=2 * beta.unsqueeze(0),
46+
params=beta.unsqueeze(0),
4747
static=self.static_mode,
4848
parent_graph=self.graph,
4949
)
@@ -116,8 +116,8 @@ def forward(self, measure_all=False):
116116
expVal = 0
117117
for edge in self.input_graph:
118118
pauli_string = self.edge_to_PauliString(edge)
119-
expVal -= 0.5 * (
120-
1 - expval_joint_analytical(self.q_device, observable=pauli_string)
119+
expVal += 0.5 * (
120+
expval_joint_analytical(self.q_device, observable=pauli_string)
121121
)
122122
return expVal
123123
else:

examples/qaoa/max_cut_backprop_new.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def mixer(self, qdev, beta):
4040
for wire in range(self.n_wires):
4141
qdev.rx(
4242
wires=wire,
43-
params=2 * beta.unsqueeze(0),
43+
params=beta.unsqueeze(0),
4444
) # type: ignore
4545

4646
def entangler(self, qdev, gamma):
@@ -74,6 +74,7 @@ def circuit(self, qdev):
7474
"""
7575
execute the quantum circuit
7676
"""
77+
# print(self.betas, self.gammas)
7778
for wire in range(self.n_wires):
7879
qdev.h(
7980
wires=wire,
@@ -92,15 +93,17 @@ def forward(self, measure_all=False):
9293
qdev = tq.QuantumDevice(n_wires=self.n_wires, device=self.betas.device)
9394

9495
self.circuit(qdev)
95-
print(tq.measure(qdev, n_shots=1024))
96+
# print(tq.measure(qdev, n_shots=1024))
9697
# compute the expectation value
98+
# print(qdev.get_states_1d())
9799
if measure_all is False:
98100
expVal = 0
99101
for edge in self.input_graph:
100102
pauli_string = self.edge_to_PauliString(edge)
101-
expVal -= 0.5 * (
102-
1 - expval_joint_analytical(qdev, observable=pauli_string)
103-
)
103+
expv = expval_joint_analytical(qdev, observable=pauli_string)
104+
expVal += 0.5 * expv
105+
# print(pauli_string, expv)
106+
# print(expVal)
104107
return expVal
105108
else:
106109
return tq.measure(qdev, n_shots=1024, draw_id=0)
@@ -143,16 +146,16 @@ def main():
143146
n_wires = 4
144147
n_layers = 3
145148
model = MAXCUT(n_wires=n_wires, input_graph=input_graph, n_layers=n_layers)
146-
model.to("cuda")
149+
# model.to("cuda")
147150
# model.to(torch.device("cuda"))
148-
circ = tq2qiskit(tq.QuantumDevice(n_wires=4), model)
149-
print(circ)
151+
# circ = tq2qiskit(tq.QuantumDevice(n_wires=4), model)
152+
# print(circ)
150153
# print("The circuit is", circ.draw(output="mpl"))
151154
# circ.draw(output="mpl")
152155
# use backprop
153156
backprop_optimize(model, n_steps=300, lr=0.01)
154157
# use parameter shift rule
155-
# param_shift_optimize(model, n_steps=10, step_size=0.1)
158+
# param_shift_optimize(model, n_steps=500, step_size=100000)
156159

157160
"""
158161
Notes:
@@ -161,7 +164,7 @@ def main():
161164
"""
162165

163166
if __name__ == "__main__":
164-
import pdb
165-
pdb.set_trace()
167+
# import pdb
168+
# pdb.set_trace()
166169

167170
main()
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
import torch
2+
import torchquantum as tq
3+
import torchquantum.functional as tqf
4+
5+
import random
6+
import numpy as np
7+
8+
from torchquantum.functional import mat_dict
9+
10+
from torchquantum.plugins import tq2qiskit, qiskit2tq
11+
from torchquantum.measurement import expval_joint_analytical
12+
13+
seed = 0
14+
random.seed(seed)
15+
np.random.seed(seed)
16+
torch.manual_seed(seed)
17+
18+
class MAXCUT(tq.QuantumModule):
    """QAOA module that computes the optimal cut for a given graph.

    Output: the most probable measured bitstring decides which set
    {0 or 1} each node belongs to.

    The shift_* attributes are bookkeeping for the parameter-shift rule:
    when set, exactly one gate (identified by layer plus wire or edge id)
    has its rotation angle offset by shift_degree; when reset, no gate is
    offset.
    """

    def __init__(self, n_wires, input_graph, n_layers):
        """
        Args:
            n_wires: number of qubits (one per graph node).
            input_graph: list of edges, each a pair of node indices.
            n_layers: number of QAOA (mixer + entangler) layers.
        """
        super().__init__()

        self.n_wires = n_wires

        self.input_graph = input_graph  # list of edges
        self.n_layers = n_layers
        self.n_edges = len(input_graph)

        # One trainable angle per layer for the mixer (beta) and the
        # entangler (gamma), initialized to small random values.
        self.betas = torch.nn.Parameter(0.01 * torch.rand(self.n_layers))
        self.gammas = torch.nn.Parameter(0.01 * torch.rand(self.n_layers))

        # Start with no gate targeted for a parameter shift.
        self.reset_shift_param()

    def mixer(self, qdev, beta, layer_id):
        """
        Apply the mixer layer of the QAOA ansatz: an rx(beta) rotation on
        every wire (mixer = exp(-i * beta * sigma_x); NOTE(review): with
        the usual rx(theta) = exp(-i theta X / 2) convention this is
        exp(-i beta X / 2) — confirm against torchquantum's rx).

        If the shift bookkeeping targets a beta gate in this layer, that
        single gate's angle is offset by shift_degree.
        """
        for wire in range(self.n_wires):
            if self.shift_param_name == 'beta' and self.shift_wire == wire and layer_id == self.shift_layer:
                degree = self.shift_degree
            else:
                degree = 0
            qdev.rx(
                wires=wire,
                params=(beta.unsqueeze(0) + degree),
            )  # type: ignore

    def entangler(self, qdev, gamma, layer_id):
        """
        Apply the entangler layer of the QAOA ansatz:
        entangler = exp(-i * gamma * (1 - sigma_z * sigma_z)/2),
        realized as cx - rz(gamma) - cx on each edge.

        If the shift bookkeeping targets a gamma gate in this layer, that
        single rz angle is offset by shift_degree.
        """
        for edge_id, edge in enumerate(self.input_graph):
            if self.shift_param_name == 'gamma' and edge_id == self.shift_edge_id and layer_id == self.shift_layer:
                degree = self.shift_degree
            else:
                degree = 0
            qdev.cx(
                [edge[0], edge[1]],
            )  # type: ignore
            qdev.rz(
                wires=edge[1],
                params=(gamma.unsqueeze(0) + degree),
            )  # type: ignore
            qdev.cx(
                [edge[0], edge[1]],
            )  # type: ignore

    def set_shift_param(self, layer, wire, param_name, degree, edge_id):
        """
        Target one gate for the parameter-shift rule.

        Args:
            layer: layer index of the gate to shift.
            wire: wire index (used for 'beta' shifts; None for 'gamma').
            param_name: 'beta' or 'gamma'.
            degree: angle offset to add (typically +/- pi/2).
            edge_id: edge index (used for 'gamma' shifts; None for 'beta').
        """
        self.shift_layer = layer
        self.shift_wire = wire
        self.shift_param_name = param_name
        self.shift_degree = degree
        self.shift_edge_id = edge_id

    def reset_shift_param(self):
        """
        Clear the shift target so no gate is offset.
        """
        self.shift_layer = None
        self.shift_wire = None
        self.shift_param_name = None
        self.shift_degree = None
        self.shift_edge_id = None

    def edge_to_PauliString(self, edge):
        # Build an n_wires-long Pauli string with Z on the edge's two
        # endpoints and I elsewhere, e.g. edge (0, 2), 4 wires -> "ZIZI".
        pauli_string = ""
        for wire in range(self.n_wires):
            if wire in edge:
                pauli_string += "Z"
            else:
                pauli_string += "I"
        return pauli_string

    def circuit(self, qdev):
        """
        Execute the quantum circuit: Hadamards on every wire to prepare
        the uniform superposition, then alternating mixer/entangler layers.
        """
        # print(self.betas, self.gammas)
        for wire in range(self.n_wires):
            qdev.h(
                wires=wire,
            )  # type: ignore

        for i in range(self.n_layers):
            self.mixer(qdev, self.betas[i], i)
            self.entangler(qdev, self.gammas[i], i)

    def forward(self, measure_all=False):
        """
        Run the QAOA ansatz and evaluate it.

        Args:
            measure_all: if False, return the analytic expectation value
                sum over edges of 0.5 * <Z_i Z_j> (minimizing this favors
                anti-aligned edge endpoints, i.e. a larger cut); if True,
                return sampled measurements over all qubits instead.
        """
        qdev = tq.QuantumDevice(n_wires=self.n_wires, device=self.betas.device)

        self.circuit(qdev)
        # print(tq.measure(qdev, n_shots=1024))
        # compute the expectation value
        # print(qdev.get_states_1d())
        if measure_all is False:
            expVal = 0
            for edge in self.input_graph:
                pauli_string = self.edge_to_PauliString(edge)
                expv = expval_joint_analytical(qdev, observable=pauli_string)
                expVal += 0.5 * expv
                # print(pauli_string, expv)
            # print(expVal)
            return expVal
        else:
            return tq.measure(qdev, n_shots=1024, draw_id=0)
142+
143+
def main():
    """Build a 4-node ring-graph MaxCut instance and optimize it with the
    parameter-shift rule."""
    # create a input_graph
    input_graph = [(0, 1), (0, 3), (1, 2), (2, 3)]
    n_wires = 4
    n_layers = 3
    model = MAXCUT(n_wires=n_wires, input_graph=input_graph, n_layers=n_layers)
    # model.to("cuda")
    # model.to(torch.device("cuda"))
    # circ = tq2qiskit(tq.QuantumDevice(n_wires=4), model)
    # print(circ)
    # print("The circuit is", circ.draw(output="mpl"))
    # circ.draw(output="mpl")
    # use backprop
    # backprop_optimize(model, n_steps=300, lr=0.01)
    # use parameter shift rule
    param_shift_optimize(model, n_steps=500, step_size=0.01)
159+
160+
def shift_and_run(model, use_qiskit=False):
    """Compute the loss and its gradient via the parameter-shift rule.

    For each rotation angle theta, the gradient is estimated as
    0.5 * (f(theta + pi/2) - f(theta - pi/2)), shifting exactly one gate
    at a time through the model's set_shift_param / reset_shift_param
    bookkeeping.

    Args:
        model: a MAXCUT-like module exposing n_layers, n_wires, n_edges
            and the shift-parameter bookkeeping methods.
        use_qiskit: kept for interface compatibility; currently unused.
            NOTE(review): the original passed this positionally into
            forward(measure_all=...), which silently changed the return
            type to sampled measurements when True — it is no longer
            forwarded.

    Returns:
        A tuple (loss, [grad_betas, grad_gammas]) where loss is the
        unshifted model output and the gradient lists hold one Python
        float per layer.
    """
    grad_betas = []
    grad_gammas = []
    n_layers = model.n_layers
    n_wires = model.n_wires
    n_edges = model.n_edges

    for i in range(n_layers):
        # Gamma gradient: shift each entangler rz gate (one per edge) in
        # layer i by +/- pi/2 and accumulate the shift-rule difference.
        grad_gamma = 0
        for k in range(n_edges):
            model.set_shift_param(i, None, 'gamma', np.pi * 0.5, k)
            out1 = model()
            model.reset_shift_param()

            model.set_shift_param(i, None, 'gamma', -np.pi * 0.5, k)
            out2 = model()
            model.reset_shift_param()

            grad_gamma += 0.5 * (out1 - out2).squeeze().item()
        grad_gammas.append(grad_gamma)

        # Beta gradient: shift each mixer rx gate (one per wire) in
        # layer i the same way.
        grad_beta = 0
        for j in range(n_wires):
            model.set_shift_param(i, j, 'beta', np.pi * 0.5, None)
            out1 = model()
            model.reset_shift_param()

            model.set_shift_param(i, j, 'beta', -np.pi * 0.5, None)
            out2 = model()
            model.reset_shift_param()

            grad_beta += 0.5 * (out1 - out2).squeeze().item()
        grad_betas.append(grad_beta)

    # Final unshifted evaluation gives the loss at the current parameters.
    return model(), [grad_betas, grad_gammas]
197+
198+
199+
def param_shift_optimize(model, n_steps=10, step_size=0.1):
    """Find the optimal cut using parameter-shift gradients.

    Performs plain gradient descent on the per-layer betas and gammas,
    updating them in place under torch.no_grad().

    Args:
        model: MAXCUT module with betas / gammas Parameters.
        n_steps: number of gradient-descent iterations.
        step_size: learning rate for the descent updates.

    Returns:
        The final sampled measurement, i.e. model(measure_all=True).
    """
    n_layers = model.n_layers
    for step in range(n_steps):
        # Gradients come from the shift rule, not autograd, so the whole
        # step runs under no_grad (the original used two separate
        # no_grad blocks per step; one suffices).
        with torch.no_grad():
            loss, grad_list = shift_and_run(model)
            print(loss)
            for i in range(n_layers):
                # copy_ mutates the Parameter in place so the module
                # keeps tracking the same tensor objects.
                model.betas[i].copy_(model.betas[i] - step_size * grad_list[0][i])
                model.gammas[i].copy_(model.gammas[i] - step_size * grad_list[1][i])
    return model(measure_all=True)
241+
242+
"""
243+
Notes:
244+
1. input_graph = [(0, 1), (3, 0), (1, 2), (2, 3)], mixer 1st & entangler 2nd, n_layers >= 2, answer is correct.
245+
246+
"""
247+
248+
if __name__ == "__main__":
249+
# import pdb
250+
# pdb.set_trace()
251+
252+
main()

0 commit comments

Comments
 (0)