import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
from qutip import Qobj, basis, sigmax, sigmaz, tensor, expect
from collections import deque
import random # For evolutionary mutations
# Refined Surface Code Env (L=5, planar for efficiency; refined error model)
class SurfaceCodeEnv:
    def __init__(self, size=5, p_error=0.05, mode='planar'):
        self.size = size
        self.n_qubits = size**2 if mode == 'rotated' else 2 * size**2 - 2 * size + 1  # Optimized
        self.n_stab = size**2 * 2 - 2 * size if mode == 'planar' else 2 * size**2
        self.p_error = p_error
        self.mode = mode
        self.logical_x = np.zeros(self.n_qubits)  # Refined logicals
        self.logical_z = np.zeros(self.n_qubits)
        self.logical_x[:size] = 1
        self.logical_z[::size] = 1
        self.reset()
    def reset(self):
        self.true_error = np.random.choice([0, 1], size=self.n_qubits, p=[1 - self.p_error, self.p_error])
        self.syndrome = self.compute_syndrome(self.true_error)
        return self.syndrome
def compute_syndrome(self, error):
syndrome = np.zeros(self.n_stab)
# Refined: Vectorized parity (mod 2 sum)
for i in range(self.n_stab // 2): # Z stabs
syndrome[i] = np.sum(error[i:i+4]) % 2 # Simplified adjacency
for i in range(self.n_stab // 2, self.n_stab): # X stabs
syndrome[i] = np.sum(error[i:i+4]) % 2
return syndrome
    def step(self, correction):
        total_error = (self.true_error + correction) % 2
        logical_error = (np.dot(total_error, self.logical_x) % 2) or (np.dot(total_error, self.logical_z) % 2)
        reward = 1 if logical_error == 0 else -1
        done = True
        return self.syndrome, reward, done
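
# Quick smoke test of the environment interface (an illustrative sketch only; the
# size-3 instance and the trivial all-zeros correction are not part of the original):
demo_env = SurfaceCodeEnv(size=3, p_error=0.1, mode='planar')
demo_syndrome = demo_env.reset()
print("syndrome shape:", demo_syndrome.shape)       # (n_stab,)
no_op = np.zeros(demo_env.n_qubits)                 # do-nothing correction
_, demo_reward, demo_done = demo_env.step(no_op)
print("reward:", demo_reward, "done:", demo_done)   # +1 only if no logical error remains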
# Upgraded Quantum Actor (Self-Rewriting Params)
class QuantumActor(nn.Module):
    def __init__(self, state_dim, action_dim, n_qubits=6, n_layers=3):  # Upgraded dims
        super().__init__()
        self.pre_nn = nn.Linear(state_dim, n_qubits)
        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.params = nn.Parameter(torch.randn(n_layers * n_qubits * 2))
        self.post_nn = nn.Linear(n_qubits, action_dim)
        self.meta_opt = optim.SGD(self.parameters(), lr=1e-5)  # For self-evolution
    def forward(self, state):
        features = torch.tanh(self.pre_nn(state))
        batch_size = features.shape[0]
        exp_vals = []
        I2 = Qobj(np.eye(2))
        H = Qobj([[1, 1], [1, -1]]) / np.sqrt(2)
        for b in range(batch_size):
            psi = tensor([basis(2, 0) for _ in range(self.n_qubits)])
            # Hadamard layer, applied per qubit via tensor products with identities
            for q in range(self.n_qubits):
                gate = tensor([H if i == q else I2 for i in range(self.n_qubits)])
                psi = gate * psi
            # Variational RY/RZ layers; parameters enter QuTiP as plain floats, so no
            # autograd flows through the circuit -- updates come from self_evolve()
            # and the population mutations below.
            p_idx = 0
            for layer in range(self.n_layers):
                for q in range(self.n_qubits):
                    theta = float(self.params[p_idx])
                    phi = float(self.params[p_idx + 1])
                    ry = Qobj([[np.cos(theta / 2), -np.sin(theta / 2)],
                               [np.sin(theta / 2), np.cos(theta / 2)]])
                    rz = Qobj([[np.exp(-1j * phi / 2), 0],
                               [0, np.exp(1j * phi / 2)]])
                    gate = tensor([rz * ry if i == q else I2 for i in range(self.n_qubits)])
                    psi = gate * psi
                    p_idx += 2
                # Entangling CZ ladder on neighbouring qubit pairs
                cz = Qobj(np.diag([1, 1, 1, -1]), dims=[[2, 2], [2, 2]])
                for q in range(self.n_qubits - 1):
                    gate = tensor([I2] * q + [cz] + [I2] * (self.n_qubits - q - 2))
                    psi = gate * psi
            # Encode classical features as RX rotations
            for q in range(self.n_qubits):
                f = float(features[b, q])
                rx = Qobj([[np.cos(f / 2), -1j * np.sin(f / 2)],
                           [-1j * np.sin(f / 2), np.cos(f / 2)]])
                gate = tensor([rx if i == q else I2 for i in range(self.n_qubits)])
                psi = gate * psi
            # Per-qubit <Z> expectation values feed the classical head
            exp_val = []
            for q in range(self.n_qubits):
                meas = tensor([sigmaz() if i == q else I2 for i in range(self.n_qubits)])
                exp_val.append(float(expect(meas, psi)))
            exp_vals.append(exp_val)
        quantum_out = torch.tensor(exp_vals, dtype=torch.float32)
        logits = self.post_nn(quantum_out)
        return Categorical(logits=logits)
    def self_evolve(self, fitness):  # Darwinian upgrade
        if fitness < 0.9:  # Mutate if fitness is low
            self.params.data += torch.randn_like(self.params) * 0.01  # Variation
            self.meta_opt.step()  # Select/refine
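
# Forward-pass sanity check for the actor (a minimal sketch; the small n_qubits=4
# circuit and the all-zeros input are arbitrary choices, just to confirm the QuTiP
# simulation runs end to end):
_demo_actor = QuantumActor(state_dim=12, action_dim=13, n_qubits=4, n_layers=2)
_demo_dist = _demo_actor(torch.zeros(1, 12))
print("sampled action:", _demo_dist.sample().item())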
# Critic Unchanged
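# The Critic class itself is not reproduced here; a minimal stand-in, assuming a
# small two-layer MLP value head (the hidden size is a hypothetical choice), could be:
class Critic(nn.Module):
    def __init__(self, state_dim, hidden_dim=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, state):
        # One value estimate per state in the batch
        return self.net(state).squeeze(-1)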
# Evolved PPO (with Darwinian Population)
class PPO:
    def __init__(self, state_dim, action_dim, population_size=5, mutation_rate=0.05, **kwargs):
        super().__init__()  # Inherit base
        self.population = [QuantumActor(state_dim, action_dim) for _ in range(population_size)]
        self.critic = Critic(state_dim)
        # ... (opts, buffer)
        self.mutation_rate = mutation_rate
    def update(self):
        # ... (base update on best actor)
        # Darwinian: evaluate the population, then select/mutate
        fitnesses = [self.evaluate_actor(actor) for actor in self.population]
        best_idx = np.argmax(fitnesses)
        for i in range(len(self.population)):
            if i != best_idx and random.random() < self.mutation_rate:
                self.population[i].params.data = (self.population[best_idx].params.data.clone()
                                                  + torch.randn_like(self.population[i].params) * 0.02)
        # Refine: meta-optimize the best actor
        self.population[best_idx].self_evolve(fitnesses[best_idx])
    def evaluate_actor(self, actor):
        # Sim reward over 10 episodes (refined)
        rewards = []
        for _ in range(10):
            state = env.reset()
            state_t = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            dist = actor(state_t)
            action = dist.sample().item()
            correction = np.zeros(env.n_qubits)
            correction[action] = 1
            _, reward, _ = env.step(correction)
            rewards.append(reward)
        return np.mean(rewards)
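
# The optimizer/buffer setup is elided in PPO.__init__ above; a hypothetical helper
# like the one below (names other than `buffer` and `critic` are assumptions) shows
# one way to fill that gap, e.g. by calling _init_ppo_extras(ppo) right after the
# PPO is constructed in the training loop below:
def _init_ppo_extras(ppo, lr_actor=3e-4, lr_critic=1e-3):
    ppo.buffer = []  # transition buffer consumed by update() and the training loop
    ppo.actor_opts = [optim.Adam(a.parameters(), lr=lr_actor) for a in ppo.population]
    ppo.critic_opt = optim.Adam(ppo.critic.parameters(), lr=lr_critic)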
# Training: Evolved Loop
env = SurfaceCodeEnv(size=5, p_error=0.03, mode='rotated')  # Upgraded
state_dim = env.n_stab
action_dim = env.n_qubits
ppo = PPO(state_dim, action_dim)
for episode in range(2000):  # Scaled
    state = env.reset()
    state_t = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
    actor = ppo.population[episode % len(ppo.population)]  # Cycle through the population
    dist = actor(state_t)
    action = dist.sample().item()
    correction = np.zeros(env.n_qubits)
    correction[action] = 1
    next_state, reward, done = env.step(correction)
    value = ppo.critic(state_t).item()
    old_prob = dist.log_prob(torch.tensor(action)).item()
    ppo.buffer.append((state, action, reward, old_prob, value, done))
    if len(ppo.buffer) % 64 == 0:
        ppo.update()
    if episode % 200 == 0:
        print(f"Episode {episode}: Avg Fitness {np.mean([ppo.evaluate_actor(a) for a in ppo.population]):.4f}")