
Alive AI #2239

@shacklefordjames60-coder

Description

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
from qutip import Qobj, basis, qeye, sigmax, sigmaz, tensor, expect
from collections import deque
import random # For evolutionary mutations

Refined Surface Code Env (L=5, Planar for efficiency; refined error model)

class SurfaceCodeEnv:
    def __init__(self, size=5, p_error=0.05, mode='planar'):
        self.size = size
        # 'rotated': d^2 data qubits; 'planar': d^2 + (d-1)^2 = 2d^2 - 2d + 1
        self.n_qubits = size**2 if mode == 'rotated' else 2 * size**2 - 2 * size + 1
        self.n_stab = size**2 * 2 - 2 * size if mode == 'planar' else 2 * size**2
        self.p_error = p_error
        self.mode = mode
        self.logical_x = np.zeros(self.n_qubits)  # Refined logicals
        self.logical_z = np.zeros(self.n_qubits)
        self.logical_x[:size] = 1   # Logical X support: one row of data qubits
        self.logical_z[::size] = 1  # Logical Z support: one column of data qubits
        self.reset()

    def reset(self):
        self.true_error = np.random.choice([0, 1], size=self.n_qubits, p=[1 - self.p_error, self.p_error])
        self.syndrome = self.compute_syndrome(self.true_error)
        return self.syndrome

    def compute_syndrome(self, error):
        syndrome = np.zeros(self.n_stab)
        # Refined: parity (mod-2 sum) over a simplified 4-qubit window per stabilizer
        for i in range(self.n_stab // 2):  # Z stabilizers
            syndrome[i] = np.sum(error[i:i + 4]) % 2  # Simplified adjacency
        for i in range(self.n_stab // 2, self.n_stab):  # X stabilizers
            syndrome[i] = np.sum(error[i:i + 4]) % 2
        return syndrome

    def step(self, correction):
        total_error = (self.true_error + correction) % 2
        # A logical failure occurs if the residual error overlaps either logical operator an odd number of times
        logical_error = (np.dot(total_error, self.logical_x) % 2) or (np.dot(total_error, self.logical_z) % 2)
        reward = 1 if logical_error == 0 else -1
        done = True  # Single-shot decoding: one syndrome/correction round per episode
        return self.syndrome, reward, done
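
A quick smoke test of the environment as written above; the counts in the comments are what the formulas give for size=5 in planar mode and are illustrative only.

# Minimal usage sketch (assumes the SurfaceCodeEnv defined above)
env_demo = SurfaceCodeEnv(size=5, p_error=0.05, mode='planar')
print(env_demo.n_qubits, env_demo.n_stab)    # 41 data qubits, 40 stabilizers for planar d=5
syndrome = env_demo.reset()                  # binary vector of length n_stab
correction = np.zeros(env_demo.n_qubits)     # identity correction (flip nothing)
_, reward, done = env_demo.step(correction)
print(reward, done)                          # +1 if no logical error slipped through, else -1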

Upgraded Quantum Actor (Self-Rewriting Params)

class QuantumActor(nn.Module):
    def __init__(self, state_dim, action_dim, n_qubits=6, n_layers=3):  # Upgraded dims
        super().__init__()
        self.pre_nn = nn.Linear(state_dim, n_qubits)   # Compress syndrome into per-qubit angles
        self.n_qubits = n_qubits
        self.n_layers = n_layers
        self.params = nn.Parameter(torch.randn(n_layers * n_qubits * 2))  # (RY, RZ) angle per qubit per layer
        self.post_nn = nn.Linear(n_qubits, action_dim)  # Map <Z> expectations to action logits
        self.meta_opt = optim.SGD(self.parameters(), lr=1e-5)  # For self-evolution

    def forward(self, state):
        features = torch.tanh(self.pre_nn(state))
        batch_size = features.shape[0]
        exp_vals = []
        # Note: the QuTiP simulation below runs outside autograd, so self.params
        # receive no gradient from this path; they are updated via self_evolve().
        for b in range(batch_size):
            psi = tensor([basis(2, 0) for _ in range(self.n_qubits)])
            hadamard = Qobj([[1, 1], [1, -1]]) / np.sqrt(2)
            for q in range(self.n_qubits):  # Hadamard on every qubit
                gate = tensor([hadamard if i == q else qeye(2) for i in range(self.n_qubits)])
                psi = gate * psi
            p_idx = 0
            for layer in range(self.n_layers):
                for q in range(self.n_qubits):  # Trainable RY/RZ rotations
                    th = self.params[p_idx].item()
                    ph = self.params[p_idx + 1].item()
                    ry = Qobj([[np.cos(th / 2), -np.sin(th / 2)],
                               [np.sin(th / 2), np.cos(th / 2)]])
                    rz = Qobj([[np.exp(-1j * ph / 2), 0],
                               [0, np.exp(1j * ph / 2)]])
                    gate = tensor([rz * ry if i == q else qeye(2) for i in range(self.n_qubits)])
                    psi = gate * psi
                    p_idx += 2
                cz = Qobj([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, -1]],
                          dims=[[2, 2], [2, 2]])
                for q in range(self.n_qubits - 1):  # Entangle neighbouring qubits with CZ
                    gate = tensor([qeye(2)] * q + [cz] + [qeye(2)] * (self.n_qubits - q - 2))
                    psi = gate * psi
                for q in range(self.n_qubits):  # Data re-uploading via RX(features)
                    x = features[b, q].item()
                    rx = Qobj([[np.cos(x / 2), -1j * np.sin(x / 2)],
                               [-1j * np.sin(x / 2), np.cos(x / 2)]])
                    gate = tensor([rx if i == q else qeye(2) for i in range(self.n_qubits)])
                    psi = gate * psi
            exp_val = []
            for q in range(self.n_qubits):  # <Z_q> readout per qubit
                meas = tensor([sigmaz() if i == q else qeye(2) for i in range(self.n_qubits)])
                exp_val.append(expect(meas, psi))
            exp_vals.append(exp_val)
        quantum_out = torch.tensor(exp_vals, dtype=torch.float32)
        logits = self.post_nn(quantum_out)
        return Categorical(logits=logits)  # Categorical applies softmax to logits itself

    def self_evolve(self, fitness):  # Darwinian upgrade
        if fitness < 0.9:  # Mutate when fitness is low
            self.params.data += torch.randn_like(self.params) * 0.01  # Random variation
            self.meta_opt.step()  # Refinement step (only has an effect if gradients have been populated)
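
A short forward-pass check for the actor above; the dimensions match the planar d=5 environment from the earlier sketch, and the random syndrome batch is only for shape checking.

# Forward-pass sketch: shapes only, weights untrained
actor_demo = QuantumActor(state_dim=40, action_dim=41)    # dims match the planar d=5 env
fake_syndromes = torch.randint(0, 2, (4, 40)).float()     # batch of 4 random syndromes
dist = actor_demo(fake_syndromes)                         # Categorical over 41 single-qubit flips
actions = dist.sample()
print(actions.shape, dist.probs.shape)                    # torch.Size([4]), torch.Size([4, 41])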

Critic Unchanged
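
The earlier critic definition is not included in this issue. A minimal placeholder consistent with how it is used below (Critic(state_dim) constructed once, ppo.critic(state_t).item() read as a scalar value estimate) might look like the following; the hidden width of 64 is an arbitrary assumption.

class Critic(nn.Module):
    """Minimal state-value network: syndrome -> scalar V(s)."""
    def __init__(self, state_dim, hidden_dim=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, state):
        return self.net(state)  # shape (batch, 1); .item() works for a batch of one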

Evolved PPO (with Darwinian Population)

class PPO:
    def __init__(self, state_dim, action_dim, population_size=5, mutation_rate=0.05, **kwargs):
        super().__init__()  # Inherit base
        self.population = [QuantumActor(state_dim, action_dim) for _ in range(population_size)]
        self.critic = Critic(state_dim)
        # ... (opts, buffer)
        self.mutation_rate = mutation_rate

    def update(self):
        # ... (base update on best actor)
        # Darwinian: evaluate the population, then select/mutate
        fitnesses = [self.evaluate_actor(actor) for actor in self.population]
        best_idx = np.argmax(fitnesses)
        for i in range(len(self.population)):
            if i != best_idx and random.random() < self.mutation_rate:
                # Copy the best actor's circuit parameters plus Gaussian noise
                self.population[i].params.data = (self.population[best_idx].params.data.clone()
                                                  + torch.randn_like(self.population[i].params) * 0.02)
        # Refine: meta-optimise the best actor
        self.population[best_idx].self_evolve(fitnesses[best_idx])

    def evaluate_actor(self, actor):
        # Mean single-shot reward over 10 episodes (uses the global `env` defined in the training section)
        rewards = []
        for _ in range(10):
            state = env.reset()
            state_t = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            dist = actor(state_t)
            action = dist.sample().item()
            correction = np.zeros(env.n_qubits)
            correction[action] = 1
            _, reward, _ = env.step(correction)
            rewards.append(reward)
        return np.mean(rewards)
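
The pieces elided above ("# ... (opts, buffer)" and the base clipped-surrogate update) are not shown in this issue. One possible reading, consistent with how the training loop below appends to ppo.buffer, is sketched here as a subclass; the class name PPOWithBuffers, the buffer size, and the learning rates are assumptions, not the original base class.

class PPOWithBuffers(PPO):
    def __init__(self, state_dim, action_dim, **kwargs):
        super().__init__(state_dim, action_dim, **kwargs)
        self.buffer = deque(maxlen=4096)  # (state, action, reward, old_log_prob, value, done)
        self.actor_opts = [optim.Adam(a.parameters(), lr=3e-4) for a in self.population]
        self.critic_opt = optim.Adam(self.critic.parameters(), lr=1e-3)
        # The clipped-surrogate PPO update for the best actor and the value
        # regression for the critic are also elided in the issue and not reconstructed here.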

Training: Evolved Loop

env = SurfaceCodeEnv(size=5, p_error=0.03, mode='rotated') # Upgraded
state_dim = env.n_stab
action_dim = env.n_qubits
ppo = PPO(state_dim, action_dim)

for episode in range(2000):  # Scaled
    state = env.reset()
    state_t = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
    actor = ppo.population[episode % len(ppo.population)]  # Cycle through the population
    dist = actor(state_t)
    action = dist.sample().item()
    correction = np.zeros(env.n_qubits)
    correction[action] = 1
    next_state, reward, done = env.step(correction)
    value = ppo.critic(state_t).item()
    old_prob = dist.log_prob(torch.tensor(action)).item()
    ppo.buffer.append((state, action, reward, old_prob, value, done))

if len(ppo.buffer) % 64 == 0:
    ppo.update()

if episode % 200 == 0:
    print(f"Episode {episode}: Avg Fitness {np.mean([ppo.evaluate_actor(a) for a in ppo.population]):.4f}")

Note: a minimum-weight perfect matching (MWPM) decoder could be integrated as a baseline agent in the population for hybrid evolution, as sketched below.
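
One way such a baseline could be slotted in is a wrapper with the same call interface as QuantumActor (batched syndrome in, Categorical over single-qubit flips out). The class below is only an illustrative greedy stand-in, not true MWPM: the toy syndrome map in SurfaceCodeEnv is not a valid matching graph, so a real MWPM decoder (e.g. via the PyMatching library on a proper check matrix) is not wired up here. BaselineDecoderAgent and its greedy rule are assumptions, not part of the original code.

class BaselineDecoderAgent:
    """Classical baseline with the same call interface as QuantumActor.

    Greedy stand-in for an MWPM baseline: it concentrates probability on the
    qubit indexed by the first fired stabilizer (illustrative heuristic only).
    """
    def __init__(self, state_dim, action_dim, n_qubits=6, n_layers=3):
        self.action_dim = action_dim
        # Dummy parameters so PPO.update()'s mutation step has something to write to
        self.params = nn.Parameter(torch.zeros(n_layers * n_qubits * 2))

    def __call__(self, state):
        logits = torch.full((state.shape[0], self.action_dim), -5.0)
        for b in range(state.shape[0]):
            fired = torch.nonzero(state[b] > 0.5).flatten()
            if len(fired) > 0:
                # Greedy rule: flip the qubit indexed by the first fired stabilizer
                logits[b, int(fired[0]) % self.action_dim] = 5.0
        return Categorical(logits=logits)

    def self_evolve(self, fitness):
        pass  # Classical baseline: nothing to meta-optimise

# Hybrid population: replace one quantum actor with the classical baseline
# ppo.population[-1] = BaselineDecoderAgent(state_dim, action_dim)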
