Add some TPS code and baselines

plainerman · plainerman · commit fb7158916064 · 2024-04-21T17:43:48.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 models_variational_gp/
+baselines/
 
 # Created by https://www.toptal.com/developers/gitignore/api/macos,python,pycharm+all
 # Edit at https://www.toptal.com/developers/gitignore?templates=macos,python,pycharm+all
diff --git a/tps.py b/tps.py
@@ -0,0 +1,163 @@
+import jax
+import jax.numpy as jnp
+from tqdm import tqdm
+
+# Upper bound on the number of frames in a proposed trajectory; the shooting
+# moves fall back to it when they are called with fixed_length == 0.
+MAX_STEPS = 1_000
+
+
+class System:
+    """Bundle of the three callables that describe a two-state problem.
+
+    The arguments are stored unchanged. Within this module ``start_state``
+    and ``target_state`` are called with a single frame and their result is
+    used in boolean context, and ``step`` is called as ``step(frame, key)``
+    and is expected to return the next frame.
+    """
+
+    def __init__(self, start_state, target_state, step):
+        self.start_state, self.target_state, self.step = start_state, target_state, step
+
+
+def one_way_shooting(system, trajectory, fixed_length, key):
+    """Propose a new path by re-simulating one side of ``trajectory``.
+
+    A shooting index is drawn from ``[1, len(trajectory) - 2]`` together with
+    a direction. For direction 0 the frames up to and including the shooting
+    point are kept; for direction 1 the frames from the shooting point to the
+    end are kept and reversed. In both cases new frames produced by
+    ``system.step`` are appended to the kept segment.
+
+    Args:
+        system: object exposing ``start_state(frame)``, ``target_state(frame)``
+            and ``step(frame, key)``.
+        trajectory: list of frames; it is sliced, never modified in place.
+        fixed_length: required number of frames, or 0 to allow any length up
+            to ``MAX_STEPS``.
+        key: JAX PRNG key.
+
+    Returns:
+        ``(found, path)``. ``found`` is True when the path connects the start
+        state to the target state (and has exactly ``fixed_length`` frames if
+        that is non-zero); a path found in reverse orientation is flipped so
+        it runs start -> target. When ``found`` is False, ``path`` is the
+        partially simulated candidate.
+    """
+    # Three independent keys are needed (index, direction, simulation). The
+    # default two-way split provides only two, so indexing a third entry
+    # either fails or aliases an already-used key.
+    index_key, direction_key, sim_key = jax.random.split(key, 3)
+
+    # pick a random interior point along the trajectory
+    point_idx = int(jax.random.randint(index_key, (1,), 1, len(trajectory) - 1)[0])
+    # pick a random direction: 0 = forward, 1 = backward
+    direction = int(jax.random.randint(direction_key, (1,), 0, 2)[0])
+
+    print(f'testing path with id={point_idx} and direction={"forward" if direction == 0 else "backward"}')
+
+    if direction == 0:
+        trajectory = list(trajectory[:point_idx + 1])
+    else:
+        # Reversed so that simulated frames can always be appended at the end.
+        trajectory = list(trajectory[point_idx:][::-1])
+
+    steps = MAX_STEPS if fixed_length == 0 else fixed_length
+
+    while len(trajectory) < steps:
+        sim_key, iter_key = jax.random.split(sim_key)
+        point = system.step(trajectory[-1], iter_key)
+        trajectory.append(point)
+
+        if jnp.isnan(point).any():
+            print('Nan detected!!!')
+            return False, trajectory
+
+        length_ok = fixed_length == 0 or len(trajectory) == fixed_length
+
+        if system.start_state(trajectory[0]) and system.target_state(trajectory[-1]):
+            return length_ok, trajectory
+
+        if system.target_state(trajectory[0]) and system.start_state(trajectory[-1]):
+            # Flip so the returned path runs from the start to the target state.
+            return (True, trajectory[::-1]) if length_ok else (False, trajectory)
+
+    return False, trajectory
+
+
+def two_way_shooting(system, trajectory, fixed_length, key):
+    """Propose a new path by simulating away from a random interior frame.
+
+    Starting from the selected frame, frames are first appended at the end
+    until the start or target state is reached, then prepended at the front
+    under the same stopping rule. Both phases share one frame budget
+    (``fixed_length``, or ``MAX_STEPS`` when ``fixed_length`` is 0).
+
+    Returns ``(True, path)`` with ``path`` oriented start -> target when the
+    candidate connects the two states (and has exactly ``fixed_length``
+    frames if that is non-zero); otherwise ``(False, trajectory)`` with the
+    unmodified input.
+    """
+    index_key, sim_key = jax.random.split(key)
+
+    # random interior frame to shoot from
+    shoot_idx = jax.random.randint(index_key, (1,), 1, len(trajectory) - 1)[0]
+    candidate = [trajectory[shoot_idx]]
+
+    budget = fixed_length if fixed_length != 0 else MAX_STEPS
+
+    # Advance the key once before the loops; only the carried key is used.
+    sim_key, _ = jax.random.split(sim_key)
+
+    # grow the candidate at its end
+    while len(candidate) < budget:
+        sim_key, step_key = jax.random.split(sim_key)
+        frame = system.step(candidate[-1], step_key)
+        candidate.append(frame)
+
+        if jnp.isnan(frame).any():
+            print('Nan detected!!!')
+            return False, trajectory
+
+        if system.start_state(frame) or system.target_state(frame):
+            break
+
+    # grow the candidate at its front with whatever budget is left
+    while len(candidate) < budget:
+        sim_key, step_key = jax.random.split(sim_key)
+        frame = system.step(candidate[0], step_key)
+        candidate.insert(0, frame)
+
+        if jnp.isnan(frame).any():
+            return False, trajectory
+
+        if system.start_state(frame) or system.target_state(frame):
+            break
+
+    # a fixed-length run only accepts candidates of exactly that length
+    if fixed_length != 0 and len(candidate) != fixed_length:
+        return False, trajectory
+
+    head, tail = candidate[0], candidate[-1]
+
+    if system.start_state(head) and system.target_state(tail):
+        return True, candidate
+
+    if system.target_state(head) and system.start_state(tail):
+        return True, candidate[::-1]
+
+    return False, trajectory
+
+
+def mcmc_shooting(system, proposal, initial_trajectory, num_paths, key, fixed_length=0, warmup=50):
+    """Sample transition paths with a shooting-move Markov chain.
+
+    Args:
+        system: passed through to ``proposal``.
+        proposal: callable ``proposal(system, trajectory, fixed_length, key)``
+            returning ``(found, new_trajectory)``.
+        initial_trajectory: path used to seed the chain.
+        num_paths: number of paths to return.
+        key: JAX PRNG key.
+        fixed_length: forwarded to ``proposal``.
+        warmup: number of accepted paths discarded after the seed path.
+
+    Returns:
+        List of the ``num_paths`` paths accepted after the warm-up phase.
+    """
+    trajectories = [initial_trajectory]
+    # Index 0 holds the seed path and indices 1..warmup the warm-up paths, so
+    # the first path that is returned sits at this index.
+    first_kept = warmup + 1
+
+    with tqdm(total=num_paths) as pbar:
+        while len(trajectories) < first_kept + num_paths:
+            key, iter_key, accept_key = jax.random.split(key, 3)
+            found, new_trajectory = proposal(system, trajectories[-1], fixed_length, iter_key)
+            if not found:
+                continue
+
+            # Acceptance ratio: length of the current path over the proposed one.
+            ratio = len(trajectories[-1]) / len(new_trajectory)
+            # The first trajectory might have a very unreasonable length, so we skip it
+            if len(trajectories) == 1 or jax.random.uniform(accept_key, shape=(1,)) < ratio:
+                print('path accepted')
+                trajectories.append(new_trajectory)
+
+                # Advance the bar only for paths that are actually returned,
+                # so it finishes at exactly num_paths.
+                if len(trajectories) > first_kept:
+                    pbar.update(1)
+            else:
+                # NOTE(review): a rejected or failed proposal does not repeat
+                # the current path in the output; confirm this is intended.
+                print('Rejected path')
+
+    return trajectories[first_kept:]
+
+
+def unguided_md(system, initial_point, num_paths, key, fixed_length=0):
+    """Collect transition paths from one long unbiased simulation.
+
+    The simulation is advanced frame by frame with ``system.step``. Frames
+    outside both states are accumulated into a segment that begins with the
+    last frame seen before leaving; when a state is entered again the segment
+    is closed. A closed segment is kept if it connects the start state to the
+    target state (a target -> start segment is stored reversed) and, when
+    ``fixed_length`` is non-zero, has exactly that many frames.
+
+    Returns a list with ``num_paths`` paths.
+    """
+    found_paths = []
+    segment = []
+    frame = initial_point.clone()
+
+    with tqdm(total=num_paths) as pbar:
+        while len(found_paths) < num_paths:
+            key, step_key = jax.random.split(key)
+            new_frame = system.step(frame, step_key)
+
+            outside_states = not (system.start_state(new_frame) or system.target_state(new_frame))
+
+            if outside_states:
+                if not segment:
+                    # open a segment with the frame we are leaving from
+                    segment.append(frame)
+
+                if fixed_length != 0 and len(segment) > fixed_length:
+                    # already too long for a fixed-length path: drop it
+                    segment = []
+                else:
+                    segment.append(new_frame)
+            elif segment:
+                # a state was reached: close the segment and test it
+                segment.append(new_frame)
+
+                if fixed_length == 0 or len(segment) == fixed_length:
+                    first, last = segment[0], segment[-1]
+                    if system.start_state(first) and system.target_state(last):
+                        found_paths.append(segment)
+                        pbar.update(1)
+                    elif system.target_state(first) and system.start_state(last):
+                        found_paths.append(segment[::-1])
+                        pbar.update(1)
+                segment = []
+
+            frame = new_frame
+
+    return found_paths
diff --git a/tps_baseline.py b/tps_baseline.py
@@ -0,0 +1,226 @@
+import os
+import jax
+import numpy as np
+import matplotlib.pyplot as plt
+import jax.numpy as jnp
+import optax
+from flax import linen as nn
+from flax.training import train_state
+from tqdm import trange
+
+# install openmm (from conda)
+import openmm.app as app
+import openmm.unit as unit
+# install dmff (from source)
+from dmff import Hamiltonian, NeighborList
+# install mdtraj
+import mdtraj as md
+
+import tps
+# helper function for plotting in 2D
+from utils.PlotPathsAlanine_jax import PlotPathsAlanine
+from matplotlib import colors
+
+
+def human_format(num):
+    """Format a number with three significant digits and a magnitude suffix.
+
+    Adapted from https://stackoverflow.com/a/45846841/4417954.
+
+    Values of magnitude >= 1 use the suffixes '', K, M, B, T; smaller values
+    use '', m, µ, n, p, f. The sign is preserved, zero is rendered as '0',
+    non-finite values are returned as formatted by Python ('inf', 'nan'), and
+    values beyond the largest or smallest suffix keep that suffix instead of
+    raising.
+    """
+    # Scientific notation with two decimals carries the same three significant
+    # digits as '{:.3g}' and exposes the decimal exponent directly.
+    sci = '{:.2e}'.format(float(num))
+    mantissa_txt, _, exponent_txt = sci.partition('e')
+    if not exponent_txt:
+        # 'inf', '-inf' and 'nan' have no exponent part.
+        return sci
+
+    mantissa = float(mantissa_txt)
+    if mantissa == 0:
+        return '0'
+    exponent = int(exponent_txt)
+
+    large_suffixes = ['', 'K', 'M', 'B', 'T']
+    small_suffixes = ['', 'm', 'µ', 'n', 'p', 'f']
+
+    # Group the exponent in thousands and clamp it to the available suffixes.
+    magnitude = exponent // 3
+    if magnitude >= 0:
+        magnitude = min(magnitude, len(large_suffixes) - 1)
+        suffix = large_suffixes[magnitude]
+    else:
+        magnitude = max(magnitude, -(len(small_suffixes) - 1))
+        suffix = small_suffixes[-magnitude]
+
+    scaled = mantissa * 10.0 ** (exponent - 3 * magnitude)
+    return '{}{}'.format('{:f}'.format(scaled).rstrip('0').rstrip('.'), suffix)
+
+
+def interpolate(points, steps):
+    """Linearly interpolate through ``points`` using ``steps`` intervals.
+
+    The intervals are spread over the consecutive pairs of points as evenly
+    as possible, with the earlier pairs receiving the remainder. The result
+    is a list of ``steps + 1`` rows; a waypoint shared by two neighbouring
+    segments is emitted only once.
+    """
+    segment_count = len(points) - 1
+    base_size, leftover = divmod(steps, segment_count)
+
+    result = []
+    for idx, (start, stop) in enumerate(zip(points, points[1:])):
+        size = base_size + 1 if idx < leftover else base_size
+        weights = jnp.linspace(0, 1, size + 1).reshape(size + 1, 1)
+        piece = jnp.array(start) * (1 - weights) + jnp.array(stop) * weights
+        # drop the first row of later segments: it repeats the previous end
+        result.extend(piece if idx == 0 else piece[1:])
+
+    return result
+
+
+def phis_psis(position, mdtraj_topology):
+    """Return the phi and psi dihedral angles of ``position`` as two columns.
+
+    ``position`` is reshaped to ``(-1, n_atoms, 3)`` and wrapped in an mdtraj
+    trajectory; the angles come from ``md.compute_phi`` / ``md.compute_psi``.
+    """
+    coordinates = position.reshape(-1, mdtraj_topology.n_atoms, 3)
+    frames = md.Trajectory(coordinates, mdtraj_topology)
+    # element [1] of each result holds the angle values
+    phi_angles = md.compute_phi(frames)[1].squeeze()
+    psi_angles = md.compute_psi(frames)[1].squeeze()
+    return jnp.array([phi_angles, psi_angles]).T
+
+
+def ramachandran(samples, bins=100, path=None, paths=None):
+    """Draw a Ramachandran plot on the current matplotlib axes.
+
+    Args:
+        samples: array whose first two columns are phi and psi, drawn as a
+            log-scaled 2D histogram; pass None to skip the histogram.
+        bins: bin count forwarded to ``plt.hist2d``.
+        path: optional single path, drawn in red.
+        paths: optional iterable of paths, each drawn in blue.
+    """
+    if samples is not None:
+        plt.hist2d(samples[:, 0], samples[:, 1], bins=bins, norm=colors.LogNorm(), rasterized=True)
+    plt.xlim(-np.pi, np.pi)
+    plt.ylim(-np.pi, np.pi)
+
+    # identical ticks on both axes, at multiples of pi / 2
+    tick_positions = [-np.pi, -np.pi / 2, 0, np.pi / 2, np.pi]
+    tick_labels = [r'$-\pi$', r'$-\frac {\pi} {2}$', '0', r'$\frac {\pi} {2}$', r'$\pi$']
+
+    axes = plt.gca()
+    axes.set_xticks(tick_positions)
+    axes.set_xticklabels(tick_labels)
+    axes.set_yticks(tick_positions)
+    axes.set_yticklabels(tick_labels)
+
+    plt.xlabel(r'$\phi$')
+    plt.ylabel(r'$\psi$')
+
+    axes.set_aspect('equal', adjustable='box')
+
+    def draw_path(_path, **kwargs):
+        # Mask every point whose distance to the next point exceeds pi, so the
+        # line is broken there instead of being drawn across the plot.
+        jumps = jnp.sqrt(np.sum(jnp.diff(_path, axis=0) ** 2, axis=1))
+        hidden = jnp.hstack([jumps > jnp.pi, jnp.array([False])])
+        xs = np.ma.MaskedArray(_path[:, 0], hidden)
+        ys = np.ma.MaskedArray(_path[:, 1], hidden)
+        plt.plot(xs, ys, **kwargs)
+
+    if path is not None:
+        draw_path(path, color='red')
+
+    if paths is not None:
+        for extra_path in paths:
+            draw_path(extra_path, color='blue')
+
+
+# Not referenced anywhere else in the visible code.
+T = 2.0
+# Integration time step: one microsecond, converted to a plain number of seconds.
+dt = 1.0 * unit.microsecond
+dt = dt.value_in_unit(unit.second)
+
+# Simulation temperature; the second assignment overrides the first (see the TODO).
+temp = 298.15
+temp = 10000  # TODO: remove this
+# NOTE(review): 1.380649 and 6.02214076 look like the mantissas of the Boltzmann
+# and Avogadro constants, which would make this k_B * N_A * temp in kJ/mol -- confirm.
+kbT = 1.380649 * 6.02214076 * 1e-3 * temp
+
+if __name__ == '__main__':
+    # Script entry point: simulate with forward Euler starting from A, export
+    # a window of frames, show a Ramachandran plot, then build the TPS system.
+    init_pdb = app.PDBFile("./files/AD_c7eq.pdb")
+    target_pdb = app.PDBFile("./files/AD_c7ax.pdb")
+    mdtraj_topology = md.Topology.from_openmm(init_pdb.topology)
+
+    savedir = f"baselines/alanine"
+    os.makedirs(savedir, exist_ok=True)
+
+    # Construct the mass matrix
+    # (each atomic mass is repeated three times, once per Cartesian coordinate)
+    mass = [a.element.mass.value_in_unit(unit.dalton) for a in init_pdb.topology.atoms()]
+    new_mass = []
+    for mass_ in mass:
+        for _ in range(3):
+            new_mass.append(mass_)
+    mass = jnp.array(new_mass)
+    # Obtain sigma, gamma is by default 1
+    sigma = jnp.sqrt(2 * kbT / mass)
+
+    # Initial and target shape [BS, 66]
+    # (positions in nanometres, flattened to a single row)
+    A = jnp.array(init_pdb.getPositions(asNumpy=True).value_in_unit(unit.nanometer)).reshape(1, -1)
+    B = jnp.array(target_pdb.getPositions(asNumpy=True).value_in_unit(unit.nanometer)).reshape(1, -1)
+
+    # Initialize the potential energy with amber forcefields
+    ff = Hamiltonian('amber14/protein.ff14SB.xml', 'amber14/tip3p.xml')
+    potentials = ff.createPotential(init_pdb.topology,
+                                    nonbondedMethod=app.NoCutoff,
+                                    nonbondedCutoff=1.0 * unit.nanometers,
+                                    constraints=None,
+                                    ewaldErrorTolerance=0.0005)
+    # Create a box used when calling
+    # Calling U by U(x, box, pairs, ff.paramset.parameters), x is [22, 3] and output the energy, if it is batched, use vmap
+    box = np.array([[50.0, 0.0, 0.0], [0.0, 50.0, 0.0], [0.0, 0.0, 50.0]])
+    nbList = NeighborList(box, 4.0, potentials.meta["cov_map"])
+    nbList.allocate(init_pdb.getPositions(asNumpy=True).value_in_unit(unit.nanometer))
+    pairs = nbList.pairs
+
+
+    # Potential energy of one configuration given as a flat vector of 66 coordinates.
+    def U(_x):
+        # The potential function is looked up again on every call.
+        _U = potentials.getPotentialFunc()
+
+        return _U(_x.reshape(22, 3), box, pairs, ff.paramset.parameters)
+
+
+    #TODO: we can introduce gamma here
+    # Gradient of U divided element-wise by the per-coordinate masses.
+    def dUdx_fn(_x):
+        return jax.grad(lambda _x: U(_x).sum())(_x) / mass
+
+
+    # Batch over the leading axis, then compile.
+    dUdx_fn = jax.vmap(dUdx_fn)
+    dUdx_fn = jax.jit(dUdx_fn)
+
+    @jax.jit
+    def step(_x, _key):
+        """Perform one step of forward euler"""
+        return _x - dt * dUdx_fn(_x) + jnp.sqrt(dt) * sigma * jax.random.normal(_key, _x.shape)
+
+    key = jax.random.PRNGKey(1)
+
+    # Roll out `steps` integration steps and keep every frame.
+    trajectory = [A]  # or [B]
+    _x = trajectory[-1]
+    steps = 20_000
+    for i in trange(steps):
+        key, iter_key = jax.random.split(key)
+        _x = step(_x, iter_key)
+        trajectory.append(_x)
+
+    trajectory = jnp.array(trajectory).reshape(-1, 66)
+    assert not jnp.isnan(trajectory).any()
+    trajectory_phi_psi = phis_psis(trajectory, mdtraj_topology)
+
+    # Export frames 10000..10999 as one multi-frame PDB: the template file is
+    # loaded for every frame, its coordinates are overwritten, and the frames
+    # are joined one by one.
+    trajs = None
+    for i in range(10000, 11000):
+        traj = md.load_pdb('./files/AD_c7eq.pdb')
+        traj.xyz = trajectory[i].reshape(22, 3)
+        if trajs is None:
+            trajs = traj
+        else:
+            trajs = trajs.join(traj)
+    trajs.save(f'{savedir}/ALDP_forward_euler.pdb')
+
+    plt.title(f"{human_format(steps)} steps @ {temp} K, dt = {human_format(dt)}s")
+    ramachandran(trajectory_phi_psi)
+    plt.show()
+
+
+    # TODO: this is work in progress. Get some baselines with tps
+    # A frame counts as being in a state when every one of its 22 atoms lies
+    # within 5e-2 (in the units of A and B, i.e. nanometres) of the matching
+    # atom in A (start state) or B (target state).
+    system = tps.System(
+        jax.jit(
+            lambda s: jnp.all(jnp.linalg.norm(A.reshape(-1, 22, 3) - s.reshape(-1, 22, 3), axis=2) <= 5e-2, axis=1)),
+        jax.jit(
+            lambda s: jnp.all(jnp.linalg.norm(B.reshape(-1, 22, 3) - s.reshape(-1, 22, 3), axis=2) <= 5e-2, axis=1)),
+        step
+    )
+
+    # initial_trajectory = [t.reshape(1, -1) for t in interpolate([A, B], 100)]
+
+    #
+    # for i in range(10):
+    #     key, iter_key = jax.random.split(key)
+    #
+    #     # ramachandran(None, path=phis_psis(jnp.vstack(initial_trajectory), mdtraj_topology))
+    #     # plt.show()
+    #
+    #
+    #     ok, trajectory = tps.one_way_shooting(system, initial_trajectory, 0, key)
+    #     trajectory = jnp.array(trajectory)
+    #     trajectory = phis_psis(trajectory, mdtraj_topology)
+    #     print('ok?', ok)
+    #
+    #     ramachandran(None, path=phis_psis(jnp.vstack(initial_trajectory), mdtraj_topology), paths=[trajectory])
+    #     plt.show()
+    #
+
+    # paths = tps.mcmc_shooting(system, tps.one_way_shooting, initial_trajectory, 10, key, warmup=0)
+    # paths = [jnp.array(p) for p in paths]
+    #
+    # print(paths)
+    # ramachandran(None, path=[phis_psis(p, mdtraj_topology) for p in paths][-1])
+    # plt.show()
diff --git a/utils/PlotPathsAlanine.py b/utils/PlotPathsAlanine.py

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`models_variational_gp/`
	`2`	`+baselines/`
`2`	`3`
`3`	`4`	`# Created by https://www.toptal.com/developers/gitignore/api/macos,python,pycharm+all`
`4`	`5`	`# Edit at https://www.toptal.com/developers/gitignore?templates=macos,python,pycharm+all`