diff --git a/controllers/mat_rapid/algorithms/__init__.py b/controllers/mat_rapid/algorithms/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/controllers/mat_rapid/algorithms/mat/__init__.py b/controllers/mat_rapid/algorithms/mat/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/controllers/mat_rapid/algorithms/mat/algorithm/ma_transformer.py b/controllers/mat_rapid/algorithms/mat/algorithm/ma_transformer.py new file mode 100644 index 0000000..f879684 --- /dev/null +++ b/controllers/mat_rapid/algorithms/mat/algorithm/ma_transformer.py @@ -0,0 +1,312 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F +import math +import numpy as np +from torch.distributions import Categorical +from algorithms.utils.util import check, init +from algorithms.utils.transformer_act import discrete_autoregreesive_act +from algorithms.utils.transformer_act import discrete_parallel_act +from algorithms.utils.transformer_act import continuous_autoregreesive_act +from algorithms.utils.transformer_act import continuous_parallel_act + +def init_(m, gain=0.01, activate=False): + if activate: + gain = nn.init.calculate_gain('relu') + return init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0), gain=gain) + + +class SelfAttention(nn.Module): + + def __init__(self, n_embd, n_head, n_agent, masked=False): + super(SelfAttention, self).__init__() + + assert n_embd % n_head == 0 + self.masked = masked + self.n_head = n_head + # key, query, value projections for all heads + self.key = init_(nn.Linear(n_embd, n_embd)) + self.query = init_(nn.Linear(n_embd, n_embd)) + self.value = init_(nn.Linear(n_embd, n_embd)) + # output projection + self.proj = init_(nn.Linear(n_embd, n_embd)) + # if self.masked: + # causal mask to ensure that attention is only applied to the left in the input sequence + self.register_buffer("mask", torch.tril(torch.ones(n_agent + 1, n_agent + 1)) + .view(1, 1, n_agent + 1, n_agent + 1)) + + self.att_bp = 
None + + def forward(self, key, value, query): + B, L, D = query.size() + + # calculate query, key, values for all heads in batch and move head forward to be the batch dim + k = self.key(key).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + q = self.query(query).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + v = self.value(value).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + + # causal attention: (B, nh, L, hs) x (B, nh, hs, L) -> (B, nh, L, L) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + + # self.att_bp = F.softmax(att, dim=-1) + + if self.masked: + att = att.masked_fill(self.mask[:, :, :L, :L] == 0, float('-inf')) + att = F.softmax(att, dim=-1) + + y = att @ v # (B, nh, L, L) x (B, nh, L, hs) -> (B, nh, L, hs) + y = y.transpose(1, 2).contiguous().view(B, L, D) # re-assemble all head outputs side by side + + # output projection + y = self.proj(y) + return y + + +class EncodeBlock(nn.Module): + """ an unassuming Transformer block """ + + def __init__(self, n_embd, n_head, n_agent): + super(EncodeBlock, self).__init__() + + self.ln1 = nn.LayerNorm(n_embd) + self.ln2 = nn.LayerNorm(n_embd) + # self.attn = SelfAttention(n_embd, n_head, n_agent, masked=True) + self.attn = SelfAttention(n_embd, n_head, n_agent, masked=False) + self.mlp = nn.Sequential( + init_(nn.Linear(n_embd, 1 * n_embd), activate=True), + nn.GELU(), + init_(nn.Linear(1 * n_embd, n_embd)) + ) + + def forward(self, x): + x = self.ln1(x + self.attn(x, x, x)) + x = self.ln2(x + self.mlp(x)) + return x + + +class DecodeBlock(nn.Module): + """ an unassuming Transformer block """ + + def __init__(self, n_embd, n_head, n_agent): + super(DecodeBlock, self).__init__() + + self.ln1 = nn.LayerNorm(n_embd) + self.ln2 = nn.LayerNorm(n_embd) + self.ln3 = nn.LayerNorm(n_embd) + self.attn1 = SelfAttention(n_embd, n_head, n_agent, masked=True) + self.attn2 = SelfAttention(n_embd, n_head, n_agent, 
masked=True) + self.mlp = nn.Sequential( + init_(nn.Linear(n_embd, 1 * n_embd), activate=True), + nn.GELU(), + init_(nn.Linear(1 * n_embd, n_embd)) + ) + + def forward(self, x, rep_enc): + x = self.ln1(x + self.attn1(x, x, x)) + x = self.ln2(rep_enc + self.attn2(key=x, value=x, query=rep_enc)) + x = self.ln3(x + self.mlp(x)) + return x + + +class Encoder(nn.Module): + + def __init__(self, state_dim, obs_dim, n_block, n_embd, n_head, n_agent, encode_state): + super(Encoder, self).__init__() + + self.state_dim = state_dim + self.obs_dim = obs_dim + self.n_embd = n_embd + self.n_agent = n_agent + self.encode_state = encode_state + # self.agent_id_emb = nn.Parameter(torch.zeros(1, n_agent, n_embd)) + + self.state_encoder = nn.Sequential(nn.LayerNorm(state_dim), + init_(nn.Linear(state_dim, n_embd), activate=True), nn.GELU()) + self.obs_encoder = nn.Sequential(nn.LayerNorm(obs_dim), + init_(nn.Linear(obs_dim, n_embd), activate=True), nn.GELU()) + + self.ln = nn.LayerNorm(n_embd) + self.blocks = nn.Sequential(*[EncodeBlock(n_embd, n_head, n_agent) for _ in range(n_block)]) + self.head = nn.Sequential(init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, 1))) + + def forward(self, state, obs): + # state: (batch, n_agent, state_dim) + # obs: (batch, n_agent, obs_dim) + if self.encode_state: + state_embeddings = self.state_encoder(state) + x = state_embeddings + else: + obs_embeddings = self.obs_encoder(obs) + x = obs_embeddings + + rep = self.blocks(self.ln(x)) + v_loc = self.head(rep) + + return v_loc, rep + + +class Decoder(nn.Module): + + def __init__(self, obs_dim, action_dim, n_block, n_embd, n_head, n_agent, + action_type='Discrete', dec_actor=False, share_actor=False): + super(Decoder, self).__init__() + + self.action_dim = action_dim + self.n_embd = n_embd + self.dec_actor = dec_actor + self.share_actor = share_actor + self.action_type = action_type + + if action_type != 'Discrete': + log_std = 
torch.ones(action_dim) + # log_std = torch.zeros(action_dim) + self.log_std = torch.nn.Parameter(log_std) + # self.log_std = torch.nn.Parameter(torch.zeros(action_dim)) + + if self.dec_actor: + if self.share_actor: + print("mac_dec!!!!!") + self.mlp = nn.Sequential(nn.LayerNorm(obs_dim), + init_(nn.Linear(obs_dim, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, action_dim))) + else: + self.mlp = nn.ModuleList() + for n in range(n_agent): + actor = nn.Sequential(nn.LayerNorm(obs_dim), + init_(nn.Linear(obs_dim, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, action_dim))) + self.mlp.append(actor) + else: + # self.agent_id_emb = nn.Parameter(torch.zeros(1, n_agent, n_embd)) + if action_type == 'Discrete': + self.action_encoder = nn.Sequential(init_(nn.Linear(action_dim + 1, n_embd, bias=False), activate=True), + nn.GELU()) + else: + self.action_encoder = nn.Sequential(init_(nn.Linear(action_dim, n_embd), activate=True), nn.GELU()) + self.obs_encoder = nn.Sequential(nn.LayerNorm(obs_dim), + init_(nn.Linear(obs_dim, n_embd), activate=True), nn.GELU()) + self.ln = nn.LayerNorm(n_embd) + self.blocks = nn.Sequential(*[DecodeBlock(n_embd, n_head, n_agent) for _ in range(n_block)]) + self.head = nn.Sequential(init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, action_dim))) + + def zero_std(self, device): + if self.action_type != 'Discrete': + log_std = torch.zeros(self.action_dim).to(device) + self.log_std.data = log_std + + # state, action, and return + def forward(self, action, obs_rep, obs): + # action: (batch, n_agent, action_dim), one-hot/logits? 
+ # obs_rep: (batch, n_agent, n_embd) + if self.dec_actor: + if self.share_actor: + logit = self.mlp(obs) + else: + logit = [] + for n in range(len(self.mlp)): + logit_n = self.mlp[n](obs[:, n, :]) + logit.append(logit_n) + logit = torch.stack(logit, dim=1) + else: + action_embeddings = self.action_encoder(action) + x = self.ln(action_embeddings) + for block in self.blocks: + x = block(x, obs_rep) + logit = self.head(x) + + return logit + + +class MultiAgentTransformer(nn.Module): + + def __init__(self, state_dim, obs_dim, action_dim, n_agent, + n_block, n_embd, n_head, encode_state=False, device=torch.device("cpu"), + action_type='Discrete', dec_actor=False, share_actor=False): + super(MultiAgentTransformer, self).__init__() + + self.n_agent = n_agent + self.action_dim = action_dim + self.tpdv = dict(dtype=torch.float32, device=device) + self.action_type = action_type + self.device = device + + # state unused + state_dim = 37 + + self.encoder = Encoder(state_dim, obs_dim, n_block, n_embd, n_head, n_agent, encode_state) + self.decoder = Decoder(obs_dim, action_dim, n_block, n_embd, n_head, n_agent, + self.action_type, dec_actor=dec_actor, share_actor=share_actor) + self.to(device) + + def zero_std(self): + if self.action_type != 'Discrete': + self.decoder.zero_std(self.device) + + def forward(self, state, obs, action, available_actions=None): + # state: (batch, n_agent, state_dim) + # obs: (batch, n_agent, obs_dim) + # action: (batch, n_agent, 1) + # available_actions: (batch, n_agent, act_dim) + + # state unused + ori_shape = np.shape(state) + state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32) + + state = check(state).to(**self.tpdv) + obs = check(obs).to(**self.tpdv) + action = check(action).to(**self.tpdv) + + if available_actions is not None: + available_actions = check(available_actions).to(**self.tpdv) + + batch_size = np.shape(state)[0] + v_loc, obs_rep = self.encoder(state, obs) + if self.action_type == 'Discrete': + action = action.long() + 
action_log, entropy = discrete_parallel_act(self.decoder, obs_rep, obs, action, batch_size, + self.n_agent, self.action_dim, self.tpdv, available_actions) + else: + action_log, entropy = continuous_parallel_act(self.decoder, obs_rep, obs, action, batch_size, + self.n_agent, self.action_dim, self.tpdv) + + return action_log, v_loc, entropy + + def get_actions(self, state, obs, available_actions=None, deterministic=False): + # state unused + ori_shape = np.shape(obs) + state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32) + + state = check(state).to(**self.tpdv) + obs = check(obs).to(**self.tpdv) + if available_actions is not None: + available_actions = check(available_actions).to(**self.tpdv) + + batch_size = np.shape(obs)[0] + v_loc, obs_rep = self.encoder(state, obs) + if self.action_type == "Discrete": + output_action, output_action_log = discrete_autoregreesive_act(self.decoder, obs_rep, obs, batch_size, + self.n_agent, self.action_dim, self.tpdv, + available_actions, deterministic) + else: + output_action, output_action_log = continuous_autoregreesive_act(self.decoder, obs_rep, obs, batch_size, + self.n_agent, self.action_dim, self.tpdv, + deterministic) + + return output_action, output_action_log, v_loc + + def get_values(self, state, obs, available_actions=None): + # state unused + ori_shape = np.shape(state) + state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32) + + state = check(state).to(**self.tpdv) + obs = check(obs).to(**self.tpdv) + v_tot, obs_rep = self.encoder(state, obs) + return v_tot + + + diff --git a/controllers/mat_rapid/algorithms/mat/algorithm/mat_decoder.py b/controllers/mat_rapid/algorithms/mat/algorithm/mat_decoder.py new file mode 100644 index 0000000..4553bc7 --- /dev/null +++ b/controllers/mat_rapid/algorithms/mat/algorithm/mat_decoder.py @@ -0,0 +1,297 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F +import math +import numpy as np +from torch.distributions import Categorical, Normal +from 
algorithms.utils.util import check, init + + +def init_(m, gain=0.01, activate=False): + if activate: + gain = nn.init.calculate_gain('relu') + return init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0), gain=gain) + + +def discrete_autoregreesive_act(decoder, obs_rep, obs, batch_size, n_agent, action_dim, tpdv, + available_actions=None, deterministic=False): + shifted_action = torch.zeros((batch_size, n_agent, action_dim + 1)).to(**tpdv) + shifted_action[:, 0, 0] = 1 + output_action = torch.zeros((batch_size, n_agent, 1), dtype=torch.long) + output_action_log = torch.zeros_like(output_action, dtype=torch.float32) + + for i in range(n_agent): + logit, v_loc = decoder(shifted_action, obs_rep, obs) + logit = logit[:, i, :] + if available_actions is not None: + logit[available_actions[:, i, :] == 0] = -1e10 + + distri = Categorical(logits=logit) + action = distri.probs.argmax(dim=-1) if deterministic else distri.sample() + action_log = distri.log_prob(action) + + output_action[:, i, :] = action.unsqueeze(-1) + output_action_log[:, i, :] = action_log.unsqueeze(-1) + if i + 1 < n_agent: + shifted_action[:, i + 1, 1:] = F.one_hot(action, num_classes=action_dim) + return output_action, output_action_log, v_loc + + +def discrete_parallel_act(decoder, obs_rep, obs, action, batch_size, n_agent, action_dim, tpdv, + available_actions=None): + one_hot_action = F.one_hot(action.squeeze(-1), num_classes=action_dim) # (batch, n_agent, action_dim) + shifted_action = torch.zeros((batch_size, n_agent, action_dim + 1)).to(**tpdv) + shifted_action[:, 0, 0] = 1 + shifted_action[:, 1:, 1:] = one_hot_action[:, :-1, :] + logit, v_loc = decoder(shifted_action, obs_rep, obs) + if available_actions is not None: + logit[available_actions == 0] = -1e10 + + distri = Categorical(logits=logit) + action_log = distri.log_prob(action.squeeze(-1)).unsqueeze(-1) + entropy = distri.entropy().unsqueeze(-1) + return action_log, entropy, v_loc + + +def continuous_autoregreesive_act(decoder, 
obs_rep, obs, batch_size, n_agent, action_dim, tpdv, + deterministic=False): + shifted_action = torch.zeros((batch_size, n_agent, action_dim)).to(**tpdv) + output_action = torch.zeros((batch_size, n_agent, action_dim), dtype=torch.float32) + output_action_log = torch.zeros_like(output_action, dtype=torch.float32) + + for i in range(n_agent): + act_mean, v_loc = decoder(shifted_action, obs_rep, obs) + act_mean = act_mean[:, i, :] + action_std = torch.sigmoid(decoder.log_std) * 0.5 + + # log_std = torch.zeros_like(act_mean).to(**tpdv) + decoder.log_std + # distri = Normal(act_mean, log_std.exp()) + distri = Normal(act_mean, action_std) + action = act_mean if deterministic else distri.sample() + action_log = distri.log_prob(action) + + output_action[:, i, :] = action + output_action_log[:, i, :] = action_log + if i + 1 < n_agent: + shifted_action[:, i + 1, :] = action + + # print("act_mean: ", act_mean) + # print("action: ", action) + + return output_action, output_action_log, v_loc + + +def continuous_parallel_act(decoder, obs_rep, obs, action, batch_size, n_agent, action_dim, tpdv): + shifted_action = torch.zeros((batch_size, n_agent, action_dim)).to(**tpdv) + shifted_action[:, 1:, :] = action[:, :-1, :] + + act_mean, v_loc = decoder(shifted_action, obs_rep, obs) + action_std = torch.sigmoid(decoder.log_std) * 0.5 + distri = Normal(act_mean, action_std) + + # log_std = torch.zeros_like(act_mean).to(**tpdv) + decoder.log_std + # distri = Normal(act_mean, log_std.exp()) + + action_log = distri.log_prob(action) + entropy = distri.entropy() + return action_log, entropy, v_loc + + + +class SelfAttention(nn.Module): + + def __init__(self, n_embd, n_head, n_agent, masked=False): + super(SelfAttention, self).__init__() + + assert n_embd % n_head == 0 + self.masked = masked + self.n_head = n_head + # key, query, value projections for all heads + self.key = init_(nn.Linear(n_embd, n_embd)) + self.query = init_(nn.Linear(n_embd, n_embd)) + self.value = init_(nn.Linear(n_embd, 
n_embd)) + # output projection + self.proj = init_(nn.Linear(n_embd, n_embd)) + # if self.masked: + # causal mask to ensure that attention is only applied to the left in the input sequence + self.register_buffer("mask", torch.tril(torch.ones(n_agent + 1, n_agent + 1)) + .view(1, 1, n_agent + 1, n_agent + 1)) + + self.att_bp = None + + def forward(self, key, value, query): + B, L, D = query.size() + + # calculate query, key, values for all heads in batch and move head forward to be the batch dim + k = self.key(key).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + q = self.query(query).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + v = self.value(value).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + + # causal attention: (B, nh, L, hs) x (B, nh, hs, L) -> (B, nh, L, L) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + + # self.att_bp = F.softmax(att, dim=-1) + + if self.masked: + att = att.masked_fill(self.mask[:, :, :L, :L] == 0, float('-inf')) + att = F.softmax(att, dim=-1) + + y = att @ v # (B, nh, L, L) x (B, nh, L, hs) -> (B, nh, L, hs) + y = y.transpose(1, 2).contiguous().view(B, L, D) # re-assemble all head outputs side by side + + # output projection + y = self.proj(y) + return y + + +class DecodeBlock(nn.Module): + + def __init__(self, n_embd, n_head, n_agent): + super(DecodeBlock, self).__init__() + + self.ln1 = nn.LayerNorm(n_embd) + self.ln2 = nn.LayerNorm(n_embd) + self.ln3 = nn.LayerNorm(n_embd) + self.attn1 = SelfAttention(n_embd, n_head, n_agent, masked=True) + self.attn2 = SelfAttention(n_embd, n_head, n_agent, masked=True) + self.mlp = nn.Sequential( + init_(nn.Linear(n_embd, 1 * n_embd), activate=True), + nn.GELU(), + init_(nn.Linear(1 * n_embd, n_embd)) + ) + + def forward(self, x, rep_enc): + x = self.ln1(x + self.attn1(x, x, x)) + x = self.ln2(rep_enc + self.attn2(key=x, value=x, query=rep_enc)) + x = self.ln3(x + self.mlp(x)) + return x + + 
+class Decoder(nn.Module): + + def __init__(self, obs_dim, action_dim, n_block, n_embd, n_head, n_agent, + action_type='Discrete', dec_actor=False, share_actor=False): + super(Decoder, self).__init__() + + self.action_dim = action_dim + self.n_embd = n_embd + self.dec_actor = dec_actor + self.share_actor = share_actor + self.action_type = action_type + + if action_type == 'Discrete': + self.action_encoder = nn.Sequential(init_(nn.Linear(action_dim + 1, n_embd, bias=False), activate=True), + nn.GELU()) + else: + log_std = torch.ones(action_dim) + # log_std = torch.zeros(action_dim) + self.log_std = torch.nn.Parameter(log_std) + # self.log_std = torch.nn.Parameter(torch.zeros(action_dim)) + self.action_encoder = nn.Sequential(init_(nn.Linear(action_dim, n_embd), activate=True), nn.GELU()) + self.obs_encoder = nn.Sequential(nn.LayerNorm(obs_dim), + init_(nn.Linear(obs_dim, n_embd), activate=True), nn.GELU()) + self.ln = nn.LayerNorm(n_embd) + self.blocks = nn.Sequential(*[DecodeBlock(n_embd, n_head, n_agent) for _ in range(n_block)]) + self.head = nn.Sequential(init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, action_dim))) + self.val_head = nn.Sequential(init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, 1))) + + def zero_std(self, device): + if self.action_type != 'Discrete': + log_std = torch.zeros(self.action_dim).to(device) + self.log_std.data = log_std + + # state, action, and return + def forward(self, action, obs_rep, obs): + # action: (batch, n_agent, action_dim), one-hot/logits? 
+ # obs_rep: (batch, n_agent, n_embd) + obs_embeddings = self.obs_encoder(obs) + action_embeddings = self.action_encoder(action) + x = action_embeddings + x = self.ln(x) + for block in self.blocks: + x = block(x, obs_embeddings) + logit = self.head(x) + val = self.val_head(x) + + return logit, val + + +class MultiAgentDecoder(nn.Module): + + def __init__(self, state_dim, obs_dim, action_dim, n_agent, + n_block, n_embd, n_head, encode_state=False, device=torch.device("cpu"), + action_type='Discrete', dec_actor=False, share_actor=False): + super(MultiAgentDecoder, self).__init__() + + self.n_agent = n_agent + self.action_dim = action_dim + self.tpdv = dict(dtype=torch.float32, device=device) + self.action_type = action_type + self.device = device + + self.decoder = Decoder(obs_dim, action_dim, n_block, n_embd, n_head, n_agent, + self.action_type, dec_actor=dec_actor, share_actor=share_actor) + self.to(device) + + def zero_std(self): + if self.action_type != 'Discrete': + self.decoder.zero_std(self.device) + + def forward(self, state, obs, action, available_actions=None): + # state: (batch, n_agent, state_dim) + # obs: (batch, n_agent, obs_dim) + # action: (batch, n_agent, 1) + # available_actions: (batch, n_agent, act_dim) + + # state unused + ori_shape = np.shape(state) + state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32) + + state = check(state).to(**self.tpdv) + obs = check(obs).to(**self.tpdv) + action = check(action).to(**self.tpdv) + + if available_actions is not None: + available_actions = check(available_actions).to(**self.tpdv) + + batch_size = np.shape(state)[0] + if self.action_type == 'Discrete': + action = action.long() + action_log, entropy, v_loc = discrete_parallel_act(self.decoder, None, obs, action, batch_size, + self.n_agent, self.action_dim, self.tpdv, available_actions) + else: + action_log, entropy, v_loc = continuous_parallel_act(self.decoder, None, obs, action, batch_size, + self.n_agent, self.action_dim, self.tpdv) + return action_log, 
v_loc, entropy + + def get_actions(self, state, obs, available_actions=None, deterministic=False): + # state unused + ori_shape = np.shape(obs) + state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32) + + state = check(state).to(**self.tpdv) + obs = check(obs).to(**self.tpdv) + if available_actions is not None: + available_actions = check(available_actions).to(**self.tpdv) + + batch_size = np.shape(obs)[0] + if self.action_type == "Discrete": + output_action, output_action_log, v_loc = discrete_autoregreesive_act(self.decoder, None, obs, batch_size, + self.n_agent, self.action_dim, self.tpdv, + available_actions, deterministic) + else: + output_action, output_action_log, v_loc = continuous_autoregreesive_act(self.decoder, None, obs, batch_size, + self.n_agent, self.action_dim, self.tpdv, + deterministic) + + return output_action, output_action_log, v_loc + + def get_values(self, state, obs, available_actions=None): + _, __, v_loc = self.get_actions(state, obs, available_actions) + + return v_loc + + + diff --git a/controllers/mat_rapid/algorithms/mat/algorithm/mat_encoder.py b/controllers/mat_rapid/algorithms/mat/algorithm/mat_encoder.py new file mode 100644 index 0000000..10fa7e6 --- /dev/null +++ b/controllers/mat_rapid/algorithms/mat/algorithm/mat_encoder.py @@ -0,0 +1,240 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F +import math +import numpy as np +from torch.distributions import Categorical, Normal +from algorithms.utils.util import check, init +from algorithms.utils.transformer_act import discrete_autoregreesive_act +from algorithms.utils.transformer_act import discrete_parallel_act +from algorithms.utils.transformer_act import continuous_autoregreesive_act +from algorithms.utils.transformer_act import continuous_parallel_act + +def init_(m, gain=0.01, activate=False): + if activate: + gain = nn.init.calculate_gain('relu') + return init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0), gain=gain) + + +class 
SelfAttention(nn.Module): + + def __init__(self, n_embd, n_head, n_agent, masked=False): + super(SelfAttention, self).__init__() + + assert n_embd % n_head == 0 + self.masked = masked + self.n_head = n_head + # key, query, value projections for all heads + self.key = init_(nn.Linear(n_embd, n_embd)) + self.query = init_(nn.Linear(n_embd, n_embd)) + self.value = init_(nn.Linear(n_embd, n_embd)) + # output projection + self.proj = init_(nn.Linear(n_embd, n_embd)) + # if self.masked: + # causal mask to ensure that attention is only applied to the left in the input sequence + self.register_buffer("mask", torch.tril(torch.ones(n_agent + 1, n_agent + 1)) + .view(1, 1, n_agent + 1, n_agent + 1)) + + self.att_bp = None + + def forward(self, key, value, query): + B, L, D = query.size() + + # calculate query, key, values for all heads in batch and move head forward to be the batch dim + k = self.key(key).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + q = self.query(query).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + v = self.value(value).view(B, L, self.n_head, D // self.n_head).transpose(1, 2) # (B, nh, L, hs) + + # causal attention: (B, nh, L, hs) x (B, nh, hs, L) -> (B, nh, L, L) + att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1))) + + # self.att_bp = F.softmax(att, dim=-1) + + if self.masked: + att = att.masked_fill(self.mask[:, :, :L, :L] == 0, float('-inf')) + att = F.softmax(att, dim=-1) + + y = att @ v # (B, nh, L, L) x (B, nh, L, hs) -> (B, nh, L, hs) + y = y.transpose(1, 2).contiguous().view(B, L, D) # re-assemble all head outputs side by side + + # output projection + y = self.proj(y) + return y + + +class EncodeBlock(nn.Module): + """ an unassuming Transformer block """ + + def __init__(self, n_embd, n_head, n_agent): + super(EncodeBlock, self).__init__() + + self.ln1 = nn.LayerNorm(n_embd) + self.ln2 = nn.LayerNorm(n_embd) + # self.attn = SelfAttention(n_embd, n_head, n_agent, 
masked=True) + self.attn = SelfAttention(n_embd, n_head, n_agent, masked=False) + self.mlp = nn.Sequential( + init_(nn.Linear(n_embd, 1 * n_embd), activate=True), + nn.GELU(), + init_(nn.Linear(1 * n_embd, n_embd)) + ) + + def forward(self, x): + x = self.ln1(x + self.attn(x, x, x)) + x = self.ln2(x + self.mlp(x)) + return x + + +class Encoder(nn.Module): + + def __init__(self, state_dim, obs_dim, action_dim, n_block, n_embd, + n_head, n_agent, encode_state, action_type='Discrete'): + super(Encoder, self).__init__() + + self.state_dim = state_dim + self.obs_dim = obs_dim + self.action_dim = action_dim + self.n_embd = n_embd + self.n_agent = n_agent + self.encode_state = encode_state + self.action_type = action_type + + self.state_encoder = nn.Sequential(nn.LayerNorm(state_dim), + init_(nn.Linear(state_dim, n_embd), activate=True), nn.GELU()) + self.obs_encoder = nn.Sequential(nn.LayerNorm(obs_dim), + init_(nn.Linear(obs_dim, n_embd), activate=True), nn.GELU()) + + self.ln = nn.LayerNorm(n_embd) + self.blocks = nn.Sequential(*[EncodeBlock(n_embd, n_head, n_agent) for _ in range(n_block)]) + self.head = nn.Sequential(init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, 1))) + self.act_head = nn.Sequential(init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, action_dim))) + if action_type != 'Discrete': + log_std = torch.ones(action_dim) + # log_std = torch.zeros(action_dim) + self.log_std = torch.nn.Parameter(log_std) + # self.log_std = torch.nn.Parameter(torch.zeros(action_dim)) + + def zero_std(self, device): + if self.action_type != 'Discrete': + log_std = torch.zeros(self.action_dim).to(device) + self.log_std.data = log_std + + def forward(self, state, obs): + # state: (batch, n_agent, state_dim) + # obs: (batch, n_agent, obs_dim) + if self.encode_state: + state_embeddings = self.state_encoder(state) + x = state_embeddings + else: + obs_embeddings = 
self.obs_encoder(obs) + x = obs_embeddings + + rep = self.blocks(self.ln(x)) + v_loc = self.head(rep) + logit = self.act_head(rep) + + return v_loc, rep, logit + + +class MultiAgentEncoder(nn.Module): + + def __init__(self, state_dim, obs_dim, action_dim, n_agent, + n_block, n_embd, n_head, encode_state=False, device=torch.device("cpu"), + action_type='Discrete', dec_actor=False, share_actor=False): + super(MultiAgentEncoder, self).__init__() + + self.n_agent = n_agent + self.action_dim = action_dim + self.tpdv = dict(dtype=torch.float32, device=device) + self.action_type = action_type + self.device = device + + # state unused + state_dim = 37 + + self.encoder = Encoder(state_dim, obs_dim, action_dim, n_block, n_embd, n_head, n_agent, encode_state, + action_type=self.action_type) + self.to(device) + + def zero_std(self): + if self.action_type != 'Discrete': + self.encoder.zero_std(self.device) + + def forward(self, state, obs, action, available_actions=None): + # state: (batch, n_agent, state_dim) + # obs: (batch, n_agent, obs_dim) + # action: (batch, n_agent, 1) + # available_actions: (batch, n_agent, act_dim) + + # state unused + ori_shape = np.shape(state) + state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32) + + state = check(state).to(**self.tpdv) + obs = check(obs).to(**self.tpdv) + action = check(action).to(**self.tpdv) + + if available_actions is not None: + available_actions = check(available_actions).to(**self.tpdv) + + batch_size = np.shape(state)[0] + v_loc, obs_rep, logit = self.encoder(state, obs) + if self.action_type == 'Discrete': + action = action.long() + if available_actions is not None: + logit[available_actions == 0] = -1e10 + + distri = Categorical(logits=logit) + action_log = distri.log_prob(action.squeeze(-1)).unsqueeze(-1) + entropy = distri.entropy().unsqueeze(-1) + else: + act_mean = logit + action_std = torch.sigmoid(self.encoder.log_std) * 0.5 + distri = Normal(act_mean, action_std) + action_log = distri.log_prob(action) + 
entropy = distri.entropy() + + return action_log, v_loc, entropy + + def get_actions(self, state, obs, available_actions=None, deterministic=False): + # state unused + ori_shape = np.shape(obs) + state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32) + + state = check(state).to(**self.tpdv) + obs = check(obs).to(**self.tpdv) + if available_actions is not None: + available_actions = check(available_actions).to(**self.tpdv) + + v_loc, obs_rep, logit = self.encoder(state, obs) + if self.action_type == "Discrete": + if available_actions is not None: + logit[available_actions == 0] = -1e10 + + distri = Categorical(logits=logit) + output_action = distri.probs.argmax(dim=-1) if deterministic else distri.sample() + output_action_log = distri.log_prob(output_action) + output_action = output_action.unsqueeze(-1) + output_action_log = output_action_log.unsqueeze(-1) + else: + act_mean = logit + action_std = torch.sigmoid(self.encoder.log_std) * 0.5 + distri = Normal(act_mean, action_std) + output_action = act_mean if deterministic else distri.sample() + output_action_log = distri.log_prob(output_action) + + return output_action, output_action_log, v_loc + + def get_values(self, state, obs, available_actions=None): + # state unused + ori_shape = np.shape(state) + state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32) + + state = check(state).to(**self.tpdv) + obs = check(obs).to(**self.tpdv) + v_tot, _, _ = self.encoder(state, obs) + return v_tot + + + diff --git a/controllers/mat_rapid/algorithms/mat/algorithm/mat_gru.py b/controllers/mat_rapid/algorithms/mat/algorithm/mat_gru.py new file mode 100644 index 0000000..141a671 --- /dev/null +++ b/controllers/mat_rapid/algorithms/mat/algorithm/mat_gru.py @@ -0,0 +1,188 @@ +import torch +import torch.nn as nn +from torch.nn import functional as F +import math +import numpy as np +from torch.distributions import Categorical +from algorithms.utils.util import check, init +from algorithms.utils.transformer_act import 
discrete_autoregreesive_act +from algorithms.utils.transformer_act import discrete_parallel_act +from algorithms.utils.transformer_act import continuous_autoregreesive_act +from algorithms.utils.transformer_act import continuous_parallel_act + + +def init_(m, gain=0.01, activate=False): + if activate: + gain = nn.init.calculate_gain('relu') + return init(m, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0), gain=gain) + + +class Encoder(nn.Module): + + def __init__(self, state_dim, obs_dim, n_block, n_embd, n_head, n_agent, encode_state): + super(Encoder, self).__init__() + + self.state_dim = state_dim + self.obs_dim = obs_dim + self.n_embd = n_embd + self.n_agent = n_agent + self.encode_state = encode_state + + self.state_encoder = nn.Sequential(nn.LayerNorm(state_dim), + init_(nn.Linear(state_dim, n_embd), activate=True), nn.GELU()) + self.obs_encoder = nn.Sequential(nn.LayerNorm(obs_dim), + init_(nn.Linear(obs_dim, n_embd), activate=True), nn.GELU()) + + self.ln = nn.LayerNorm(n_embd) + # self.blocks = nn.Sequential(*[EncodeBlock(n_embd, n_head, n_agent) for _ in range(n_block)]) + self.gru = nn.GRU(n_embd, n_embd, num_layers=2, batch_first=True) + self.head = nn.Sequential(init_(nn.Linear(n_embd, n_embd), activate=True), nn.GELU(), nn.LayerNorm(n_embd), + init_(nn.Linear(n_embd, 1))) + + def forward(self, state, obs): + # state: (batch, n_agent, state_dim) + # obs: (batch, n_agent, obs_dim) + obs_embeddings = self.obs_encoder(obs) + x = obs_embeddings + + rep, _ = self.gru(self.ln(x)) + v_loc = self.head(rep) + + return v_loc, rep + + +class Decoder(nn.Module): + + def __init__(self, obs_dim, action_dim, n_block, n_embd, n_head, n_agent, + action_type='Discrete', dec_actor=False, share_actor=False): + super(Decoder, self).__init__() + + self.action_dim = action_dim + self.n_embd = n_embd + self.dec_actor = dec_actor + self.share_actor = share_actor + self.action_type = action_type + + if action_type == 'Discrete': + self.action_encoder = 
    # state, action, and return
    def forward(self, action, obs_rep, obs):
        """Score shifted actions against per-agent observation features.

        :param action: (batch, n_agent, action_dim or action_dim+1) shifted
            previous-action encoding (one-hot plus start-token channel in the
            discrete case — see the action_encoder built in __init__).
        :param obs_rep: (batch, n_agent, n_embd) encoder output features.
        :param obs: (batch, n_agent, obs_dim) raw observations — not used in
            this method.
        :return logit: (batch, n_agent, action_dim) action logits (discrete)
            or means (continuous).
        """
        action_embeddings = self.action_encoder(action)
        x = action_embeddings
        # In-place add is safe: action_embeddings is freshly allocated above.
        x += obs_rep
        x, _ = self.gru(self.ln(x))
        logit = self.head(x)

        return logit
    def get_values(self, state, obs, available_actions=None):
        """Return value estimates only (no action sampling).

        `state` is ignored: it is replaced by a zero tensor with feature
        width 37 (matching the hard-coded state_dim in MultiAgentGRU's
        __init__). `available_actions` is accepted for interface parity but
        is unused here.
        :return v_tot: (batch, n_agent, 1) per-agent value predictions.
        """
        # state unused
        ori_shape = np.shape(state)
        state = np.zeros((*ori_shape[:-1], 37), dtype=np.float32)

        state = check(state).to(**self.tpdv)
        obs = check(obs).to(**self.tpdv)
        v_tot, obs_rep = self.encoder(state, obs)
        return v_tot
+ + :param args: (argparse.Namespace) arguments containing relevant model and policy information. + :param obs_space: (gym.Space) observation space. + :param cent_obs_space: (gym.Space) value function input space (centralized input for MAPPO, decentralized for IPPO). + :param action_space: (gym.Space) action space. + :param device: (torch.device) specifies the device to run on (cpu/gpu). + """ + + def __init__(self, args, obs_space, cent_obs_space, act_space, num_agents, device=torch.device("cpu")): + self.device = device + self.algorithm_name = args.algorithm_name + self.lr = args.lr + self.opti_eps = args.opti_eps + self.weight_decay = args.weight_decay + self._use_policy_active_masks = args.use_policy_active_masks + if act_space.__class__.__name__ == 'Box': + self.action_type = 'Continuous' + else: + self.action_type = 'Discrete' + + self.obs_dim = get_shape_from_obs_space(obs_space)[0] + self.share_obs_dim = get_shape_from_obs_space(cent_obs_space)[0] + if self.action_type == 'Discrete': + self.act_dim = act_space.n + self.act_num = 1 + else: + print("act high: ", act_space.high) + self.act_dim = act_space.shape[0] + self.act_num = self.act_dim + + print("obs_dim: ", self.obs_dim) + print("share_obs_dim: ", self.share_obs_dim) + print("act_dim: ", self.act_dim) + + self.num_agents = num_agents + self.tpdv = dict(dtype=torch.float32, device=device) + + if self.algorithm_name in ["mat", "mat_dec"]: + from algorithms.mat.algorithm.ma_transformer import MultiAgentTransformer as MAT + elif self.algorithm_name == "mat_gru": + from mat.algorithms.mat.algorithm.mat_gru import MultiAgentGRU as MAT + elif self.algorithm_name == "mat_decoder": + from mat.algorithms.mat.algorithm.mat_decoder import MultiAgentDecoder as MAT + elif self.algorithm_name == "mat_encoder": + from mat.algorithms.mat.algorithm.mat_encoder import MultiAgentEncoder as MAT + else: + raise NotImplementedError + + self.transformer = MAT(self.share_obs_dim, self.obs_dim, self.act_dim, num_agents, + 
n_block=args.n_block, n_embd=args.n_embd, n_head=args.n_head, + encode_state=args.encode_state, device=device, + action_type=self.action_type, dec_actor=args.dec_actor, + share_actor=args.share_actor) + if args.env_name == "hands": + self.transformer.zero_std() + + # count the volume of parameters of model + # Total_params = 0 + # Trainable_params = 0 + # NonTrainable_params = 0 + # for param in self.transformer.parameters(): + # mulValue = np.prod(param.size()) + # Total_params += mulValue + # if param.requires_grad: + # Trainable_params += mulValue + # else: + # NonTrainable_params += mulValue + # print(f'Total params: {Total_params}') + # print(f'Trainable params: {Trainable_params}') + # print(f'Non-trainable params: {NonTrainable_params}') + + self.optimizer = torch.optim.Adam(self.transformer.parameters(), + lr=self.lr, eps=self.opti_eps, + weight_decay=self.weight_decay) + + def lr_decay(self, episode, episodes): + """ + Decay the actor and critic learning rates. + :param episode: (int) current training episode. + :param episodes: (int) total number of training episodes. + """ + update_linear_schedule(self.optimizer, episode, episodes, self.lr) + + def get_actions(self, cent_obs, obs, rnn_states_actor, rnn_states_critic, masks, available_actions=None, + deterministic=False): + """ + Compute actions and value function predictions for the given inputs. + :param cent_obs (np.ndarray): centralized input to the critic. + :param obs (np.ndarray): local agent inputs to the actor. + :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor. + :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic. + :param masks: (np.ndarray) denotes points at which RNN states should be reset. + :param available_actions: (np.ndarray) denotes which actions are available to agent + (if None, all actions available) + :param deterministic: (bool) whether the action should be mode of distribution or should be sampled. 
+ + :return values: (torch.Tensor) value function predictions. + :return actions: (torch.Tensor) actions to take. + :return action_log_probs: (torch.Tensor) log probabilities of chosen actions. + :return rnn_states_actor: (torch.Tensor) updated actor network RNN states. + :return rnn_states_critic: (torch.Tensor) updated critic network RNN states. + """ + + cent_obs = cent_obs.reshape(-1, self.num_agents, self.share_obs_dim) + obs = obs.reshape(-1, self.num_agents, self.obs_dim) + if available_actions is not None: + available_actions = available_actions.reshape(-1, self.num_agents, self.act_dim) + + actions, action_log_probs, values = self.transformer.get_actions(cent_obs, + obs, + available_actions, + deterministic) + + actions = actions.view(-1, self.act_num) + action_log_probs = action_log_probs.view(-1, self.act_num) + values = values.view(-1, 1) + + # unused, just for compatibility + rnn_states_actor = check(rnn_states_actor).to(**self.tpdv) + rnn_states_critic = check(rnn_states_critic).to(**self.tpdv) + return values, actions, action_log_probs, rnn_states_actor, rnn_states_critic + + def get_values(self, cent_obs, obs, rnn_states_critic, masks, available_actions=None): + """ + Get value function predictions. + :param cent_obs (np.ndarray): centralized input to the critic. + :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic. + :param masks: (np.ndarray) denotes points at which RNN states should be reset. + + :return values: (torch.Tensor) value function predictions. 
+ """ + + cent_obs = cent_obs.reshape(-1, self.num_agents, self.share_obs_dim) + obs = obs.reshape(-1, self.num_agents, self.obs_dim) + if available_actions is not None: + available_actions = available_actions.reshape(-1, self.num_agents, self.act_dim) + + values = self.transformer.get_values(cent_obs, obs, available_actions) + + values = values.view(-1, 1) + + return values + + def evaluate_actions(self, cent_obs, obs, rnn_states_actor, rnn_states_critic, actions, masks, + available_actions=None, active_masks=None): + """ + Get action logprobs / entropy and value function predictions for actor update. + :param cent_obs (np.ndarray): centralized input to the critic. + :param obs (np.ndarray): local agent inputs to the actor. + :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor. + :param rnn_states_critic: (np.ndarray) if critic is RNN, RNN states for critic. + :param actions: (np.ndarray) actions whose log probabilites and entropy to compute. + :param masks: (np.ndarray) denotes points at which RNN states should be reset. + :param available_actions: (np.ndarray) denotes which actions are available to agent + (if None, all actions available) + :param active_masks: (torch.Tensor) denotes whether an agent is active or dead. + + :return values: (torch.Tensor) value function predictions. + :return action_log_probs: (torch.Tensor) log probabilities of the input actions. + :return dist_entropy: (torch.Tensor) action distribution entropy for the given inputs. 
+ """ + cent_obs = cent_obs.reshape(-1, self.num_agents, self.share_obs_dim) + obs = obs.reshape(-1, self.num_agents, self.obs_dim) + actions = actions.reshape(-1, self.num_agents, self.act_num) + if available_actions is not None: + available_actions = available_actions.reshape(-1, self.num_agents, self.act_dim) + + action_log_probs, values, entropy = self.transformer(cent_obs, obs, actions, available_actions) + + action_log_probs = action_log_probs.view(-1, self.act_num) + values = values.view(-1, 1) + entropy = entropy.view(-1, self.act_num) + + if self._use_policy_active_masks and active_masks is not None: + entropy = (entropy*active_masks).sum()/active_masks.sum() + else: + entropy = entropy.mean() + + return values, action_log_probs, entropy + + def act(self, cent_obs, obs, rnn_states_actor, masks, available_actions=None, deterministic=True): + """ + Compute actions using the given inputs. + :param obs (np.ndarray): local agent inputs to the actor. + :param rnn_states_actor: (np.ndarray) if actor is RNN, RNN states for actor. + :param masks: (np.ndarray) denotes points at which RNN states should be reset. + :param available_actions: (np.ndarray) denotes which actions are available to agent + (if None, all actions available) + :param deterministic: (bool) whether the action should be mode of distribution or should be sampled. 
+ """ + + # this function is just a wrapper for compatibility + rnn_states_critic = np.zeros_like(rnn_states_actor) + _, actions, _, rnn_states_actor, _ = self.get_actions(cent_obs, + obs, + rnn_states_actor, + rnn_states_critic, + masks, + available_actions, + deterministic) + + return actions, rnn_states_actor + + def save(self, save_dir, episode): + torch.save(self.transformer.state_dict(), str(save_dir) + "/transformer_" + str(episode) + ".pt") + + def restore(self, model_dir): + transformer_state_dict = torch.load(model_dir) + self.transformer.load_state_dict(transformer_state_dict) + # self.transformer.reset_std() + + def train(self): + self.transformer.train() + + def eval(self): + self.transformer.eval() + diff --git a/controllers/mat_rapid/algorithms/mat/mat_trainer.py b/controllers/mat_rapid/algorithms/mat/mat_trainer.py new file mode 100644 index 0000000..a6979e4 --- /dev/null +++ b/controllers/mat_rapid/algorithms/mat/mat_trainer.py @@ -0,0 +1,205 @@ +import numpy as np +import torch +import torch.nn as nn +from utils.util import get_gard_norm, huber_loss, mse_loss +from utils.valuenorm import ValueNorm +from algorithms.utils.util import check + + +class MATTrainer: + """ + Trainer class for MAT to update policies. + :param args: (argparse.Namespace) arguments containing relevant model, policy, and env information. + :param policy: (R_MAPPO_Policy) policy to update. + :param device: (torch.device) specifies the device to run on (cpu/gpu). 
+ """ + def __init__(self, + args, + policy, + num_agents, + device=torch.device("cpu")): + + self.device = device + self.tpdv = dict(dtype=torch.float32, device=device) + self.policy = policy + self.num_agents = num_agents + + self.clip_param = args.clip_param + self.ppo_epoch = args.ppo_epoch + self.num_mini_batch = args.num_mini_batch + self.data_chunk_length = args.data_chunk_length + self.value_loss_coef = args.value_loss_coef + self.entropy_coef = args.entropy_coef + self.max_grad_norm = args.max_grad_norm + self.huber_delta = args.huber_delta + + self._use_recurrent_policy = args.use_recurrent_policy + self._use_naive_recurrent = args.use_naive_recurrent_policy + self._use_max_grad_norm = args.use_max_grad_norm + self._use_clipped_value_loss = args.use_clipped_value_loss + self._use_huber_loss = args.use_huber_loss + self._use_valuenorm = args.use_valuenorm + self._use_value_active_masks = args.use_value_active_masks + self._use_policy_active_masks = args.use_policy_active_masks + self.dec_actor = args.dec_actor + + if self._use_valuenorm: + self.value_normalizer = ValueNorm(1, device=self.device) + else: + self.value_normalizer = None + + def cal_value_loss(self, values, value_preds_batch, return_batch, active_masks_batch): + """ + Calculate value function loss. + :param values: (torch.Tensor) value function predictions. + :param value_preds_batch: (torch.Tensor) "old" value predictions from data batch (used for value clip loss) + :param return_batch: (torch.Tensor) reward to go returns. + :param active_masks_batch: (torch.Tensor) denotes if agent is active or dead at a given timesep. + + :return value_loss: (torch.Tensor) value function loss. 
+ """ + + value_pred_clipped = value_preds_batch + (values - value_preds_batch).clamp(-self.clip_param, + self.clip_param) + + if self._use_valuenorm: + self.value_normalizer.update(return_batch) + error_clipped = self.value_normalizer.normalize(return_batch) - value_pred_clipped + error_original = self.value_normalizer.normalize(return_batch) - values + else: + error_clipped = return_batch - value_pred_clipped + error_original = return_batch - values + + if self._use_huber_loss: + value_loss_clipped = huber_loss(error_clipped, self.huber_delta) + value_loss_original = huber_loss(error_original, self.huber_delta) + else: + value_loss_clipped = mse_loss(error_clipped) + value_loss_original = mse_loss(error_original) + + if self._use_clipped_value_loss: + value_loss = torch.max(value_loss_original, value_loss_clipped) + else: + value_loss = value_loss_original + + # if self._use_value_active_masks and not self.dec_actor: + if self._use_value_active_masks: + value_loss = (value_loss * active_masks_batch).sum() / active_masks_batch.sum() + else: + value_loss = value_loss.mean() + + return value_loss + + def ppo_update(self, sample): + """ + Update actor and critic networks. + :param sample: (Tuple) contains data batch with which to update networks. + :update_actor: (bool) whether to update actor network. + + :return value_loss: (torch.Tensor) value function loss. + :return critic_grad_norm: (torch.Tensor) gradient norm from critic up9date. + ;return policy_loss: (torch.Tensor) actor(policy) loss value. + :return dist_entropy: (torch.Tensor) action entropies. + :return actor_grad_norm: (torch.Tensor) gradient norm from actor update. + :return imp_weights: (torch.Tensor) importance sampling weights. 
+ """ + share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, \ + value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, \ + adv_targ, available_actions_batch = sample + + old_action_log_probs_batch = check(old_action_log_probs_batch).to(**self.tpdv) + adv_targ = check(adv_targ).to(**self.tpdv) + value_preds_batch = check(value_preds_batch).to(**self.tpdv) + return_batch = check(return_batch).to(**self.tpdv) + active_masks_batch = check(active_masks_batch).to(**self.tpdv) + + # Reshape to do in a single forward pass for all steps + values, action_log_probs, dist_entropy = self.policy.evaluate_actions(share_obs_batch, + obs_batch, + rnn_states_batch, + rnn_states_critic_batch, + actions_batch, + masks_batch, + available_actions_batch, + active_masks_batch) + # actor update + imp_weights = torch.exp(action_log_probs - old_action_log_probs_batch) + + surr1 = imp_weights * adv_targ + surr2 = torch.clamp(imp_weights, 1.0 - self.clip_param, 1.0 + self.clip_param) * adv_targ + + if self._use_policy_active_masks: + policy_loss = (-torch.sum(torch.min(surr1, surr2), + dim=-1, + keepdim=True) * active_masks_batch).sum() / active_masks_batch.sum() + else: + policy_loss = -torch.sum(torch.min(surr1, surr2), dim=-1, keepdim=True).mean() + + # critic update + value_loss = self.cal_value_loss(values, value_preds_batch, return_batch, active_masks_batch) + + loss = policy_loss - dist_entropy * self.entropy_coef + value_loss * self.value_loss_coef + + self.policy.optimizer.zero_grad() + loss.backward() + + if self._use_max_grad_norm: + grad_norm = nn.utils.clip_grad_norm_(self.policy.transformer.parameters(), self.max_grad_norm) + else: + grad_norm = get_gard_norm(self.policy.transformer.parameters()) + + self.policy.optimizer.step() + + return value_loss, grad_norm, policy_loss, dist_entropy, grad_norm, imp_weights + + def train(self, buffer): + """ + Perform a training update using minibatch GD. 
+ :param buffer: (SharedReplayBuffer) buffer containing training data. + :param update_actor: (bool) whether to update actor network. + + :return train_info: (dict) contains information regarding training update (e.g. loss, grad norms, etc). + """ + advantages_copy = buffer.advantages.copy() + advantages_copy[buffer.active_masks[:-1] == 0.0] = np.nan + mean_advantages = np.nanmean(advantages_copy) + std_advantages = np.nanstd(advantages_copy) + advantages = (buffer.advantages - mean_advantages) / (std_advantages + 1e-5) + + + train_info = {} + + train_info['value_loss'] = 0 + train_info['policy_loss'] = 0 + train_info['dist_entropy'] = 0 + train_info['actor_grad_norm'] = 0 + train_info['critic_grad_norm'] = 0 + train_info['ratio'] = 0 + + for _ in range(self.ppo_epoch): + data_generator = buffer.feed_forward_generator_transformer(advantages, self.num_mini_batch) + + for sample in data_generator: + + value_loss, critic_grad_norm, policy_loss, dist_entropy, actor_grad_norm, imp_weights \ + = self.ppo_update(sample) + + train_info['value_loss'] += value_loss.item() + train_info['policy_loss'] += policy_loss.item() + train_info['dist_entropy'] += dist_entropy.item() + train_info['actor_grad_norm'] += actor_grad_norm + train_info['critic_grad_norm'] += critic_grad_norm + train_info['ratio'] += imp_weights.mean() + + num_updates = self.ppo_epoch * self.num_mini_batch + + for k in train_info.keys(): + train_info[k] /= num_updates + + return train_info + + def prep_training(self): + self.policy.train() + + def prep_rollout(self): + self.policy.eval() diff --git a/controllers/mat_rapid/algorithms/utils/transformer_act.py b/controllers/mat_rapid/algorithms/utils/transformer_act.py new file mode 100644 index 0000000..9364b69 --- /dev/null +++ b/controllers/mat_rapid/algorithms/utils/transformer_act.py @@ -0,0 +1,85 @@ +import torch +from torch.distributions import Categorical, Normal +from torch.nn import functional as F + + +def discrete_autoregreesive_act(decoder, obs_rep, 
def discrete_parallel_act(decoder, obs_rep, obs, action, batch_size, n_agent, action_dim, tpdv,
                          available_actions=None):
    """Teacher-forced evaluation of a batch of discrete joint actions.

    Builds the decoder input by placing a start token for agent 0 and giving
    each later agent the one-hot action of its predecessor, then scores all
    agents' actions in one decoder pass.

    Returns (action_log, entropy), each shaped (batch, n_agent, 1).
    """
    acts = action.squeeze(-1)
    prev_onehot = F.one_hot(acts, num_classes=action_dim)  # (batch, n_agent, action_dim)

    # Channel 0 is the start-token slot; channels 1: carry the previous
    # agent's one-hot action (agent i conditions on agent i-1's choice).
    dec_input = torch.zeros((batch_size, n_agent, action_dim + 1)).to(**tpdv)
    dec_input[:, 0, 0] = 1
    dec_input[:, 1:, 1:] = prev_onehot[:, :-1, :]

    logits = decoder(dec_input, obs_rep, obs)
    if available_actions is not None:
        # Push unavailable actions' logits far down so their probability
        # is effectively zero.
        logits[available_actions == 0] = -1e10

    dist = Categorical(logits=logits)
    return dist.log_prob(acts).unsqueeze(-1), dist.entropy().unsqueeze(-1)
def check(input):
    """Return *input* as a torch tensor.

    numpy arrays are wrapped with ``torch.from_numpy`` (shares memory, no
    copy); anything else — typically an existing tensor — passes through
    unchanged.
    """
    # isinstance instead of `type(input) == np.ndarray`: the idiomatic type
    # test, and it also accepts ndarray subclasses.
    output = torch.from_numpy(input) if isinstance(input, np.ndarray) else input
    return output
mode 100644 index 0000000..6c77953 --- /dev/null +++ b/controllers/mat_rapid/config.py @@ -0,0 +1,300 @@ +import argparse + + +def get_config(): + """ + The configuration parser for hyper-parameters of all environment. + Please reach each `scripts/train/_runner.py` file to find private hyper-parameters + only used in . + + Prepare parameters: + --algorithm_name + specifiy the algorithm, including `["mat", "mat_dec"]` + --experiment_name + an identifier to distinguish different experiment. + --seed + set seed for numpy and torch + --cuda + by default True, will use GPU to train; or else will use CPU; + --cuda_deterministic + by default, make sure random seed effective. if set, bypass such function. + --n_training_threads + number of training threads working in parallel. by default 1 + --n_rollout_threads + number of parallel envs for training rollout. by default 32 + --n_eval_rollout_threads + number of parallel envs for evaluating rollout. by default 1 + --n_render_rollout_threads + number of parallel envs for rendering, could only be set as 1 for some environments. + --num_env_steps + number of env steps to train (default: 10e6) + --user_name + [for wandb usage], to specify user's name for simply collecting training data. + --use_wandb + [for wandb usage], by default True, will log date to wandb server. or else will use tensorboard to log data. + + Env parameters: + --env_name + specify the name of environment + --use_obs_instead_of_state + [only for some env] by default False, will use global state; or else will use concatenated local obs. + + Replay Buffer parameters: + --episode_length + the max length of episode in the buffer. + + Network parameters: + --share_policy + by default True, all agents will share the same network; set to make training agents use different policies. + --use_centralized_V + by default True, use centralized training mode; or else will decentralized training mode. 
+ --stacked_frames + Number of input frames which should be stack together. + --hidden_size + Dimension of hidden layers for actor/critic networks + --layer_N + Number of layers for actor/critic networks + --use_ReLU + by default True, will use ReLU. or else will use Tanh. + --use_popart + by default True, use PopArt to normalize rewards. + --use_valuenorm + by default True, use running mean and std to normalize rewards. + --use_feature_normalization + by default True, apply layernorm to normalize inputs. + --use_orthogonal + by default True, use Orthogonal initialization for weights and 0 initialization for biases. or else, will use xavier uniform inilialization. + --gain + by default 0.01, use the gain # of last action layer + --use_naive_recurrent_policy + by default False, use the whole trajectory to calculate hidden states. + --use_recurrent_policy + by default, use Recurrent Policy. If set, do not use. + --recurrent_N + The number of recurrent layers ( default 1). + --data_chunk_length + Time length of chunks used to train a recurrent_policy, default 10. + + Optimizer parameters: + --lr + learning rate parameter, (default: 5e-4, fixed). + --critic_lr + learning rate of critic (default: 5e-4, fixed) + --opti_eps + RMSprop optimizer epsilon (default: 1e-5) + --weight_decay + coefficience of weight decay (default: 0) + + PPO parameters: + --ppo_epoch + number of ppo epochs (default: 15) + --use_clipped_value_loss + by default, clip loss value. If set, do not clip loss value. + --clip_param + ppo clip parameter (default: 0.2) + --num_mini_batch + number of batches for ppo (default: 1) + --entropy_coef + entropy term coefficient (default: 0.01) + --use_max_grad_norm + by default, use max norm of gradients. If set, do not use. + --max_grad_norm + max norm of gradients (default: 0.5) + --use_gae + by default, use generalized advantage estimation. If set, do not use gae. 
+ --gamma + discount factor for rewards (default: 0.99) + --gae_lambda + gae lambda parameter (default: 0.95) + --use_proper_time_limits + by default, the return value does consider limits of time. If set, compute returns with considering time limits factor. + --use_huber_loss + by default, use huber loss. If set, do not use huber loss. + --use_value_active_masks + by default True, whether to mask useless data in value loss. + --huber_delta + coefficient of huber loss. + + PPG parameters: + --aux_epoch + number of auxiliary epochs. (default: 4) + --clone_coef + clone term coefficient (default: 0.01) + + Run parameters: + --use_linear_lr_decay + by default, do not apply linear decay to learning rate. If set, use a linear schedule on the learning rate + + Save & Log parameters: + --save_interval + time duration between contiunous twice models saving. + --log_interval + time duration between contiunous twice log printing. + + Eval parameters: + --use_eval + by default, do not start evaluation. If set`, start evaluation alongside with training. + --eval_interval + time duration between contiunous twice evaluation progress. + --eval_episodes + number of episodes of a single evaluation. + + Render parameters: + --save_gifs + by default, do not save render video. If set, save video. + --use_render + by default, do not render the env during training. If set, start render. Note: something, the environment has internal render process which is not controlled by this hyperparam. + --render_episodes + the number of episodes to render a given env + --ifi + the play interval of each rendered image in saved video. + + Pretrained parameters: + --model_dir + by default None. set the path to pretrained model. 
+ """ + parser = argparse.ArgumentParser( + description='onpolicy', formatter_class=argparse.RawDescriptionHelpFormatter) + + # prepare parameters + parser.add_argument("--algorithm_name", type=str, + default='mat', choices=["mat", "mat_dec", "mat_encoder", "mat_decoder", "mat_gru"]) + + parser.add_argument("--experiment_name", type=str, default="check", help="an identifier to distinguish different experiment.") + parser.add_argument("--seed", type=int, default=1, help="Random seed for numpy/torch") + parser.add_argument("--cuda", action='store_false', default=True, help="by default True, will use GPU to train; or else will use CPU;") + parser.add_argument("--cuda_deterministic", + action='store_false', default=True, help="by default, make sure random seed effective. if set, bypass such function.") + parser.add_argument("--n_training_threads", type=int, + default=1, help="Number of torch threads for training") + parser.add_argument("--n_rollout_threads", type=int, default=32, + help="Number of parallel envs for training rollouts") + parser.add_argument("--n_eval_rollout_threads", type=int, default=1, + help="Number of parallel envs for evaluating rollouts") + parser.add_argument("--n_render_rollout_threads", type=int, default=1, + help="Number of parallel envs for rendering rollouts") + parser.add_argument("--num_env_steps", type=int, default=10e6, + help='Number of environment steps to train (default: 10e6)') + parser.add_argument("--user_name", type=str, default='xxx',help="[for wandb usage], to specify user's name for simply collecting training data.") + parser.add_argument("--use_wandb", action='store_false', default=False, help="[for wandb usage], by default True, will log date to wandb server. 
or else will use tensorboard to log data.") + + # env parameters + parser.add_argument("--env_name", type=str, default='StarCraft2', help="specify the name of environment") + parser.add_argument("--use_obs_instead_of_state", action='store_true', + default=False, help="Whether to use global state or concatenated obs") + + # replay buffer parameters + parser.add_argument("--episode_length", type=int, + default=200, help="Max length for any episode") + + # network parameters + parser.add_argument("--share_policy", action='store_false', + default=True, help='Whether agent share the same policy') + parser.add_argument("--use_centralized_V", action='store_false', + default=True, help="Whether to use centralized V function") + parser.add_argument("--stacked_frames", type=int, default=1, + help="Dimension of hidden layers for actor/critic networks") + parser.add_argument("--use_stacked_frames", action='store_true', + default=False, help="Whether to use stacked_frames") + parser.add_argument("--hidden_size", type=int, default=64, + help="Dimension of hidden layers for actor/critic networks") + parser.add_argument("--layer_N", type=int, default=2, + help="Number of layers for actor/critic networks") + parser.add_argument("--use_ReLU", action='store_false', + default=True, help="Whether to use ReLU") + parser.add_argument("--use_popart", action='store_true', default=False, help="by default False, use PopArt to normalize rewards.") + parser.add_argument("--use_valuenorm", action='store_false', default=True, help="by default True, use running mean and std to normalize rewards.") + parser.add_argument("--use_feature_normalization", action='store_false', + default=True, help="Whether to apply layernorm to the inputs") + parser.add_argument("--use_orthogonal", action='store_false', default=True, + help="Whether to use Orthogonal initialization for weights and 0 initialization for biases") + parser.add_argument("--gain", type=float, default=0.01, + help="The gain # of last action 
layer") + + # recurrent parameters + parser.add_argument("--use_naive_recurrent_policy", action='store_true', + default=False, help='Whether to use a naive recurrent policy') + parser.add_argument("--use_recurrent_policy", action='store_true', + default=False, help='use a recurrent policy') + parser.add_argument("--recurrent_N", type=int, default=1, help="The number of recurrent layers.") + parser.add_argument("--data_chunk_length", type=int, default=10, + help="Time length of chunks used to train a recurrent_policy") + + # optimizer parameters + parser.add_argument("--lr", type=float, default=5e-4, + help='learning rate (default: 5e-4)') + parser.add_argument("--critic_lr", type=float, default=5e-4, + help='critic learning rate (default: 5e-4)') + parser.add_argument("--opti_eps", type=float, default=1e-5, + help='RMSprop optimizer epsilon (default: 1e-5)') + parser.add_argument("--weight_decay", type=float, default=0) + + # ppo parameters + parser.add_argument("--ppo_epoch", type=int, default=15, + help='number of ppo epochs (default: 15)') + parser.add_argument("--use_clipped_value_loss", + action='store_false', default=True, help="by default, clip loss value. If set, do not clip loss value.") + parser.add_argument("--clip_param", type=float, default=0.2, + help='ppo clip parameter (default: 0.2)') + parser.add_argument("--num_mini_batch", type=int, default=1, + help='number of batches for ppo (default: 1)') + parser.add_argument("--entropy_coef", type=float, default=0.01, + help='entropy term coefficient (default: 0.01)') + parser.add_argument("--value_loss_coef", type=float, + default=1, help='value loss coefficient (default: 0.5)') + parser.add_argument("--use_max_grad_norm", + action='store_false', default=True, help="by default, use max norm of gradients. 
If set, do not use.") + parser.add_argument("--max_grad_norm", type=float, default=10.0, + help='max norm of gradients (default: 0.5)') + parser.add_argument("--use_gae", action='store_false', + default=True, help='use generalized advantage estimation') + parser.add_argument("--gamma", type=float, default=0.99, + help='discount factor for rewards (default: 0.99)') + parser.add_argument("--gae_lambda", type=float, default=0.95, + help='gae lambda parameter (default: 0.95)') + parser.add_argument("--use_proper_time_limits", action='store_true', + default=False, help='compute returns taking into account time limits') + parser.add_argument("--use_huber_loss", action='store_false', default=True, help="by default, use huber loss. If set, do not use huber loss.") + parser.add_argument("--use_value_active_masks", + action='store_false', default=True, help="by default True, whether to mask useless data in value loss.") + parser.add_argument("--use_policy_active_masks", + action='store_false', default=True, help="by default True, whether to mask useless data in policy loss.") + parser.add_argument("--huber_delta", type=float, default=10.0, help=" coefficience of huber loss.") + + # run parameters + parser.add_argument("--use_linear_lr_decay", action='store_true', + default=False, help='use a linear schedule on the learning rate') + # save parameters + parser.add_argument("--save_interval", type=int, default=100, help="time duration between contiunous twice models saving.") + + # log parameters + parser.add_argument("--log_interval", type=int, default=5, help="time duration between contiunous twice log printing.") + + # eval parameters + parser.add_argument("--use_eval", action='store_true', default=False, help="by default, do not start evaluation. 
If set`, start evaluation alongside with training.") + parser.add_argument("--eval_interval", type=int, default=25, help="time duration between contiunous twice evaluation progress.") + parser.add_argument("--eval_episodes", type=int, default=32, help="number of episodes of a single evaluation.") + + # render parameters + parser.add_argument("--save_gifs", action='store_true', default=False, help="by default, do not save render video. If set, save video.") + parser.add_argument("--use_render", action='store_true', default=False, help="by default, do not render the env during training. If set, start render. Note: something, the environment has internal render process which is not controlled by this hyperparam.") + parser.add_argument("--render_episodes", type=int, default=5, help="the number of episodes to render a given env") + parser.add_argument("--ifi", type=float, default=0.1, help="the play interval of each rendered image in saved video.") + + # pretrained parameters + parser.add_argument("--model_dir", type=str, default=None, help="by default None. 
set the path to pretrained model.") + + + # add for transformer + parser.add_argument("--encode_state", action='store_true', default=False) + parser.add_argument("--n_block", type=int, default=1) + parser.add_argument("--n_embd", type=int, default=64) + parser.add_argument("--n_head", type=int, default=1) + parser.add_argument("--dec_actor", action='store_true', default=False) + parser.add_argument("--share_actor", action='store_true', default=False) + + # add for online multi-task + parser.add_argument("--train_maps", type=str, nargs='+', default=None) + parser.add_argument("--eval_maps", type=str, nargs='+', default=None) + + return parser diff --git a/controllers/mat_rapid/mat_rapid.py b/controllers/mat_rapid/mat_rapid.py new file mode 100644 index 0000000..42ec25e --- /dev/null +++ b/controllers/mat_rapid/mat_rapid.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +import sys +import os +import wandb +import socket +import setproctitle +import numpy as np +from pathlib import Path +import torch +#sys.path.append("../../") +from config import get_config +#from mat.envs.mpe.MPE_env import MPEEnv +from soccer.soccer_env import SoccerEnv +from runner.soccer_runner import SoccerRunner as Runner +from soccer.env_wrappers import SubprocVecEnv, DummyVecEnv + +"""Train script for MPEs.""" + +def make_train_env(all_args): + def get_env_fn(rank): + def init_env(): + if all_args.env_name == "soccer": + #env_args = {"scenario": all_args.scenario, + # "n_agent": all_args.n_agent} + #env = FootballEnv(env_args=env_args) + + env_args = {"scenario": all_args.scenario_name, + "episode_length": all_args.episode_length} + env = SoccerEnv(env_args=env_args) + else: + print("Can not support the " + + all_args.env_name + "environment.") + raise NotImplementedError + env.seed(all_args.seed + rank * 1000) + return env + return init_env + if all_args.n_rollout_threads == 1: + return DummyVecEnv([get_env_fn(0)]) + else: + return SubprocVecEnv([get_env_fn(i) for i in 
range(all_args.n_rollout_threads)]) + + +def make_eval_env(all_args): + def get_env_fn(rank): + def init_env(): + if all_args.env_name == "soccer": + env = SoccerEnv() + else: + print("Can not support the " + + all_args.env_name + "environment.") + raise NotImplementedError + env.seed(all_args.seed * 50000 + rank * 10000) + return env + return init_env + if all_args.n_eval_rollout_threads == 1: + return DummyVecEnv([get_env_fn(0)]) + else: + return SubprocVecEnv([get_env_fn(i) for i in range(all_args.n_eval_rollout_threads)]) + + +def parse_args(args, parser): + parser.add_argument('--scenario_name', type=str, + default='soccer', help="Which scenario to run on") + #parser.add_argument("--num_landmarks", type=int, default=3) + parser.add_argument('--num_agents', type=int, + default=3, help="number of players") + + all_args = parser.parse_known_args(args)[0] + + return all_args + + +def main(args): + parser = get_config() + all_args = parse_args(args, parser) + + if all_args.algorithm_name == "rmappo": + all_args.use_recurrent_policy = True + assert (all_args.use_recurrent_policy or all_args.use_naive_recurrent_policy), ("check recurrent policy!") + elif all_args.algorithm_name == "mappo" or all_args.algorithm_name == "mat" or all_args.algorithm_name == "mat_dec": + assert (all_args.use_recurrent_policy == False and all_args.use_naive_recurrent_policy == False), ( + "check recurrent policy!") + else: + raise NotImplementedError + + if all_args.algorithm_name == "mat_dec": + all_args.dec_actor = True + all_args.share_actor = True + + # cuda + if all_args.cuda and torch.cuda.is_available(): + print("choose to use gpu...") + device = torch.device("cuda:0") + torch.set_num_threads(all_args.n_training_threads) + if all_args.cuda_deterministic: + torch.backends.cudnn.benchmark = False + torch.backends.cudnn.deterministic = True + else: + print("choose to use cpu...") + device = torch.device("cpu") + torch.set_num_threads(all_args.n_training_threads) + + # run dir + print( 
Path(os.path.split(os.path.dirname(os.path.abspath(__file__)))[0] + "/mat_rapid" +"/results") / all_args.env_name / all_args.scenario_name / all_args.algorithm_name / all_args.experiment_name) + #run_dir = Path(os.path.split(os.path.dirname(os.path.abspath(__file__)))[0] + "/results") / all_args.env_name / all_args.scenario_name / all_args.algorithm_name / all_args.experiment_name + run_dir = Path(os.path.split(os.path.dirname(os.path.abspath(__file__)))[0] + "/mat_rapid" + "/results") / all_args.env_name / all_args.scenario_name / all_args.algorithm_name / all_args.experiment_name + if not run_dir.exists(): + os.makedirs(str(run_dir)) + + # wandb + if all_args.use_wandb: + run = wandb.init(config=all_args, + project=all_args.env_name, + entity=all_args.user_name, + notes=socket.gethostname(), + name=str(all_args.algorithm_name) + "_" + + str(all_args.experiment_name) + + "_seed" + str(all_args.seed), + group=all_args.scenario_name, + dir=str(run_dir), + job_type="training", + reinit=True) + else: + if not run_dir.exists(): + curr_run = 'run1' + else: + exst_run_nums = [int(str(folder.name).split('run')[1]) for folder in run_dir.iterdir() if str(folder.name).startswith('run')] + if len(exst_run_nums) == 0: + curr_run = 'run1' + else: + curr_run = 'run%i' % (max(exst_run_nums) + 1) + run_dir = run_dir / curr_run + if not run_dir.exists(): + os.makedirs(str(run_dir)) + + setproctitle.setproctitle(str(all_args.algorithm_name) + "-" + \ + str(all_args.env_name) + "-" + str(all_args.experiment_name) + "@" + str(all_args.user_name)) + + # seed + torch.manual_seed(all_args.seed) + torch.cuda.manual_seed_all(all_args.seed) + np.random.seed(all_args.seed) + + # env init + envs = make_train_env(all_args) + eval_envs = make_eval_env(all_args) if all_args.use_eval else None + num_agents = all_args.num_agents + + config = { + "all_args": all_args, + "envs": envs, + "eval_envs": eval_envs, + "num_agents": num_agents, + "device": device, + "run_dir": run_dir + } + + runner = 
Runner(config) + runner.run() + + # post process + envs.close() + if all_args.use_eval and eval_envs is not envs: + eval_envs.close() + + if all_args.use_wandb: + run.finish() + else: + runner.writter.export_scalars_to_json(str(runner.log_dir + '/summary.json')) + runner.writter.close() + + +if __name__ == "__main__": + main(['--seed', '1', '--env_name', 'soccer', '--algorithm_name', 'mat_dec', '--experiment_name', 'single', '--scenario_name', 'soccer', '--num_agents', '3', '--lr', '5e-4', '--entropy_coef', '0.01', '--max_grad_norm', '0.5', '--n_training_threads', '16', '--n_rollout_threads', '1', '--num_mini_batch', '1', '--episode_length', '1000', '--num_env_steps', '10000000', '--ppo_epoch', '10', '--clip_param', '0.05', '--use_value_active_masks', '--use_policy_active_masks']) +# main(['--seed', '1', '--env_name', 'soccer', '--algorithm_name', 'mat_dec', '--experiment_name', 'single', '--scenario_name', 'soccer', '--num_agents', '3', '--lr', '5e-4', '--entropy_coef', '0.01', '--max_grad_norm', '0.5', '--n_training_threads', '16', '--n_rollout_threads', '1', '--num_mini_batch', '1', '--episode_length', '1000', '--num_env_steps', '10000000', '--ppo_epoch', '10', '--clip_param', '0.05', '--use_value_active_masks', '--use_policy_active_masks', '--model_dir', './transformer_1400.pt']) diff --git a/controllers/mat_rapid/requirements.txt b/controllers/mat_rapid/requirements.txt new file mode 100644 index 0000000..f4b63ed --- /dev/null +++ b/controllers/mat_rapid/requirements.txt @@ -0,0 +1,93 @@ +akro==0.0.8 +asynctest==0.13.0 +backcall==0.2.0 +blinker==1.4 +brotlipy==0.7.0 +certifi==2021.10.8 +cloudpickle==2.0.0 +cycler==0.11.0 +Cython==0.29.28 +decorator==5.1.1 +deepdiff==5.8.1 +docker-pycreds==0.4.0 +docopt==0.6.2 +dowel==0.0.4 +enum34==1.1.10 +fasteners==0.17.3 +fonttools==4.29.1 +future==0.18.2 +gitdb==4.0.9 +GitPython==3.1.27 +glfw==2.5.1 +google-auth-oauthlib==0.4.1 +gym==0.12.4 +gym-notices==0.0.5 +imageio==2.16.1 +importlib-metadata==4.11.2 +ipdb==0.13.9 
+ipython==7.33.0 +jedi==0.18.1 +Jinja2==3.0.3 +joblib==1.1.0 +kiwisolver==1.3.2 +MarkupSafe==2.1.0 +matplotlib==3.5.1 +matplotlib-inline==0.1.3 +mkl-fft==1.3.1 +mkl-service==2.4.0 +mock==4.0.3 +mpyq==0.2.5 +mujoco-py==2.1.2.14 +ninja==1.10.2.3 +opencv-python==4.5.5.64 +ordered-set==4.1.0 +packaging==21.3 +parso==0.8.3 +patchelf==0.14.5.0 +pathtools==0.1.2 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==9.0.1 +pipreqs==0.4.11 +portpicker==1.5.0 +promise==2.3 +prompt-toolkit==3.0.29 +protobuf==3.19.1 +psutil==5.9.0 +ptyprocess==0.7.0 +pyasn1-modules==0.2.8 +pygame==2.1.2 +pyglet==1.5.26 +Pygments==2.12.0 +pyparsing==3.0.7 +PySC2==3.0.0 +python-dateutil==2.8.2 +PyYAML==6.0 +requests-oauthlib==1.3.0 +s2clientprotocol==5.0.9.87702.0 +s2protocol==5.0.9.87702.0 +scikit-learn==1.0.2 +sentry-sdk==1.5.7 +setproctitle==1.2.2 +shortuuid==1.0.8 +sk-video==1.1.10 +sklearn==0.0 +smmap==5.0.0 +some-package==0.1 +tabulate==0.8.9 +tb-nightly==2.9.0a20220309 +tensorboard-plugin-wit==1.7.0 +tensorboardX==2.5 +termcolor==1.1.0 +threadpoolctl==3.1.0 +toml==0.10.2 +torch==1.10.2 +torchvision==0.11.3 +tqdm==4.64.0 +traitlets==5.1.1 +wandb==0.12.11 +wcwidth==0.2.5 +websocket-client==1.3.2 +whichcraft==0.6.1 +yarg==0.1.9 +yaspin==2.1.0 diff --git a/controllers/mat_rapid/runner/base_runner.py b/controllers/mat_rapid/runner/base_runner.py new file mode 100644 index 0000000..d867f19 --- /dev/null +++ b/controllers/mat_rapid/runner/base_runner.py @@ -0,0 +1,171 @@ +import wandb +import os +import numpy as np +import torch +from tensorboardX import SummaryWriter +from utils.shared_buffer import SharedReplayBuffer +from algorithms.mat.mat_trainer import MATTrainer as TrainAlgo +from algorithms.mat.algorithm.transformer_policy import TransformerPolicy as Policy + +def _t2n(x): + """Convert torch tensor to a numpy array.""" + return x.detach().cpu().numpy() + +class Runner(object): + """ + Base class for training recurrent policies. 
+ :param config: (dict) Config dictionary containing parameters for training. + """ + def __init__(self, config): + + self.all_args = config['all_args'] + self.envs = config['envs'] + self.eval_envs = config['eval_envs'] + self.device = config['device'] + self.num_agents = config['num_agents'] + if config.__contains__("render_envs"): + self.render_envs = config['render_envs'] + + # parameters + self.env_name = self.all_args.env_name + self.algorithm_name = self.all_args.algorithm_name + self.experiment_name = self.all_args.experiment_name + self.use_centralized_V = self.all_args.use_centralized_V + self.use_obs_instead_of_state = self.all_args.use_obs_instead_of_state + self.num_env_steps = self.all_args.num_env_steps + self.episode_length = self.all_args.episode_length + self.n_rollout_threads = self.all_args.n_rollout_threads + self.n_eval_rollout_threads = self.all_args.n_eval_rollout_threads + self.n_render_rollout_threads = self.all_args.n_render_rollout_threads + self.use_linear_lr_decay = self.all_args.use_linear_lr_decay + self.hidden_size = self.all_args.hidden_size + self.use_wandb = self.all_args.use_wandb + self.use_render = self.all_args.use_render + self.recurrent_N = self.all_args.recurrent_N + + # interval + self.save_interval = self.all_args.save_interval + self.use_eval = self.all_args.use_eval + self.eval_interval = self.all_args.eval_interval + self.log_interval = self.all_args.log_interval + + # dir + self.model_dir = self.all_args.model_dir + + if self.use_wandb: + self.save_dir = str(wandb.run.dir) + self.run_dir = str(wandb.run.dir) + else: + self.run_dir = config["run_dir"] + self.log_dir = str(self.run_dir / 'logs') + if not os.path.exists(self.log_dir): + os.makedirs(self.log_dir) + self.writter = SummaryWriter(self.log_dir) + self.save_dir = str(self.run_dir / 'models') + if not os.path.exists(self.save_dir): + os.makedirs(self.save_dir) + + share_observation_space = self.envs.share_observation_space[0] if self.use_centralized_V else 
self.envs.observation_space[0] + + print("obs_space: ", self.envs.observation_space) + print("share_obs_space: ", self.envs.share_observation_space) + print("act_space: ", self.envs.action_space) + + # policy network + self.policy = Policy(self.all_args, + self.envs.observation_space[0], + share_observation_space, + self.envs.action_space[0], + self.num_agents, + device=self.device) + + if self.model_dir is not None: + self.restore(self.model_dir) + + # algorithm + self.trainer = TrainAlgo(self.all_args, self.policy, self.num_agents, device=self.device) + + # buffer + self.buffer = SharedReplayBuffer(self.all_args, + self.num_agents, + self.envs.observation_space[0], + share_observation_space, + self.envs.action_space[0], + self.all_args.env_name) + + def run(self): + """Collect training data, perform training updates, and evaluate policy.""" + raise NotImplementedError + + def warmup(self): + """Collect warmup pre-training data.""" + raise NotImplementedError + + def collect(self, step): + """Collect rollouts for training.""" + raise NotImplementedError + + def insert(self, data): + """ + Insert data into buffer. + :param data: (Tuple) data to insert into training buffer. 
+ """ + raise NotImplementedError + + @torch.no_grad() + def compute(self): + """Calculate returns for the collected data.""" + self.trainer.prep_rollout() + if self.buffer.available_actions is None: + next_values = self.trainer.policy.get_values(np.concatenate(self.buffer.share_obs[-1]), + np.concatenate(self.buffer.obs[-1]), + np.concatenate(self.buffer.rnn_states_critic[-1]), + np.concatenate(self.buffer.masks[-1])) + else: + next_values = self.trainer.policy.get_values(np.concatenate(self.buffer.share_obs[-1]), + np.concatenate(self.buffer.obs[-1]), + np.concatenate(self.buffer.rnn_states_critic[-1]), + np.concatenate(self.buffer.masks[-1]), + np.concatenate(self.buffer.available_actions[-1])) + next_values = np.array(np.split(_t2n(next_values), self.n_rollout_threads)) + self.buffer.compute_returns(next_values, self.trainer.value_normalizer) + + def train(self): + """Train policies with data in buffer. """ + self.trainer.prep_training() + train_infos = self.trainer.train(self.buffer) + self.buffer.after_update() + return train_infos + + def save(self, episode): + """Save policy's actor and critic networks.""" + self.policy.save(self.save_dir, episode) + + def restore(self, model_dir): + """Restore policy's networks from a saved model.""" + self.policy.restore(model_dir) + + def log_train(self, train_infos, total_num_steps): + """ + Log training info. + :param train_infos: (dict) information about training update. + :param total_num_steps: (int) total number of training env steps. + """ + for k, v in train_infos.items(): + if self.use_wandb: + wandb.log({k: v}, step=total_num_steps) + else: + self.writter.add_scalars(k, {k: v}, total_num_steps) + + def log_env(self, env_infos, total_num_steps): + """ + Log env info. + :param env_infos: (dict) information about env state. + :param total_num_steps: (int) total number of training env steps. 
+ """ + for k, v in env_infos.items(): + if len(v)>0: + if self.use_wandb: + wandb.log({k: np.mean(v)}, step=total_num_steps) + else: + self.writter.add_scalars(k, {k: np.mean(v)}, total_num_steps) diff --git a/controllers/mat_rapid/runner/soccer_runner.py b/controllers/mat_rapid/runner/soccer_runner.py new file mode 100644 index 0000000..def5d50 --- /dev/null +++ b/controllers/mat_rapid/runner/soccer_runner.py @@ -0,0 +1,265 @@ +import time +import wandb +import numpy as np +import torch +from runner.base_runner import Runner + + +def _t2n(x): + return x.detach().cpu().numpy() + +class SoccerRunner(Runner): + """Runner class to perform training, evaluation. and data collection for SMAC. See parent class for details.""" + def __init__(self, config): + super(SoccerRunner, self).__init__(config) + + def run(self): + self.warmup() + + start = time.time() + episodes = int(self.num_env_steps) // self.episode_length // self.n_rollout_threads + + train_episode_rewards = [0 for _ in range(self.n_rollout_threads)] + done_episodes_rewards = [] + + #train_episode_scores = [0 for _ in range(self.n_rollout_threads)] + #done_episodes_scores = [] + + train_individual_rewards = [0 for _ in range(self.num_agents)] + done_individual_rewards = [] + + for episode in range(episodes): + if self.use_linear_lr_decay: + self.trainer.policy.lr_decay(episode, episodes) + + for step in range(self.episode_length): + # Sample actions + values, actions, action_log_probs, rnn_states, rnn_states_critic = self.collect(step) + + # Obser reward and next obs + + #obs, rewards, dones, infos, available_actions = self.envs.step(actions) + obs, rewards, dones, infos = self.envs.step(actions) + + dones_env = np.all(dones, axis=1) + reward_env = np.mean(rewards, axis=1).flatten() + train_episode_rewards += reward_env + + for agent_id in range(self.num_agents): + for info in infos: + if 'individual_reward' in info[agent_id].keys(): + train_individual_rewards[agent_id] += info[agent_id]['individual_reward'] + 
+ + #score_env = [t_info[0]["score_reward"] for t_info in infos] + #train_episode_scores += np.array(score_env) + for t in range(self.n_rollout_threads): + if dones_env[t]: + done_episodes_rewards.append(train_episode_rewards[t]) + train_episode_rewards[t] = 0 + #done_episodes_scores.append(train_episode_scores[t]) + #train_episode_scores[t] = 0 + done_individual_rewards.append(train_individual_rewards) + train_individual_rewards = [0 for _ in range(self.num_agents)] + + #data = obs, rewards, dones, infos, available_actions, \ + data = obs, rewards, dones, infos, \ + values, actions, action_log_probs, \ + rnn_states, rnn_states_critic + + # insert data into buffer + self.insert(data) + + # compute return and update network + self.compute() + train_infos = self.train() + + # post process + total_num_steps = (episode + 1) * self.episode_length * self.n_rollout_threads + # save model + if (episode % self.save_interval == 0 or episode == episodes - 1): + self.save(episode) + + # log information + if episode % self.log_interval == 0: + end = time.time() + print("\n Scenario {} Algo {} Exp {} updates {}/{} episodes, total num timesteps {}/{}, FPS {}.\n" + .format(self.all_args.scenario_name, + self.algorithm_name, + self.experiment_name, + episode, + episodes, + total_num_steps, + self.num_env_steps, + int(total_num_steps / (end - start)))) + + self.log_train(train_infos, total_num_steps) + + if len(done_episodes_rewards) > 0: + aver_episode_rewards = np.mean(done_episodes_rewards) + self.writter.add_scalars("train_episode_rewards", {"aver_rewards": aver_episode_rewards}, total_num_steps) + done_episodes_rewards = [] + + #aver_episode_scores = np.mean(done_episodes_scores) + #self.writter.add_scalars("train_episode_scores", {"aver_scores": aver_episode_scores}, total_num_steps) + #done_episodes_scores = [] + #print("some episodes done, average rewards: {}, scores: {}" + # .format(aver_episode_rewards, aver_episode_scores)) + print("some episodes done, average rewards: 
{}".format(aver_episode_rewards)) + + env_infos = {} + last_individual_rewards = done_individual_rewards[-1] + done_individual_rewards = [] + for agent_id in range(self.num_agents): + agent_k = 'agent%i/individual_rewards' % (agent_id+1) + env_infos[agent_k] = [last_individual_rewards[agent_id]] + self.log_env(env_infos, total_num_steps) + + + # eval + if episode % self.eval_interval == 0 and self.use_eval: + self.eval(total_num_steps) + + def warmup(self): + # reset env + obs = self.envs.reset() + + # replay buffer + if self.use_centralized_V: + share_obs = obs.reshape(self.n_rollout_threads, -1) + share_obs = np.expand_dims(share_obs, 1).repeat(self.num_agents, axis=1) + else: + share_obs = obs + + self.buffer.share_obs[0] = share_obs.copy() + self.buffer.obs[0] = obs.copy() + + @torch.no_grad() + def collect(self, step): + self.trainer.prep_rollout() + #value, action, action_log_prob, rnn_state, rnn_state_critic \ + # = self.trainer.policy.get_actions(np.concatenate(self.buffer.share_obs[step]), + # np.concatenate(self.buffer.obs[step]), + # np.concatenate(self.buffer.rnn_states[step]), + # np.concatenate(self.buffer.rnn_states_critic[step]), + # np.concatenate(self.buffer.masks[step]), + # np.concatenate(self.buffer.available_actions[step])) + value, action, action_log_prob, rnn_state, rnn_state_critic \ + = self.trainer.policy.get_actions(np.concatenate(self.buffer.share_obs[step]), + np.concatenate(self.buffer.obs[step]), + np.concatenate(self.buffer.rnn_states[step]), + np.concatenate(self.buffer.rnn_states_critic[step]), + np.concatenate(self.buffer.masks[step])) + + # [self.envs, agents, dim] + values = np.array(np.split(_t2n(value), self.n_rollout_threads)) + actions = np.array(np.split(_t2n(action), self.n_rollout_threads)) + action_log_probs = np.array(np.split(_t2n(action_log_prob), self.n_rollout_threads)) + rnn_states = np.array(np.split(_t2n(rnn_state), self.n_rollout_threads)) + rnn_states_critic = np.array(np.split(_t2n(rnn_state_critic), 
self.n_rollout_threads)) + + return values, actions, action_log_probs, rnn_states, rnn_states_critic + + def insert(self, data): + #obs, rewards, dones, infos, available_actions, \ + obs, rewards, dones, infos, \ + values, actions, action_log_probs, rnn_states, rnn_states_critic = data + + dones_env = np.all(dones, axis=1) + + rnn_states[dones_env == True] = np.zeros(((dones_env == True).sum(), self.num_agents, self.recurrent_N, self.hidden_size), dtype=np.float32) + rnn_states_critic[dones_env == True] = np.zeros(((dones_env == True).sum(), self.num_agents, *self.buffer.rnn_states_critic.shape[3:]), dtype=np.float32) + + masks = np.ones((self.n_rollout_threads, self.num_agents, 1), dtype=np.float32) + masks[dones_env == True] = np.zeros(((dones_env == True).sum(), self.num_agents, 1), dtype=np.float32) + + active_masks = np.ones((self.n_rollout_threads, self.num_agents, 1), dtype=np.float32) + active_masks[dones == True] = np.zeros(((dones == True).sum(), 1), dtype=np.float32) + active_masks[dones_env == True] = np.ones(((dones_env == True).sum(), self.num_agents, 1), dtype=np.float32) + + # bad_masks = np.array([[[0.0] if info[agent_id]['bad_transition'] else [1.0] for agent_id in range(self.num_agents)] for info in infos]) + + if self.use_centralized_V: + share_obs = obs.reshape(self.n_rollout_threads, -1) + share_obs = np.expand_dims(share_obs, 1).repeat(self.num_agents, axis=1) + else: + share_obs = obs + + #self.buffer.insert(share_obs, obs, rnn_states, rnn_states_critic, + # actions, action_log_probs, values, rewards, masks, None, active_masks, + # available_actions) + self.buffer.insert(share_obs, obs, rnn_states, rnn_states_critic, + actions, action_log_probs, values, rewards, masks, None, active_masks) + + + + def log_train(self, train_infos, total_num_steps): + train_infos["average_step_rewards"] = np.mean(self.buffer.rewards) + print("average_step_rewards is {}.".format(train_infos["average_step_rewards"])) + for k, v in train_infos.items(): + if 
self.use_wandb: + wandb.log({k: v}, step=total_num_steps) + else: + self.writter.add_scalars(k, {k: v}, total_num_steps) + + @torch.no_grad() + def eval(self, total_num_steps): + eval_episode = 0 + eval_episode_rewards = [] + one_episode_rewards = [0 for _ in range(self.all_args.eval_episodes)] + eval_episode_scores = [] + one_episode_scores = [0 for _ in range(self.all_args.eval_episodes)] + + eval_obs, eval_share_obs, ava = self.eval_envs.reset() + eval_rnn_states = np.zeros((self.all_args.eval_episodes, self.num_agents, self.recurrent_N, + self.hidden_size), dtype=np.float32) + eval_masks = np.ones((self.all_args.eval_episodes, self.num_agents, 1), dtype=np.float32) + + while True: + self.trainer.prep_rollout() + eval_actions, eval_rnn_states = \ + self.trainer.policy.act(np.concatenate(eval_share_obs), + np.concatenate(eval_obs), + np.concatenate(eval_rnn_states), + np.concatenate(eval_masks), + np.concatenate(ava), + deterministic=True) + eval_actions = np.array(np.split(_t2n(eval_actions), self.all_args.eval_episodes)) + eval_rnn_states = np.array(np.split(_t2n(eval_rnn_states), self.all_args.eval_episodes)) + + # Obser reward and next obs + eval_obs, eval_share_obs, eval_rewards, eval_dones, eval_infos, ava = self.eval_envs.step(eval_actions) + eval_rewards = np.mean(eval_rewards, axis=1).flatten() + one_episode_rewards += eval_rewards + + eval_scores = [t_info[0]["score_reward"] for t_info in eval_infos] + one_episode_scores += np.array(eval_scores) + + eval_dones_env = np.all(eval_dones, axis=1) + eval_rnn_states[eval_dones_env == True] = np.zeros(((eval_dones_env == True).sum(), self.num_agents, + self.recurrent_N, self.hidden_size), dtype=np.float32) + eval_masks = np.ones((self.all_args.eval_episodes, self.num_agents, 1), dtype=np.float32) + eval_masks[eval_dones_env == True] = np.zeros(((eval_dones_env == True).sum(), self.num_agents, 1), + dtype=np.float32) + + for eval_i in range(self.all_args.eval_episodes): + if eval_dones_env[eval_i]: + 
eval_episode += 1 + eval_episode_rewards.append(one_episode_rewards[eval_i]) + one_episode_rewards[eval_i] = 0 + + eval_episode_scores.append(one_episode_scores[eval_i]) + one_episode_scores[eval_i] = 0 + + if eval_episode >= self.all_args.eval_episodes: + key_average = '/eval_average_episode_rewards' + key_max = '/eval_max_episode_rewards' + key_scores = '/eval_average_episode_scores' + eval_env_infos = {key_average: eval_episode_rewards, + key_max: [np.max(eval_episode_rewards)], + key_scores: eval_episode_scores} + self.log_env(eval_env_infos, total_num_steps) + + print("eval average episode rewards: {}, scores: {}." + .format(np.mean(eval_episode_rewards), np.mean(eval_episode_scores))) + break diff --git a/controllers/mat_rapid/soccer/env_wrappers.py b/controllers/mat_rapid/soccer/env_wrappers.py new file mode 100644 index 0000000..12cf941 --- /dev/null +++ b/controllers/mat_rapid/soccer/env_wrappers.py @@ -0,0 +1,831 @@ +""" +Modified from OpenAI Baselines code to work with multi-agent envs +""" +import numpy as np +import torch +from multiprocessing import Process, Pipe +from abc import ABC, abstractmethod +from utils.util import tile_images + +class CloudpickleWrapper(object): + """ + Uses cloudpickle to serialize contents (otherwise multiprocessing tries to use pickle) + """ + + def __init__(self, x): + self.x = x + + def __getstate__(self): + import cloudpickle + return cloudpickle.dumps(self.x) + + def __setstate__(self, ob): + import pickle + self.x = pickle.loads(ob) + + +class ShareVecEnv(ABC): + """ + An abstract asynchronous, vectorized environment. + Used to batch data from multiple copies of an environment, so that + each observation becomes an batch of observations, and expected action is a batch of actions to + be applied per-environment. 
+ """ + closed = False + viewer = None + + metadata = { + 'render.modes': ['human', 'rgb_array'] + } + + def __init__(self, num_envs, observation_space, share_observation_space, action_space): + self.num_envs = num_envs + self.observation_space = observation_space + self.share_observation_space = share_observation_space + self.action_space = action_space + + @abstractmethod + def reset(self): + """ + Reset all the environments and return an array of + observations, or a dict of observation arrays. + + If step_async is still doing work, that work will + be cancelled and step_wait() should not be called + until step_async() is invoked again. + """ + pass + + @abstractmethod + def step_async(self, actions): + """ + Tell all the environments to start taking a step + with the given actions. + Call step_wait() to get the results of the step. + + You should not call this if a step_async run is + already pending. + """ + pass + + @abstractmethod + def step_wait(self): + """ + Wait for the step taken with step_async(). + + Returns (obs, rews, dones, infos): + - obs: an array of observations, or a dict of + arrays of observations. + - rews: an array of rewards + - dones: an array of "episode done" booleans + - infos: a sequence of info objects + """ + pass + + def close_extras(self): + """ + Clean up the extra resources, beyond what's in this base class. + Only runs when not self.closed. + """ + pass + + def close(self): + if self.closed: + return + if self.viewer is not None: + self.viewer.close() + self.close_extras() + self.closed = True + + def step(self, actions): + """ + Step the environments synchronously. + + This is available for backwards compatibility. 
+ """ + self.step_async(actions) + return self.step_wait() + + def render(self, mode='human'): + imgs = self.get_images() + bigimg = tile_images(imgs) + if mode == 'human': + self.get_viewer().imshow(bigimg) + return self.get_viewer().isopen + elif mode == 'rgb_array': + return bigimg + else: + raise NotImplementedError + + def get_images(self): + """ + Return RGB images from each environment + """ + raise NotImplementedError + + @property + def unwrapped(self): + if isinstance(self, VecEnvWrapper): + return self.venv.unwrapped + else: + return self + + def get_viewer(self): + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.SimpleImageViewer() + return self.viewer + + +def worker(remote, parent_remote, env_fn_wrapper): + parent_remote.close() + env = env_fn_wrapper.x() + while True: + cmd, data = remote.recv() + if cmd == 'step': + ob, reward, done, info = env.step(data) + if 'bool' in done.__class__.__name__: + if done: + ob = env.reset() + else: + if np.all(done): + ob = env.reset() + + remote.send((ob, reward, done, info)) + elif cmd == 'reset': + ob = env.reset() + remote.send((ob)) + elif cmd == 'render': + if data == "rgb_array": + fr = env.render(mode=data) + remote.send(fr) + elif data == "human": + env.render(mode=data) + elif cmd == 'reset_task': + ob = env.reset_task() + remote.send(ob) + elif cmd == 'close': + env.close() + remote.close() + break + elif cmd == 'get_spaces': + remote.send((env.observation_space, env.share_observation_space, env.action_space)) + else: + raise NotImplementedError + + +class GuardSubprocVecEnv(ShareVecEnv): + def __init__(self, env_fns, spaces=None): + """ + envs: list of gym environments to run in subprocesses + """ + self.waiting = False + self.closed = False + nenvs = len(env_fns) + self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) + self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) + for (work_remote, remote, 
env_fn) in zip(self.work_remotes, self.remotes, env_fns)] + for p in self.ps: + p.daemon = False # could cause zombie process + p.start() + for remote in self.work_remotes: + remote.close() + + self.remotes[0].send(('get_spaces', None)) + observation_space, share_observation_space, action_space = self.remotes[0].recv() + ShareVecEnv.__init__(self, len(env_fns), observation_space, + share_observation_space, action_space) + + def step_async(self, actions): + + for remote, action in zip(self.remotes, actions): + remote.send(('step', action)) + self.waiting = True + + def step_wait(self): + results = [remote.recv() for remote in self.remotes] + self.waiting = False + obs, rews, dones, infos = zip(*results) + return np.stack(obs), np.stack(rews), np.stack(dones), infos + + def reset(self): + for remote in self.remotes: + remote.send(('reset', None)) + obs = [remote.recv() for remote in self.remotes] + return np.stack(obs) + + def reset_task(self): + for remote in self.remotes: + remote.send(('reset_task', None)) + return np.stack([remote.recv() for remote in self.remotes]) + + def close(self): + if self.closed: + return + if self.waiting: + for remote in self.remotes: + remote.recv() + for remote in self.remotes: + remote.send(('close', None)) + for p in self.ps: + p.join() + self.closed = True + + +class SubprocVecEnv(ShareVecEnv): + def __init__(self, env_fns, spaces=None): + """ + envs: list of gym environments to run in subprocesses + """ + self.waiting = False + self.closed = False + nenvs = len(env_fns) + self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) + self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) + for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] + for p in self.ps: + p.daemon = True # if the main process crashes, we should not cause things to hang + p.start() + for remote in self.work_remotes: + remote.close() + + self.remotes[0].send(('get_spaces', None)) + 
observation_space, share_observation_space, action_space = self.remotes[0].recv() + ShareVecEnv.__init__(self, len(env_fns), observation_space, + share_observation_space, action_space) + + def step_async(self, actions): + for remote, action in zip(self.remotes, actions): + remote.send(('step', action)) + self.waiting = True + + def step_wait(self): + results = [remote.recv() for remote in self.remotes] + self.waiting = False + obs, rews, dones, infos = zip(*results) + return np.stack(obs), np.stack(rews), np.stack(dones), infos + + def reset(self): + for remote in self.remotes: + remote.send(('reset', None)) + obs = [remote.recv() for remote in self.remotes] + return np.stack(obs) + + + def reset_task(self): + for remote in self.remotes: + remote.send(('reset_task', None)) + return np.stack([remote.recv() for remote in self.remotes]) + + def close(self): + if self.closed: + return + if self.waiting: + for remote in self.remotes: + remote.recv() + for remote in self.remotes: + remote.send(('close', None)) + for p in self.ps: + p.join() + self.closed = True + + def render(self, mode="rgb_array"): + for remote in self.remotes: + remote.send(('render', mode)) + if mode == "rgb_array": + frame = [remote.recv() for remote in self.remotes] + return np.stack(frame) + + +def shareworker(remote, parent_remote, env_fn_wrapper): + parent_remote.close() + env = env_fn_wrapper.x() + while True: + cmd, data = remote.recv() + if cmd == 'step': + ob, s_ob, reward, done, info, available_actions = env.step(data) + if 'bool' in done.__class__.__name__: + if done: + ob, s_ob, available_actions = env.reset() + else: + if np.all(done): + ob, s_ob, available_actions = env.reset() + + remote.send((ob, s_ob, reward, done, info, available_actions)) + elif cmd == 'reset': + ob, s_ob, available_actions = env.reset() + remote.send((ob, s_ob, available_actions)) + elif cmd == 'reset_task': + ob = env.reset_task() + remote.send(ob) + elif cmd == 'render': + if data == "rgb_array": + fr = 
env.render(mode=data) + remote.send(fr) + elif data == "human": + env.render(mode=data) + elif cmd == 'close': + env.close() + remote.close() + break + elif cmd == 'get_num_agents': + remote.send((env.n_agents)) + elif cmd == 'get_spaces': + remote.send( + (env.observation_space, env.share_observation_space, env.action_space)) + elif cmd == 'render_vulnerability': + fr = env.render_vulnerability(data) + remote.send((fr)) + else: + raise NotImplementedError + + +class ShareSubprocVecEnv(ShareVecEnv): + def __init__(self, env_fns, spaces=None): + """ + envs: list of gym environments to run in subprocesses + """ + self.waiting = False + self.closed = False + nenvs = len(env_fns) + self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) + self.ps = [Process(target=shareworker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) + for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] + for p in self.ps: + p.daemon = True # if the main process crashes, we should not cause things to hang + p.start() + for remote in self.work_remotes: + remote.close() + self.remotes[0].send(('get_num_agents', None)) + self.n_agents = self.remotes[0].recv() + self.remotes[0].send(('get_spaces', None)) + observation_space, share_observation_space, action_space = self.remotes[0].recv( + ) + ShareVecEnv.__init__(self, len(env_fns), observation_space, + share_observation_space, action_space) + + def step_async(self, actions): + for remote, action in zip(self.remotes, actions): + remote.send(('step', action)) + self.waiting = True + + def step_wait(self): + results = [remote.recv() for remote in self.remotes] + self.waiting = False + obs, share_obs, rews, dones, infos, available_actions = zip(*results) + return np.stack(obs), np.stack(share_obs), np.stack(rews), np.stack(dones), infos, np.stack(available_actions) + + def reset(self): + for remote in self.remotes: + remote.send(('reset', None)) + results = [remote.recv() for remote in self.remotes] 
+ obs, share_obs, available_actions = zip(*results) + return np.stack(obs), np.stack(share_obs), np.stack(available_actions) + + def reset_task(self): + for remote in self.remotes: + remote.send(('reset_task', None)) + return np.stack([remote.recv() for remote in self.remotes]) + + def close(self): + if self.closed: + return + if self.waiting: + for remote in self.remotes: + remote.recv() + for remote in self.remotes: + remote.send(('close', None)) + for p in self.ps: + p.join() + self.closed = True + + +def choosesimpleworker(remote, parent_remote, env_fn_wrapper): + parent_remote.close() + env = env_fn_wrapper.x() + while True: + cmd, data = remote.recv() + if cmd == 'step': + ob, reward, done, info = env.step(data) + remote.send((ob, reward, done, info)) + elif cmd == 'reset': + ob = env.reset(data) + remote.send((ob)) + elif cmd == 'reset_task': + ob = env.reset_task() + remote.send(ob) + elif cmd == 'close': + env.close() + remote.close() + break + elif cmd == 'render': + if data == "rgb_array": + fr = env.render(mode=data) + remote.send(fr) + elif data == "human": + env.render(mode=data) + elif cmd == 'get_spaces': + remote.send( + (env.observation_space, env.share_observation_space, env.action_space)) + else: + raise NotImplementedError + + +class ChooseSimpleSubprocVecEnv(ShareVecEnv): + def __init__(self, env_fns, spaces=None): + """ + envs: list of gym environments to run in subprocesses + """ + self.waiting = False + self.closed = False + nenvs = len(env_fns) + self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) + self.ps = [Process(target=choosesimpleworker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) + for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] + for p in self.ps: + p.daemon = True # if the main process crashes, we should not cause things to hang + p.start() + for remote in self.work_remotes: + remote.close() + self.remotes[0].send(('get_spaces', None)) + observation_space, 
share_observation_space, action_space = self.remotes[0].recv() + ShareVecEnv.__init__(self, len(env_fns), observation_space, + share_observation_space, action_space) + + def step_async(self, actions): + for remote, action in zip(self.remotes, actions): + remote.send(('step', action)) + self.waiting = True + + def step_wait(self): + results = [remote.recv() for remote in self.remotes] + self.waiting = False + obs, rews, dones, infos = zip(*results) + return np.stack(obs), np.stack(rews), np.stack(dones), infos + + def reset(self, reset_choose): + for remote, choose in zip(self.remotes, reset_choose): + remote.send(('reset', choose)) + obs = [remote.recv() for remote in self.remotes] + return np.stack(obs) + + def render(self, mode="rgb_array"): + for remote in self.remotes: + remote.send(('render', mode)) + if mode == "rgb_array": + frame = [remote.recv() for remote in self.remotes] + return np.stack(frame) + + def reset_task(self): + for remote in self.remotes: + remote.send(('reset_task', None)) + return np.stack([remote.recv() for remote in self.remotes]) + + def close(self): + if self.closed: + return + if self.waiting: + for remote in self.remotes: + remote.recv() + for remote in self.remotes: + remote.send(('close', None)) + for p in self.ps: + p.join() + self.closed = True + + +def chooseworker(remote, parent_remote, env_fn_wrapper): + parent_remote.close() + env = env_fn_wrapper.x() + while True: + cmd, data = remote.recv() + if cmd == 'step': + ob, s_ob, reward, done, info, available_actions = env.step(data) + remote.send((ob, s_ob, reward, done, info, available_actions)) + elif cmd == 'reset': + ob, s_ob, available_actions = env.reset(data) + remote.send((ob, s_ob, available_actions)) + elif cmd == 'reset_task': + ob = env.reset_task() + remote.send(ob) + elif cmd == 'close': + env.close() + remote.close() + break + elif cmd == 'render': + remote.send(env.render(mode='rgb_array')) + elif cmd == 'get_spaces': + remote.send( + (env.observation_space, 
env.share_observation_space, env.action_space)) + else: + raise NotImplementedError + + +class ChooseSubprocVecEnv(ShareVecEnv): + def __init__(self, env_fns, spaces=None): + """ + envs: list of gym environments to run in subprocesses + """ + self.waiting = False + self.closed = False + nenvs = len(env_fns) + self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) + self.ps = [Process(target=chooseworker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) + for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] + for p in self.ps: + p.daemon = True # if the main process crashes, we should not cause things to hang + p.start() + for remote in self.work_remotes: + remote.close() + self.remotes[0].send(('get_spaces', None)) + observation_space, share_observation_space, action_space = self.remotes[0].recv( + ) + ShareVecEnv.__init__(self, len(env_fns), observation_space, + share_observation_space, action_space) + + def step_async(self, actions): + for remote, action in zip(self.remotes, actions): + remote.send(('step', action)) + self.waiting = True + + def step_wait(self): + results = [remote.recv() for remote in self.remotes] + self.waiting = False + obs, share_obs, rews, dones, infos, available_actions = zip(*results) + return np.stack(obs), np.stack(share_obs), np.stack(rews), np.stack(dones), infos, np.stack(available_actions) + + def reset(self, reset_choose): + for remote, choose in zip(self.remotes, reset_choose): + remote.send(('reset', choose)) + results = [remote.recv() for remote in self.remotes] + obs, share_obs, available_actions = zip(*results) + return np.stack(obs), np.stack(share_obs), np.stack(available_actions) + + def reset_task(self): + for remote in self.remotes: + remote.send(('reset_task', None)) + return np.stack([remote.recv() for remote in self.remotes]) + + def close(self): + if self.closed: + return + if self.waiting: + for remote in self.remotes: + remote.recv() + for remote in self.remotes: 
+ remote.send(('close', None)) + for p in self.ps: + p.join() + self.closed = True + + +def chooseguardworker(remote, parent_remote, env_fn_wrapper): + parent_remote.close() + env = env_fn_wrapper.x() + while True: + cmd, data = remote.recv() + if cmd == 'step': + ob, reward, done, info = env.step(data) + remote.send((ob, reward, done, info)) + elif cmd == 'reset': + ob = env.reset(data) + remote.send((ob)) + elif cmd == 'reset_task': + ob = env.reset_task() + remote.send(ob) + elif cmd == 'close': + env.close() + remote.close() + break + elif cmd == 'get_spaces': + remote.send( + (env.observation_space, env.share_observation_space, env.action_space)) + else: + raise NotImplementedError + + +class ChooseGuardSubprocVecEnv(ShareVecEnv): + def __init__(self, env_fns, spaces=None): + """ + envs: list of gym environments to run in subprocesses + """ + self.waiting = False + self.closed = False + nenvs = len(env_fns) + self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) + self.ps = [Process(target=chooseguardworker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) + for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] + for p in self.ps: + p.daemon = False # if the main process crashes, we should not cause things to hang + p.start() + for remote in self.work_remotes: + remote.close() + self.remotes[0].send(('get_spaces', None)) + observation_space, share_observation_space, action_space = self.remotes[0].recv( + ) + ShareVecEnv.__init__(self, len(env_fns), observation_space, + share_observation_space, action_space) + + def step_async(self, actions): + for remote, action in zip(self.remotes, actions): + remote.send(('step', action)) + self.waiting = True + + def step_wait(self): + results = [remote.recv() for remote in self.remotes] + self.waiting = False + obs, rews, dones, infos = zip(*results) + return np.stack(obs), np.stack(rews), np.stack(dones), infos + + def reset(self, reset_choose): + for remote, choose in 
zip(self.remotes, reset_choose):
            remote.send(('reset', choose))
        obs = [remote.recv() for remote in self.remotes]
        return np.stack(obs)

    def reset_task(self):
        for remote in self.remotes:
            remote.send(('reset_task', None))
        return np.stack([remote.recv() for remote in self.remotes])

    def close(self):
        if self.closed:
            return
        # Drain any pending step results before asking workers to exit.
        if self.waiting:
            for remote in self.remotes:
                remote.recv()
        for remote in self.remotes:
            remote.send(('close', None))
        for p in self.ps:
            p.join()
        self.closed = True


# single env
class DummyVecEnv(ShareVecEnv):
    # Serial (in-process) vec env: same interface as the subprocess versions
    # but steps each env in a plain Python loop — useful for debugging.
    def __init__(self, env_fns):
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        ShareVecEnv.__init__(self, len(
            env_fns), env.observation_space, env.share_observation_space, env.action_space)
        # Actions are stashed by step_async and consumed by step_wait.
        self.actions = None

    def step_async(self, actions):
        self.actions = actions

    def step_wait(self):
        results = [env.step(a) for (a, env) in zip(self.actions, self.envs)]
        obs, rews, dones, infos = map(np.array, zip(*results))

        # Auto-reset finished envs; the class-name check distinguishes a scalar
        # done (bool / np.bool_) from a per-agent done array.
        for (i, done) in enumerate(dones):
            if 'bool' in done.__class__.__name__:
                if done:
                    obs[i] = self.envs[i].reset()
            else:
                if np.all(done):
                    obs[i] = self.envs[i].reset()

        self.actions = None
        return obs, rews, dones, infos

    def reset(self):
        obs = [env.reset() for env in self.envs]
        return np.array(obs)

    def close(self):
        for env in self.envs:
            env.close()

    def render(self, mode="human"):
        if mode == "rgb_array":
            return np.array([env.render(mode=mode) for env in self.envs])
        elif mode == "human":
            for env in self.envs:
                env.render(mode=mode)
        else:
            raise NotImplementedError



class ShareDummyVecEnv(ShareVecEnv):
    # Serial vec env variant that also carries shared observations and
    # available-action masks through step/reset.
    def __init__(self, env_fns):
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        self.n_agents = env.n_agents
        ShareVecEnv.__init__(self, len(
            env_fns), env.observation_space, env.share_observation_space, env.action_space)
        self.actions = None

    def step_async(self, actions):
        self.actions = actions

def step_wait(self): + results = [env.step(a) for (a, env) in zip(self.actions, self.envs)] + obs, share_obs, rews, dones, infos, available_actions = map( + np.array, zip(*results)) + + for (i, done) in enumerate(dones): + if 'bool' in done.__class__.__name__: + if done: + obs[i], share_obs[i], available_actions[i] = self.envs[i].reset() + else: + if np.all(done): + obs[i], share_obs[i], available_actions[i] = self.envs[i].reset() + self.actions = None + + return obs, share_obs, rews, dones, infos, available_actions + + def reset(self): + results = [env.reset() for env in self.envs] + obs, share_obs, available_actions = map(np.array, zip(*results)) + return obs, share_obs, available_actions + + def close(self): + for env in self.envs: + env.close() + + def save_replay(self): + for env in self.envs: + env.save_replay() + + def render(self, mode="human"): + if mode == "rgb_array": + return np.array([env.render(mode=mode) for env in self.envs]) + elif mode == "human": + for env in self.envs: + env.render(mode=mode) + else: + raise NotImplementedError + + +class ChooseDummyVecEnv(ShareVecEnv): + def __init__(self, env_fns): + self.envs = [fn() for fn in env_fns] + env = self.envs[0] + ShareVecEnv.__init__(self, len( + env_fns), env.observation_space, env.share_observation_space, env.action_space) + self.actions = None + + def step_async(self, actions): + self.actions = actions + + def step_wait(self): + results = [env.step(a) for (a, env) in zip(self.actions, self.envs)] + obs, share_obs, rews, dones, infos, available_actions = map( + np.array, zip(*results)) + self.actions = None + return obs, share_obs, rews, dones, infos, available_actions + + def reset(self, reset_choose): + results = [env.reset(choose) + for (env, choose) in zip(self.envs, reset_choose)] + obs, share_obs, available_actions = map(np.array, zip(*results)) + return obs, share_obs, available_actions + + def close(self): + for env in self.envs: + env.close() + + def render(self, mode="human"): + if 
mode == "rgb_array": + return np.array([env.render(mode=mode) for env in self.envs]) + elif mode == "human": + for env in self.envs: + env.render(mode=mode) + else: + raise NotImplementedError + +class ChooseSimpleDummyVecEnv(ShareVecEnv): + def __init__(self, env_fns): + self.envs = [fn() for fn in env_fns] + env = self.envs[0] + ShareVecEnv.__init__(self, len( + env_fns), env.observation_space, env.share_observation_space, env.action_space) + self.actions = None + + def step_async(self, actions): + self.actions = actions + + def step_wait(self): + results = [env.step(a) for (a, env) in zip(self.actions, self.envs)] + obs, rews, dones, infos = map(np.array, zip(*results)) + self.actions = None + return obs, rews, dones, infos + + def reset(self, reset_choose): + obs = [env.reset(choose) + for (env, choose) in zip(self.envs, reset_choose)] + return np.array(obs) + + def close(self): + for env in self.envs: + env.close() + + def render(self, mode="human"): + if mode == "rgb_array": + return np.array([env.render(mode=mode) for env in self.envs]) + elif mode == "human": + for env in self.envs: + env.render(mode=mode) + else: + raise NotImplementedError diff --git a/controllers/mat_rapid/soccer/multiagentenv.py b/controllers/mat_rapid/soccer/multiagentenv.py new file mode 100644 index 0000000..2a1f4dd --- /dev/null +++ b/controllers/mat_rapid/soccer/multiagentenv.py @@ -0,0 +1,82 @@ +from collections import namedtuple +import numpy as np + + +def convert(dictionary): + return namedtuple('GenericDict', dictionary.keys())(**dictionary) + +class MultiAgentEnv(object): + + def __init__(self, **kwargs): + # Unpack arguments from sacred + args = kwargs["env_args"] + if isinstance(args, dict): + args = convert(args) + self.args = args + + if getattr(args, "seed", None) is not None: + self.seed = args.seed + self.rs = np.random.RandomState(self.seed) # initialise numpy random state + + def step(self, actions): + """ Returns reward, terminated, info """ + raise 
NotImplementedError + + def get_obs(self): + """ Returns all agent observations in a list """ + raise NotImplementedError + + def get_obs_agent(self, agent_id): + """ Returns observation for agent_id """ + raise NotImplementedError + + def get_obs_size(self): + """ Returns the shape of the observation """ + raise NotImplementedError + + def get_state(self): + raise NotImplementedError + + def get_state_size(self): + """ Returns the shape of the state""" + raise NotImplementedError + + def get_avail_actions(self): + raise NotImplementedError + + def get_avail_agent_actions(self, agent_id): + """ Returns the available actions for agent_id """ + raise NotImplementedError + + def get_total_actions(self): + """ Returns the total number of actions an agent could ever take """ + # TODO: This is only suitable for a discrete 1 dimensional action space for each agent + raise NotImplementedError + + def get_stats(self): + raise NotImplementedError + + # TODO: Temp hack + def get_agg_stats(self, stats): + return {} + + def reset(self): + """ Returns initial observations and states""" + raise NotImplementedError + + def render(self): + raise NotImplementedError + + def close(self): + raise NotImplementedError + + def seed(self, seed): + raise NotImplementedError + + def get_env_info(self): + env_info = {"state_shape": self.get_state_size(), + "obs_shape": self.get_obs_size(), + "n_actions": self.get_total_actions(), + "n_agents": self.n_agents, + "episode_limit": self.episode_limit} + return env_info diff --git a/controllers/mat_rapid/soccer/player.py b/controllers/mat_rapid/soccer/player.py new file mode 100644 index 0000000..e261c87 --- /dev/null +++ b/controllers/mat_rapid/soccer/player.py @@ -0,0 +1,92 @@ +import numpy as np +from controller import Supervisor +import math + +class Player(): + kick_vel = 0.0 + + def __init__(self, name = None, supervisor = None): + super().__init__() + self.name = name + self.supervisor = supervisor + self.player = None + self.emitter = None 
+ self.waiting_time = 0 + self.dx = 0 + self.dy = 0 + self.dthe = 0 + + def reset(self, pos = [0.0, 0.0, 0.0]): + children = self.supervisor.getRoot().getField('children') + if self.player != None: + self.player.remove() + if "blue" in self.name: + ch = int(self.name[-1]) + children.importMFNodeFromString(-1, f'DEF {self.name} GankenKun_box {{translation {pos[0]} {pos[1]} 0.300 rotation 0 0 1 {pos[2]} jerseyTexture "textures/GankenKun_{self.name}.png" jerseyColor 0, 0, 1 channel {ch} controller "void"}}') + else: + ch = int(self.name[-1])+3 + children.importMFNodeFromString(-1, f'DEF {self.name} GankenKun_box {{translation {pos[0]} {pos[1]} 0.300 rotation 0 0 1 {pos[2]} jerseyTexture "textures/GankenKun_{self.name}.png" jerseyColor 1, 0, 0 channel {ch} controller "void"}}') + self.pos = pos + self.emitter = self.supervisor.getDevice(f'{self.name}_emitter') + self.player = self.supervisor.getFromDef(f'{self.name}') + self.player_pos = self.player.getField('translation') + self.player_rot = self.player.getField('rotation') + + self.alive = True + self.score = 0 + self.action = 0 + self.is_fall = False + self.is_replace = False + self.waiting_time = 1 + + def move(self, pos = [0.0, 0.0, 0.0]): + self.player.resetPhysics() + self.player_pos.setSFVec3f([pos[0], pos[1], 0.450]) + self.player_rot.setSFRotation([0, 0, 1, pos[2]]) + + def send(self, message): + if self.waiting_time > 0: + self.waiting_time -= 1 + return + if "kick" in message.decode('utf-8'): + self.waiting_time = 4 + message_parts = message.decode('utf-8').split(',') + if message_parts[0] == "walk": + self.dx, self.dy, self.dthe = float(message_parts[1])*0.01, float(message_parts[2])*0.01, float(message_parts[3])*0.01 + self.kick_vel = 0.0 + if "kick" in message.decode('utf-8'): + self.dx, self.dy, self.dthe = 0.0, 0.0, 0.0 + self.kick_vel = 2.0 + + def update(self): + #self.action = action + x, y, z = self.player_pos.getSFVec3f() + yaw, pitch, roll = self.rotation_to_euler(self.player_rot.getSFRotation()) 
+ self.pos = [x, y, yaw] + if abs(pitch) > 1.0 or abs(roll) > 1.0: + self.is_fall = True + else: + self.is_fall = False + x += self.dx * math.cos(yaw) - self.dy * math.sin(yaw) + y += self.dx * math.sin(yaw) + self.dy * math.cos(yaw) + self.player_pos.setSFVec3f([x, y, z]) + self.player_rot.setSFRotation([0, 0, 1, yaw + self.dthe]) + self.player.setVelocity([0, 0, 0, 0, 0, 0]) + + def is_done(self): + return not self.alive + + def rotation_to_euler(self, rotation): + x, y, z, angle = rotation + c = np.cos(angle) + s = np.sin(angle) + t = 1 - c + R = np.array([ + [t*x*x + c, t*x*y - z*s, t*x*z + y*s], + [t*x*y + z*s, t*y*y + c, t*y*z - x*s], + [t*x*z - y*s, t*y*z + x*s, t*z*z + c] + ]) + yaw = np.arctan2(R[1, 0], R[0, 0]) + pitch = np.arctan2(-R[2, 0], np.sqrt(R[2, 1]**2 + R[2, 2]**2)) + roll = np.arctan2(R[2, 1], R[2, 2]) + return yaw, pitch, roll diff --git a/controllers/mat_rapid/soccer/soccer.py b/controllers/mat_rapid/soccer/soccer.py new file mode 100644 index 0000000..cf3d1de --- /dev/null +++ b/controllers/mat_rapid/soccer/soccer.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python3 + +import numpy as np +import math +import copy +import random + +from controller import Supervisor + +from gymnasium.spaces import Box, Discrete, Sequence +from gymnasium.utils import EzPickle, seeding + +from pettingzoo import AECEnv +from pettingzoo.utils import wrappers +from pettingzoo.utils.agent_selector import agent_selector +from pettingzoo.utils.conversions import parallel_wrapper_fn + +from soccer.player import Player + +__all__ = ["env", "parallel_env", "raw_env"] + +def env(**kwargs): + env = raw_env(**kwargs) + env = wrappers.AssertOutOfBoundsWrapper(env) + env = wrappers.OrderEnforcingWrapper(env) + return env + +parallel_env = parallel_wrapper_fn(env) + +def normalize_angle_rad(angle): + while angle > math.pi: + angle -= 2.0 * math.pi + while angle <= -math.pi: + angle += 2.0 * math.pi + return angle + +class raw_env(AECEnv, EzPickle): + metadata = { + "render_modes": 
["human", "rgb_array"], + "name": "soccer_v0", + "is_parallelizable": True, + } + + supervisor = None + + def __init__(self, max_cycles=300, render_mode=None): + EzPickle.__init__(self, max_cycles=max_cycles, render_mode=render_mode) + if self.supervisor == None: + self.supervisor = Supervisor() + self.time_step = int(self.supervisor.getBasicTimeStep()) + + self.frames = 0 + self.render_mode = render_mode + self._seed() + self.max_cycles = max_cycles + self.out_agent = [] + self.agent_name_mapping = {} + self.agent_dict = {} + self.kill_list = [] + self.agent_list = [] + #self.agents = ["blue1", "blue2", "blue3", "red1", "red2", "red3"] + self.agents = ["blue1", "blue2", "blue3"] + self.dead_agents = [] + for i in range(len(self.agents)): + self.agent_name_mapping[self.agents[i]] = i + self.agent_list.append(Player(self.agents[i], self.supervisor)) + #obs_space = Box(low=-5, high=5, shape = ([15]), dtype=np.float16) + obs_space = Box(low=-5, high=5, shape = ([9]), dtype=np.float32) + self.observation_spaces = dict(zip(self.agents, [obs_space for _ in enumerate(self.agents)])) + self.action_spaces = dict(zip(self.agents, [Discrete(9) for _ in enumerate(self.agents)])) + self.actions = ["walk,1,0,0", "walk,-1,0,0", "walk,0,1,0", "walk,0,-1,0", "walk,0,0,1", "walk,0,0,-1", "motion,left_kick", "motion,right_kick", "walk,0,0,0"] + self.state_space = Box(low=-5, high=5, shape = ([21]), dtype=np.float32) + + self.possible_agents = copy.deepcopy(self.agents) + self._agent_selector = agent_selector(self.agents) + + self.reinit() + + def __del__(self): + print("DELETE") + + def observation_space(self, agent): + return self.observation_spaces[agent] + + def action_space(self, agent): + return self.action_spaces[agent] + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + + def observe(self, agent): + i = self.agent_name_mapping[agent] + state = self.state() + ball_x, ball_y = [state[0], state[1]] + bx, by, bthe = state[i*3+3], state[i*3+4], 
state[i*3+5] + s, c = math.sin(bthe), math.cos(bthe) + blx, bly = ball_x - bx, ball_y - by + x, y = blx * c + bly * s, - blx * s + bly * c + angle = math.degrees(math.atan2(y, x)) + obs = [x, y] if (abs(angle) < 60) else [-100, -100] + obs += [bx, by, bthe] + no_agent = len(self.possible_agents) + base_index = list(range(no_agent)) + if agent.startswith("red"): + index = base_index[int(no_agent/2):] + base_index[:int(no_agent/2)] + else: + index = base_index + index.remove(i) + for j in index: + rx, ry = state[j*3+3], state[j*3+4] + lx, ly = rx - bx, ry - by + x, y = lx * c + ly * s, - lx * s + ly * c + obs += [x, y] + if agent.startswith("red"): + obs[2] = -obs[2] + obs[3] = -obs[3] + obs[4] = normalize_angle_rad(obs[4]+math.pi) + return obs + + def state(self): + ball_x, ball_y, _ = self.ball_pos.getSFVec3f() + #for agent in self.agent_list: + # agent.update() + player = [] + for i in range(len(self.agent_list)): + player.append(self.agent_list[i].pos) + #state = [ball_x, ball_y, 0, player[0][0], player[0][1], player[0][2], player[1][0], player[1][1], player[1][2], player[2][0], player[2][1], player[2][2], player[3][0], player[3][1], player[3][2], player[4][0], player[4][1], player[4][2], player[5][0], player[5][1], player[5][2]] + state = [ball_x, ball_y, 0, player[0][0], player[0][1], player[0][2], player[1][0], player[1][1], player[1][2], player[2][0], player[2][1], player[2][2]] + return state + + def step(self, action): + if self.terminations[self.agent_selection] or self.truncations[self.agent_selection]: + self._was_dead_step(action) + return + self._cumulative_rewards[self.agent_selection] = 0 + agent = self.agent_list[self.agent_name_mapping[self.agent_selection]] + agent.score = 0 + + terminate = False + truncate = False + goal = False + + #print("frames: "+str(self.frames)) + + i = self.agent_name_mapping[self.agent_selection] + if self.agent_list[i].is_fall: + while True: + if self.agents[i].startswith("blue"): + x, y = random.uniform(-4.0, -3.0), 
random.uniform(-2.5, 2.5) + elif self.agents[i].startswith("red"): + x, y = random.uniform(4.0, 3.0), random.uniform(-2.5, 2.5) + near_robot = False + for j in range(i): + robot_x, robot_y, _ = self.agent_list[j].pos + length = math.sqrt((x-robot_x)**2+(y-robot_y)**2) + if length < 1: + near_robot = True + break + if near_robot == False: + break + self.init_pos[i][0], self.init_pos[i][1] = x, y + self.agent_list[i].move(self.init_pos[i]) + self.agent_list[i].is_replace = True + else: + message = self.actions[action].encode('utf-8') + agent.send(message) + if "kick" in message.decode('utf-8'): + ball_x, ball_y, _ = self.ball_pos.getSFVec3f() + bx, by, bthe = agent.pos + s, c = math.sin(bthe), math.cos(bthe) + blx, bly = ball_x - bx, ball_y - by + x, y = blx * c + bly * s, - blx * s + bly * c + if 0.1 < x < 0.25: + if ("left" in message.decode('utf-8') and 0.0 < y < 0.1) or ("right" in message.decode('utf-8') and -0.1 < y < 0.0): + vel = agent.kick_vel + self.ball.setVelocity([vel*c, vel*s, 0, 0, 0, 0]) + + if self._agent_selector.is_last(): + self.frames += 1 + # status update and calculate reward + self._clear_rewards() + for i in range(4): + self.supervisor.step(self.time_step) + ball_x, ball_y, _ = self.ball_pos.getSFVec3f() + ball_vel_x, ball_vel_y = self.ball.getVelocity()[:2] + for agent in self.agents: + self.agent_list[self.agent_name_mapping[agent]].update() + x, y, the = self.agent_list[self.agent_name_mapping[agent]].pos + length = math.sqrt((x-ball_x)**2+(y-ball_y)**2) + #self.rewards[agent] += 0.2/length/40 + if length < 1.0: + if agent.startswith("blue"): + ball_dx, ball_dy = 4.5 - ball_x, 0 - ball_y + ball_len = math.sqrt(ball_dx**2+ball_dy**2) + ball_dx, ball_dy = ball_dx / ball_len, ball_dy / ball_len + reward = ball_vel_x * ball_dx + ball_vel_y * ball_dy + self.rewards[agent] += max(reward, 0) * 10 + elif agent.startswith("red"): + ball_dx, ball_dy = 4.5 - ( -ball_x), 0 - (-ball_y) + ball_len = math.sqrt(ball_dx**2+ball_dy**2) + ball_dx, ball_dy = 
ball_dx / ball_len, ball_dy / ball_len + reward = (-ball_vel_x) * ball_dx + (-ball_vel_y) * ball_dy + self.rewards[agent] += max(reward, 0) * 10 + for agent in self.agents: + # out of field penalty + x, y, the = self.agent_list[self.agent_name_mapping[agent]].pos + if abs(x) > 5.0 or abs(y) > 3.5: + self.rewards[agent] += -0.4 + # hit other robot penalty + for other_agent in self.agents: + if other_agent == agent: + continue + xo, yo, _ = self.agent_list[self.agent_name_mapping[other_agent]].pos + if math.sqrt((x-xo)**2+(y-yo)**2) < 0.3: + self.rewards[agent] = -1.0 + # fall penalty + if self.agent_list[self.agent_name_mapping[agent]].is_replace: + self.rewards[agent] += -1 + self.agent_list[self.agent_name_mapping[agent]].is_replace = False + print("reward(fall): "+str(agent)+" "+str(self.rewards[agent])) + + # global rewards + if ball_x > 4.5 and abs(ball_y) < 1.3: + goal = True + truncate = True + if agent.startswith("blue"): + self.rewards[agent] += 1000 + elif agent.startswith("red"): + self.rewards[agent] += -1000 + print("Team blue Goal, reward: "+str(agent)+" "+str(self.rewards[agent])) + elif ball_x < -4.5 and abs(ball_y) < 1.3: + goal = True + truncate = True + if agent.startswith("blue"): + self.rewards[agent] += -1000 + elif agent.startswith("red"): + self.rewards[agent] += 1000 + print("Team red Goal, reward: "+str(agent)+" "+str(self.rewards[agent])) + + for agent in self.agents: + self.total_rewards[agent] += self.rewards[agent] + + if not goal: + if abs(ball_x) > 4.5 or abs(ball_y) > 3.0: + print("The ball out of the field") + y = random.uniform(-2.5, 2.5) + self.ball.resetPhysics() + self.ball_pos.setSFVec3f([0, y, 0]) + + if self.frames >= self.max_cycles: + truncate = True + self.terminations = {a: terminate for a in self.agents} + self.truncations = {a: truncate for a in self.agents} + if truncate: + for agent in self.agents: + self.infos[agent]["episode"] = {"r": self.total_rewards[agent], "l": self.max_cycles} + + if 
self._agent_selector.is_last(): + _live_agents = self.agents[:] + for k in self.kill_list: + _live_agents.remove(k) + self.terminations[k] = True + self.dead_agents.append(k) + self.kill_list = [] + self._agent_selector.reinit(_live_agents) + + if len(self._agent_selector.agent_order): + self.agent_selection = self._agent_selector.next() + + self._accumulate_rewards() + self._deads_step_first() + + def render(): + pass + + def reinit(self): + self.score = 0 + self.run = True + children = self.supervisor.getRoot().getField('children') + + try: + self.ball + except: + pass + else: + self.ball.remove() + + y = random.uniform(-2.5, 2.5) + children.importMFNodeFromString(-1, f'DEF BALL RobocupSoccerBall {{ translation 0 {y} 0.1 size 1 }}') + self.ball = self.supervisor.getFromDef('BALL') + self.ball_pos = self.ball.getField('translation') + self.init_pos = [[-0.3, 0, 0], [-2, -1, 0], [-2, 1, 0], [1, 0, 3.14], [2, -1, 3.14], [2, 1, 3.14]] + for i in range(len(self.agent_list)): + while True: + if self.agents[i].startswith("blue"): + x, y = random.uniform(-4.0, -1.0), random.uniform(-2.5, 2.5) + elif self.agents[i].startswith("red"): + x, y = random.uniform(4.0, 1.0), random.uniform(-2.5, 2.5) + near_robot = False + for j in range(i): + length = math.sqrt((x-self.init_pos[j][0])**2+(y-self.init_pos[j][1])**2) + if length < 1: + near_robot = True + break + if near_robot == False: + break + self.init_pos[i][0], self.init_pos[i][1] = x, y + self.agent_list[i].reset(self.init_pos[i]) + self.frames = 0 + + def reset(self, seed = None, options = None): + if seed is not None: + self._seed(seed=seed) + self.agents = copy.deepcopy(self.possible_agents) + self._agent_selector.reinit(self.agents) + self.agent_selection = self._agent_selector.next() + self.rewards = dict(zip(self.agents, [0 for _ in self.agents])) + self.total_rewards = dict(zip(self.agents, [0 for _ in self.agents])) + self._cumulative_rewards = {a: 0 for a in self.agents} + self.terminations = dict(zip(self.agents, 
[False for _ in self.agents])) + self.truncations = dict(zip(self.agents, [False for _ in self.agents])) + self.infos = dict(zip(self.agents, [{} for _ in self.agents])) + self.reinit() diff --git a/controllers/mat_rapid/soccer/soccer_env.py b/controllers/mat_rapid/soccer/soccer_env.py new file mode 100644 index 0000000..070c1b9 --- /dev/null +++ b/controllers/mat_rapid/soccer/soccer_env.py @@ -0,0 +1,116 @@ +from functools import partial +import gym +from gym.spaces import Box +from gym.wrappers import TimeLimit +import numpy as np +#import gfootball.env as football_env +import soccer_v0 +#from .encode.obs_encode import FeatureEncoder +#from .encode.rew_encode import Rewarder + +from soccer.multiagentenv import MultiAgentEnv + + +class SoccerEnv(MultiAgentEnv): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.env = soccer_v0.parallel_env(max_cycles=kwargs["env_args"]["episode_length"]) + #self.scenario = kwargs["env_args"]["scenario"] + self.env.reset() + self.n_agents = self.env.num_agents + self.agents = self.env.agents + #self.reward_type = kwargs["env_args"]["reward"] + + #self.feature_encoder = FeatureEncoder() + #self.reward_encoder = Rewarder() + self.action_space = [gym.spaces.Discrete(self.env.action_space(agent).n) for agent in self.agents] + + tmp_obs_dicts, _ = self.env.reset() + #tmp_obs = [self._encode_obs(obs_dict)[0] for obs_dict in tmp_obs_dicts] + tmp_obs = np.hstack([np.array(tmp_obs_dicts[k], dtype=np.float32).flatten() for k in sorted(tmp_obs_dicts)]) + #self.observation_space = [Box(low=float("-inf"), high=float("inf"), shape=tmp_obs[n].shape, dtype=np.float32) + # for n in range(self.n_agents)] + self.observation_space = [self.env.observation_space(agent) for agent in self.agents] + #self.share_observation_space = self.observation_space.copy() + self.share_observation_space = [Box(low=float("-inf"), high=float("inf"), shape=tmp_obs.shape, dtype=np.float32) for n in range(self.n_agents)] + + self.pre_obs = None + + def 
_encode_obs(self, raw_obs): + #obs = self.feature_encoder.encode(raw_obs.copy()) + obs = raw_obs + obs_cat = np.hstack( + [np.array(obs[k], dtype=np.float32).flatten() for k in sorted(obs)] + ) + return obs_cat + #ava = obs["avail"] + #return obs_cat, ava + + def reset(self, **kwargs): + """ Returns initial observations and states""" + obs_dicts, _ = self.env.reset() + self.pre_obs = obs_dicts + #obs = [] + #ava = [] + obs = [np.array(obs_dicts[k], dtype=np.float32) for k in sorted(obs_dicts)] + #for obs_dict in obs_dicts: + # obs_i, ava_i = self._encode_obs(obs_dict) + # obs.append(obs_i) + # ava.append(ava_i) + return obs + #state = obs.copy() + #return obs, state, ava + + def step(self, actions): + #actions_int = [int(a) for a in actions] + #o, r, d, i = self.env.step(actions_int) + actions_dict = {} + for i, agent in enumerate(self.agents): + actions_dict[agent] = int(actions[i]) + observations, rewards, terminations, truncations, infos = self.env.step(actions_dict) + #obs = [] + obs = [np.array(observations[k], dtype=np.float32) for k in sorted(observations)] + #ava = [] + #for obs_dict in observations: + # obs_i, ava_i = self._encode_obs(obs_dict) + # obs.append(obs_i) + # ava.append(ava_i) + #state = obs.copy() + + #rewards = [[self.reward_encoder.calc_reward(_r, _prev_obs, _obs)] + # for _r, _prev_obs, _obs in zip(r, self.pre_obs, o)] + rewards = [[rewards[k]] for k in sorted(rewards)] + + self.pre_obs = observations + + d = [truncations[k] for k in sorted(truncations)] + dones = np.ones((self.n_agents), dtype=bool) * d + #infos = [i for n in range(self.n_agents)] + #infos = [infos[k] for k in sorted(infos)] + info_n = [] + for i, agent in enumerate(self.agents): + info = {'individual_reward': rewards[i][0]} + info_n.append(info) + #return obs, state, rewards, dones, infos, ava + return obs, rewards, dones, info_n + + def render(self, **kwargs): + # self.env.render(**kwargs) + pass + + def close(self): + pass + + def seed(self, args): + pass + + def 
get_env_info(self): + + env_info = {"state_shape": self.observation_space[0].shape, + "obs_shape": self.observation_space[0].shape, + "n_actions": self.action_space[0].n, + "n_agents": self.n_agents, + "action_spaces": self.action_space + } + return env_info diff --git a/controllers/mat_rapid/soccer_v0.py b/controllers/mat_rapid/soccer_v0.py new file mode 100644 index 0000000..6bf6c03 --- /dev/null +++ b/controllers/mat_rapid/soccer_v0.py @@ -0,0 +1,7 @@ +from soccer.soccer import ( + env, + parallel_env, + raw_env, +) + +__all__ = ["env", "parallel_env", "raw_env"] diff --git a/controllers/mat_rapid/utils/__init__.py b/controllers/mat_rapid/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/controllers/mat_rapid/utils/shared_buffer.py b/controllers/mat_rapid/utils/shared_buffer.py new file mode 100644 index 0000000..8047980 --- /dev/null +++ b/controllers/mat_rapid/utils/shared_buffer.py @@ -0,0 +1,288 @@ +import torch +import numpy as np +import torch.nn.functional as F +from utils.util import get_shape_from_obs_space, get_shape_from_act_space + + +def _flatten(T, N, x): + return x.reshape(T * N, *x.shape[2:]) + + +def _cast(x): + return x.transpose(1, 2, 0, 3).reshape(-1, *x.shape[3:]) + + +def _shuffle_agent_grid(x, y): + rows = np.indices((x, y))[0] + # cols = np.stack([np.random.permutation(y) for _ in range(x)]) + cols = np.stack([np.arange(y) for _ in range(x)]) + return rows, cols + + +class SharedReplayBuffer(object): + """ + Buffer to store training data. + :param args: (argparse.Namespace) arguments containing relevant model, policy, and env information. + :param num_agents: (int) number of agents in the env. + :param obs_space: (gym.Space) observation space of agents. + :param cent_obs_space: (gym.Space) centralized observation space of agents. + :param act_space: (gym.Space) action space for agents. 
+ """ + + def __init__(self, args, num_agents, obs_space, cent_obs_space, act_space, env_name): + self.episode_length = args.episode_length + self.n_rollout_threads = args.n_rollout_threads + self.hidden_size = args.hidden_size + self.recurrent_N = args.recurrent_N + self.gamma = args.gamma + self.gae_lambda = args.gae_lambda + self._use_gae = args.use_gae + self._use_popart = args.use_popart + self._use_valuenorm = args.use_valuenorm + self._use_proper_time_limits = args.use_proper_time_limits + self.algo = args.algorithm_name + self.num_agents = num_agents + self.env_name = env_name + + obs_shape = get_shape_from_obs_space(obs_space) + share_obs_shape = get_shape_from_obs_space(cent_obs_space) + + if type(obs_shape[-1]) == list: + obs_shape = obs_shape[:1] + + if type(share_obs_shape[-1]) == list: + share_obs_shape = share_obs_shape[:1] + + self.share_obs = np.zeros((self.episode_length + 1, self.n_rollout_threads, num_agents, *share_obs_shape), + dtype=np.float32) + self.obs = np.zeros((self.episode_length + 1, self.n_rollout_threads, num_agents, *obs_shape), dtype=np.float32) + + self.rnn_states = np.zeros( + (self.episode_length + 1, self.n_rollout_threads, num_agents, self.recurrent_N, self.hidden_size), + dtype=np.float32) + self.rnn_states_critic = np.zeros_like(self.rnn_states) + + self.value_preds = np.zeros( + (self.episode_length + 1, self.n_rollout_threads, num_agents, 1), dtype=np.float32) + self.returns = np.zeros_like(self.value_preds) + self.advantages = np.zeros( + (self.episode_length, self.n_rollout_threads, num_agents, 1), dtype=np.float32) + + if act_space.__class__.__name__ == 'Discrete': + self.available_actions = np.ones((self.episode_length + 1, self.n_rollout_threads, num_agents, act_space.n), + dtype=np.float32) + else: + self.available_actions = None + + act_shape = get_shape_from_act_space(act_space) + + self.actions = np.zeros( + (self.episode_length, self.n_rollout_threads, num_agents, act_shape), dtype=np.float32) + 
self.action_log_probs = np.zeros( + (self.episode_length, self.n_rollout_threads, num_agents, act_shape), dtype=np.float32) + self.rewards = np.zeros( + (self.episode_length, self.n_rollout_threads, num_agents, 1), dtype=np.float32) + + self.masks = np.ones((self.episode_length + 1, self.n_rollout_threads, num_agents, 1), dtype=np.float32) + self.bad_masks = np.ones_like(self.masks) + self.active_masks = np.ones_like(self.masks) + + self.step = 0 + + def insert(self, share_obs, obs, rnn_states_actor, rnn_states_critic, actions, action_log_probs, + value_preds, rewards, masks, bad_masks=None, active_masks=None, available_actions=None): + """ + Insert data into the buffer. + :param share_obs: (argparse.Namespace) arguments containing relevant model, policy, and env information. + :param obs: (np.ndarray) local agent observations. + :param rnn_states_actor: (np.ndarray) RNN states for actor network. + :param rnn_states_critic: (np.ndarray) RNN states for critic network. + :param actions:(np.ndarray) actions taken by agents. + :param action_log_probs:(np.ndarray) log probs of actions taken by agents + :param value_preds: (np.ndarray) value function prediction at each step. + :param rewards: (np.ndarray) reward collected at each step. + :param masks: (np.ndarray) denotes whether the environment has terminated or not. + :param bad_masks: (np.ndarray) action space for agents. + :param active_masks: (np.ndarray) denotes whether an agent is active or dead in the env. + :param available_actions: (np.ndarray) actions available to each agent. If None, all actions are available. 
+ """ + self.share_obs[self.step + 1] = share_obs.copy() + self.obs[self.step + 1] = obs.copy() + self.rnn_states[self.step + 1] = rnn_states_actor.copy() + self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy() + self.actions[self.step] = actions.copy() + self.action_log_probs[self.step] = action_log_probs.copy() + self.value_preds[self.step] = value_preds.copy() + self.rewards[self.step] = rewards.copy() + self.masks[self.step + 1] = masks.copy() + if bad_masks is not None: + self.bad_masks[self.step + 1] = bad_masks.copy() + if active_masks is not None: + self.active_masks[self.step + 1] = active_masks.copy() + if available_actions is not None: + self.available_actions[self.step + 1] = available_actions.copy() + + self.step = (self.step + 1) % self.episode_length + + def chooseinsert(self, share_obs, obs, rnn_states, rnn_states_critic, actions, action_log_probs, + value_preds, rewards, masks, bad_masks=None, active_masks=None, available_actions=None): + """ + Insert data into the buffer. This insert function is used specifically for Hanabi, which is turn based. + :param share_obs: (argparse.Namespace) arguments containing relevant model, policy, and env information. + :param obs: (np.ndarray) local agent observations. + :param rnn_states_actor: (np.ndarray) RNN states for actor network. + :param rnn_states_critic: (np.ndarray) RNN states for critic network. + :param actions:(np.ndarray) actions taken by agents. + :param action_log_probs:(np.ndarray) log probs of actions taken by agents + :param value_preds: (np.ndarray) value function prediction at each step. + :param rewards: (np.ndarray) reward collected at each step. + :param masks: (np.ndarray) denotes whether the environment has terminated or not. + :param bad_masks: (np.ndarray) denotes indicate whether whether true terminal state or due to episode limit + :param active_masks: (np.ndarray) denotes whether an agent is active or dead in the env. 
+ :param available_actions: (np.ndarray) actions available to each agent. If None, all actions are available. + """ + self.share_obs[self.step] = share_obs.copy() + self.obs[self.step] = obs.copy() + self.rnn_states[self.step + 1] = rnn_states.copy() + self.rnn_states_critic[self.step + 1] = rnn_states_critic.copy() + self.actions[self.step] = actions.copy() + self.action_log_probs[self.step] = action_log_probs.copy() + self.value_preds[self.step] = value_preds.copy() + self.rewards[self.step] = rewards.copy() + self.masks[self.step + 1] = masks.copy() + if bad_masks is not None: + self.bad_masks[self.step + 1] = bad_masks.copy() + if active_masks is not None: + self.active_masks[self.step] = active_masks.copy() + if available_actions is not None: + self.available_actions[self.step] = available_actions.copy() + + self.step = (self.step + 1) % self.episode_length + + def after_update(self): + """Copy last timestep data to first index. Called after update to model.""" + self.share_obs[0] = self.share_obs[-1].copy() + self.obs[0] = self.obs[-1].copy() + self.rnn_states[0] = self.rnn_states[-1].copy() + self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy() + self.masks[0] = self.masks[-1].copy() + self.bad_masks[0] = self.bad_masks[-1].copy() + self.active_masks[0] = self.active_masks[-1].copy() + if self.available_actions is not None: + self.available_actions[0] = self.available_actions[-1].copy() + + def chooseafter_update(self): + """Copy last timestep data to first index. This method is used for Hanabi.""" + self.rnn_states[0] = self.rnn_states[-1].copy() + self.rnn_states_critic[0] = self.rnn_states_critic[-1].copy() + self.masks[0] = self.masks[-1].copy() + self.bad_masks[0] = self.bad_masks[-1].copy() + + def compute_returns(self, next_value, value_normalizer=None): + """ + Compute returns either as discounted sum of rewards, or using GAE. + :param next_value: (np.ndarray) value predictions for the step after the last episode step. 
+ :param value_normalizer: (PopArt) If not None, PopArt value normalizer instance. + """ + self.value_preds[-1] = next_value + gae = 0 + for step in reversed(range(self.rewards.shape[0])): + if self._use_popart or self._use_valuenorm: + delta = self.rewards[step] + self.gamma * value_normalizer.denormalize( + self.value_preds[step + 1]) * self.masks[step + 1] \ + - value_normalizer.denormalize(self.value_preds[step]) + gae = delta + self.gamma * self.gae_lambda * self.masks[step + 1] * gae + + # here is a patch for mpe, whose last step is timeout instead of terminate + if self.env_name == "MPE" and step == self.rewards.shape[0] - 1: + gae = 0 + + self.advantages[step] = gae + self.returns[step] = gae + value_normalizer.denormalize(self.value_preds[step]) + else: + delta = self.rewards[step] + self.gamma * self.value_preds[step + 1] * \ + self.masks[step + 1] - self.value_preds[step] + gae = delta + self.gamma * self.gae_lambda * self.masks[step + 1] * gae + + # here is a patch for mpe, whose last step is timeout instead of terminate + if self.env_name == "MPE" and step == self.rewards.shape[0] - 1: + gae = 0 + + self.advantages[step] = gae + self.returns[step] = gae + self.value_preds[step] + + def feed_forward_generator_transformer(self, advantages, num_mini_batch=None, mini_batch_size=None): + """ + Yield training data for MLP policies. + :param advantages: (np.ndarray) advantage estimates. + :param num_mini_batch: (int) number of minibatches to split the batch into. + :param mini_batch_size: (int) number of samples in each minibatch. + """ + episode_length, n_rollout_threads, num_agents = self.rewards.shape[0:3] + batch_size = n_rollout_threads * episode_length + + if mini_batch_size is None: + assert batch_size >= num_mini_batch, ( + "PPO requires the number of processes ({}) " + "* number of steps ({}) = {} " + "to be greater than or equal to the number of PPO mini batches ({})." 
+ "".format(n_rollout_threads, episode_length, + n_rollout_threads * episode_length, + num_mini_batch)) + mini_batch_size = batch_size // num_mini_batch + + rand = torch.randperm(batch_size).numpy() + sampler = [rand[i * mini_batch_size:(i + 1) * mini_batch_size] for i in range(num_mini_batch)] + rows, cols = _shuffle_agent_grid(batch_size, num_agents) + + # keep (num_agent, dim) + share_obs = self.share_obs[:-1].reshape(-1, *self.share_obs.shape[2:]) + share_obs = share_obs[rows, cols] + obs = self.obs[:-1].reshape(-1, *self.obs.shape[2:]) + obs = obs[rows, cols] + rnn_states = self.rnn_states[:-1].reshape(-1, *self.rnn_states.shape[2:]) + rnn_states = rnn_states[rows, cols] + rnn_states_critic = self.rnn_states_critic[:-1].reshape(-1, *self.rnn_states_critic.shape[2:]) + rnn_states_critic = rnn_states_critic[rows, cols] + actions = self.actions.reshape(-1, *self.actions.shape[2:]) + actions = actions[rows, cols] + if self.available_actions is not None: + available_actions = self.available_actions[:-1].reshape(-1, *self.available_actions.shape[2:]) + available_actions = available_actions[rows, cols] + value_preds = self.value_preds[:-1].reshape(-1, *self.value_preds.shape[2:]) + value_preds = value_preds[rows, cols] + returns = self.returns[:-1].reshape(-1, *self.returns.shape[2:]) + returns = returns[rows, cols] + masks = self.masks[:-1].reshape(-1, *self.masks.shape[2:]) + masks = masks[rows, cols] + active_masks = self.active_masks[:-1].reshape(-1, *self.active_masks.shape[2:]) + active_masks = active_masks[rows, cols] + action_log_probs = self.action_log_probs.reshape(-1, *self.action_log_probs.shape[2:]) + action_log_probs = action_log_probs[rows, cols] + advantages = advantages.reshape(-1, *advantages.shape[2:]) + advantages = advantages[rows, cols] + + for indices in sampler: + # [L,T,N,Dim]-->[L*T,N,Dim]-->[index,N,Dim]-->[index*N, Dim] + share_obs_batch = share_obs[indices].reshape(-1, *share_obs.shape[2:]) + obs_batch = obs[indices].reshape(-1, 
*obs.shape[2:]) + rnn_states_batch = rnn_states[indices].reshape(-1, *rnn_states.shape[2:]) + rnn_states_critic_batch = rnn_states_critic[indices].reshape(-1, *rnn_states_critic.shape[2:]) + actions_batch = actions[indices].reshape(-1, *actions.shape[2:]) + if self.available_actions is not None: + available_actions_batch = available_actions[indices].reshape(-1, *available_actions.shape[2:]) + else: + available_actions_batch = None + value_preds_batch = value_preds[indices].reshape(-1, *value_preds.shape[2:]) + return_batch = returns[indices].reshape(-1, *returns.shape[2:]) + masks_batch = masks[indices].reshape(-1, *masks.shape[2:]) + active_masks_batch = active_masks[indices].reshape(-1, *active_masks.shape[2:]) + old_action_log_probs_batch = action_log_probs[indices].reshape(-1, *action_log_probs.shape[2:]) + if advantages is None: + adv_targ = None + else: + adv_targ = advantages[indices].reshape(-1, *advantages.shape[2:]) + + yield share_obs_batch, obs_batch, rnn_states_batch, rnn_states_critic_batch, actions_batch, \ + value_preds_batch, return_batch, masks_batch, active_masks_batch, old_action_log_probs_batch, \ + adv_targ, available_actions_batch diff --git a/controllers/mat_rapid/utils/util.py b/controllers/mat_rapid/utils/util.py new file mode 100644 index 0000000..8b61453 --- /dev/null +++ b/controllers/mat_rapid/utils/util.py @@ -0,0 +1,72 @@ +import numpy as np +import math +import torch + +def check(input): + if type(input) == np.ndarray: + return torch.from_numpy(input) + +def get_gard_norm(it): + sum_grad = 0 + for x in it: + if x.grad is None: + continue + sum_grad += x.grad.norm() ** 2 + return math.sqrt(sum_grad) + +def update_linear_schedule(optimizer, epoch, total_num_epochs, initial_lr): + """Decreases the learning rate linearly""" + lr = initial_lr - (initial_lr * (epoch / float(total_num_epochs))) + for param_group in optimizer.param_groups: + param_group['lr'] = lr + +def huber_loss(e, d): + a = (abs(e) <= d).float() + b = (e > d).float() + 
return a*e**2/2 + b*d*(abs(e)-d/2) + +def mse_loss(e): + return e**2/2 + +def get_shape_from_obs_space(obs_space): + if obs_space.__class__.__name__ == 'Box': + obs_shape = obs_space.shape + elif obs_space.__class__.__name__ == 'list': + obs_shape = obs_space + else: + raise NotImplementedError + return obs_shape + +def get_shape_from_act_space(act_space): + if act_space.__class__.__name__ == 'Discrete': + act_shape = 1 + elif act_space.__class__.__name__ == "MultiDiscrete": + act_shape = act_space.shape + elif act_space.__class__.__name__ == "Box": + act_shape = act_space.shape[0] + elif act_space.__class__.__name__ == "MultiBinary": + act_shape = act_space.shape[0] + else: # agar + act_shape = act_space[0].shape[0] + 1 + return act_shape + + +def tile_images(img_nhwc): + """ + Tile N images into one big PxQ image + (P,Q) are chosen to be as close as possible, and if N + is square, then P=Q. + input: img_nhwc, list or array of images, ndim=4 once turned into array + n = batch index, h = height, w = width, c = channel + returns: + bigim_HWc, ndarray with ndim=3 + """ + img_nhwc = np.asarray(img_nhwc) + N, h, w, c = img_nhwc.shape + H = int(np.ceil(np.sqrt(N))) + W = int(np.ceil(float(N)/H)) + img_nhwc = np.array(list(img_nhwc) + [img_nhwc[0]*0 for _ in range(N, H*W)]) + img_HWhwc = img_nhwc.reshape(H, W, h, w, c) + img_HhWwc = img_HWhwc.transpose(0, 2, 1, 3, 4) + img_Hh_Ww_c = img_HhWwc.reshape(H*h, W*w, c) + return img_Hh_Ww_c \ No newline at end of file diff --git a/controllers/mat_rapid/utils/valuenorm.py b/controllers/mat_rapid/utils/valuenorm.py new file mode 100644 index 0000000..6d9ce36 --- /dev/null +++ b/controllers/mat_rapid/utils/valuenorm.py @@ -0,0 +1,79 @@ + +import numpy as np + +import torch +import torch.nn as nn + + +class ValueNorm(nn.Module): + """ Normalize a vector of observations - across the first norm_axes dimensions""" + + def __init__(self, input_shape, norm_axes=1, beta=0.99999, per_element_update=False, epsilon=1e-5, 
device=torch.device("cpu")): + super(ValueNorm, self).__init__() + + self.input_shape = input_shape + self.norm_axes = norm_axes + self.epsilon = epsilon + self.beta = beta + self.per_element_update = per_element_update + self.tpdv = dict(dtype=torch.float32, device=device) + + self.running_mean = nn.Parameter(torch.zeros(input_shape), requires_grad=False).to(**self.tpdv) + self.running_mean_sq = nn.Parameter(torch.zeros(input_shape), requires_grad=False).to(**self.tpdv) + self.debiasing_term = nn.Parameter(torch.tensor(0.0), requires_grad=False).to(**self.tpdv) + + self.reset_parameters() + + def reset_parameters(self): + self.running_mean.zero_() + self.running_mean_sq.zero_() + self.debiasing_term.zero_() + + def running_mean_var(self): + debiased_mean = self.running_mean / self.debiasing_term.clamp(min=self.epsilon) + debiased_mean_sq = self.running_mean_sq / self.debiasing_term.clamp(min=self.epsilon) + debiased_var = (debiased_mean_sq - debiased_mean ** 2).clamp(min=1e-2) + return debiased_mean, debiased_var + + @torch.no_grad() + def update(self, input_vector): + if type(input_vector) == np.ndarray: + input_vector = torch.from_numpy(input_vector) + input_vector = input_vector.to(**self.tpdv) + + batch_mean = input_vector.mean(dim=tuple(range(self.norm_axes))) + batch_sq_mean = (input_vector ** 2).mean(dim=tuple(range(self.norm_axes))) + + if self.per_element_update: + batch_size = np.prod(input_vector.size()[:self.norm_axes]) + weight = self.beta ** batch_size + else: + weight = self.beta + + self.running_mean.mul_(weight).add_(batch_mean * (1.0 - weight)) + self.running_mean_sq.mul_(weight).add_(batch_sq_mean * (1.0 - weight)) + self.debiasing_term.mul_(weight).add_(1.0 * (1.0 - weight)) + + def normalize(self, input_vector): + # Make sure input is float32 + if type(input_vector) == np.ndarray: + input_vector = torch.from_numpy(input_vector) + input_vector = input_vector.to(**self.tpdv) + + mean, var = self.running_mean_var() + out = (input_vector - 
mean[(None,) * self.norm_axes]) / torch.sqrt(var)[(None,) * self.norm_axes] + + return out + + def denormalize(self, input_vector): + """ Transform normalized data back into original distribution """ + if type(input_vector) == np.ndarray: + input_vector = torch.from_numpy(input_vector) + input_vector = input_vector.to(**self.tpdv) + + mean, var = self.running_mean_var() + out = input_vector * torch.sqrt(var)[(None,) * self.norm_axes] + mean[(None,) * self.norm_axes] + + out = out.cpu().numpy() + + return out diff --git a/protos/GankenKun_box.proto b/protos/GankenKun_box.proto new file mode 100755 index 0000000..75c727c --- /dev/null +++ b/protos/GankenKun_box.proto @@ -0,0 +1,108 @@ +#VRML_SIM R2021a utf8 +# license: Apache License 2.0 +# license url: http://www.apache.org/licenses/LICENSE-2.0 +# This is a proto file for Webots for the GankenKun +# Extracted from: gankenkun_simple.urdf + +PROTO GankenKun_box [ + field SFVec3f translation 0 0 0 + field SFRotation rotation 0 1 0 0 + field SFString name "GankenKun" # Is `Robot.name`. + field SFString controller "void" # Is `Robot.controller`. + field MFString controllerArgs [] # Is `Robot.controllerArgs`. + field SFString customData "" # Is `Robot.customData`. + field SFBool supervisor FALSE # Is `Robot.supervisor`. + field SFBool synchronization TRUE # Is `Robot.synchronization`. + field SFBool selfCollision TRUE # Is `Robot.selfCollision`. +# field SFInt32 cameraWidth 640 # Is `Camera.width`. +# field SFInt32 cameraHeight 480 # Is `Camera.height`. + field MFString jerseyTexture "textures/GankenKun_red1.png" + field SFColor jerseyColor 1 0 0 +# field SFBool enable_backlash FALSE # Enables backlash on all joints. 
+ field SFInt32 channel 1 # communication channel +] +{ + Robot { + translation IS translation + rotation IS rotation + children [ + Transform { + translation 0.000 0.000 0.000 + rotation 0.0 0.0 1 1.57 + children [ + Shape { + appearance DEF teamcolor PBRAppearance { + baseColor IS jerseyColor + transparency 0.000000 + roughness 1.000000 + metalness 0 + emissiveColor 0.000000 0.000000 0.000000 + } + geometry DEF body Box { + size 0.200 0.200 0.600 + } + } + ] + } + Transform { + translation -0.005 0.000 0.000 + rotation -0.577350 -0.577350 0.577350 2.094395 + children [ + Shape { + appearance PBRAppearance { + baseColorMap ImageTexture { + url IS jerseyTexture + } + metalness 0 + roughness 1 + } + geometry DEF body_number IndexedFaceSet { + coord Coordinate { + point [ + -0.03300 -0.08825 -0.03400, 0.03300 -0.08825 -0.03400, 0.03300 0.01500 -0.03400, -0.03300 0.01500 -0.03400, 0.03300 -0.07500 0.09700, -0.03300 -0.07500 0.09700, -0.03300 0.01500 0.09700, 0.03300 0.01500 0.09700, + ] + } + texCoord TextureCoordinate { + point [ + 0.0 0.0, 1.0 0.0, 1.0 1.0, 0.0 1.0 + ] + } + coordIndex [ + 3 2 1 0 -1 7 6 5 4 -1 + ] + texCoordIndex [ + 0 1 2 3 -1 0 1 2 3 -1 + ] + creaseAngle 1 + } + } + ] + } + ] + name IS name + boundingObject Transform { + translation 0.000 0.000 0.000 + rotation 0.0 0.0 1 1.57 + children [ + Box { + size 0.200 0.200 0.600 + } + ] + } + physics Physics { + density -1 + mass 1.289633 + centerOfMass [ 0.000 0.000000 0.300 ] + inertiaMatrix [ + 7.000000e-03 7.000000e-03 7.000000e-03 + 0.000000e+00 0.000000e+00 0.000000e+00 + ] + } + controller IS controller + controllerArgs IS controllerArgs + customData IS customData + supervisor IS supervisor + synchronization IS synchronization + selfCollision IS selfCollision + } +} diff --git a/worlds/mat_rapid.wbt b/worlds/mat_rapid.wbt new file mode 100644 index 0000000..26addc6 --- /dev/null +++ b/worlds/mat_rapid.wbt @@ -0,0 +1,68 @@ +#VRML_SIM R2021b utf8 +WorldInfo { + info [ + "GANKENKUN robot." 
+ "The GANKENKUN robot simulation model" + ] + title "GANKENKUN" + basicTimeStep 80 + optimalThreadCount 8 + physicsDisableTime 0.1 + physicsDisableLinearThreshold 0.1 + physicsDisableAngularThreshold 0.1 + contactProperties [ + ContactProperties { + material1 "grass" + coulombFriction [ + 0.5 + ] + softCFM 0.03 + } + ContactProperties { + material1 "grass" + material2 "robocup soccer ball" + coulombFriction [ + 0.5 + ] + bounce 0.76 + softCFM 0.05 + } + ContactProperties { + material2 "robocup soccer ball" + bounce 0.76 + } + ] +} +Viewpoint { + orientation 0.6719791180295076 -0.49317350564460494 -0.552470775935249 1.8510428047885563 + position -2.5375047220358824 -0.9166711516828123 0.6010308394649483 +} +TexturedBackground { + texture "stadium_dry" +} +TexturedBackgroundLight { + texture "stadium_dry" +} + +RobocupSoccerField2 { + size "kid" +} + +Robot { + supervisor TRUE + controller "" + children[ + Emitter { name "blue1_emitter" channel 1 } + Receiver { name "blue1_receiver" channel 1 } + Emitter { name "blue2_emitter" channel 2 } + Receiver { name "blue2_receiver" channel 2 } + Emitter { name "blue3_emitter" channel 3 } + Receiver { name "blue3_receiver" channel 3 } + Emitter { name "red1_emitter" channel 4 } + Receiver { name "red1_receiver" channel 4 } + Emitter { name "red2_emitter" channel 5 } + Receiver { name "red2_receiver" channel 5 } + Emitter { name "red3_emitter" channel 6 } + Receiver { name "red3_receiver" channel 6 } + ] +}