Commit 06254cf

checking

1 parent a48c11f commit 06254cf
File tree

5 files changed: +113 -23

gbmi/exp_indhead/finetune_ind.py

Lines changed: 80 additions & 6 deletions
@@ -206,6 +206,8 @@ def diff_3(a, i_1, i_2, j, dic, matrices, attn_1):
     c = torch.max(c, term_3[i_2, dic[i_2], i, dic[i]].max())
     c = torch.max(c, term_3[i_2, dic[i_2], j, dic[j]].max())
     t_3 += (1 - attn_1[dic[j], j - 1].min()) * c
+    print(a, i_1, i_2, j)
+    print(c)

     # print(t_3)
     if j == 1:
@@ -256,6 +258,8 @@ def diff_2_4(a, i_1, i_2, j, dic, matrices, attn_1):
     c = torch.max(c, term_4[k, dic[k], i][..., dic[i]].max())
     c = torch.max(c, term_4[k, dic[k], j][..., dic[j]].max())
     d = d + (1 - attn_1[dic[j], j - 1].min()) * c
+    if k == 0:
+        print(c)

     if j == 0:

@@ -353,6 +357,8 @@ def diff_2_3_4(a, i_1, i_2, j, dic, matrices, attn_1):
         + term_3[i_2, dic[i_2], j, dic[j]].max(),
     )
     d = d + (1 - attn_1[dic[j], j - 1].min()) * c
+    if k == 0:
+        print(c)

     if j == 0:

@@ -445,13 +451,16 @@ def diff_2_3_4(a, i_1, i_2, j, dic, matrices, attn_1):


 def least_attention(a, i_1, i_2, j, dic, matrices, attn_1):
-    e = diff_2_4(a, i_1, i_2, j, dic, matrices, attn_1)

-    return (
-        diff_1(a, i_1, i_2, j, dic, matrices)
-        + e
-        + diff_3(a, i_1, i_2, j, dic, matrices, attn_1)
-    )
+    g = diff_3(a, i_1, i_2, j, dic, matrices, attn_1)
+    f = diff_2_4(a, i_1, i_2, j, dic, matrices, attn_1)
+    e = diff_2_3_4(a, i_1, i_2, j, dic, matrices, attn_1)
+
+    # print(a, i_1, i_2, j)
+    # print(e)
+    # print(f+g)
+    # print(e-f-g)
+    return diff_1(a, i_1, i_2, j, dic, matrices) + f + g


 def second_layer_attention(matrices, attn_1):
@@ -1027,4 +1036,69 @@ def good_loss_bound(model):
     # Show the plot
     plt.show()

+# %%
+import torch as t
+
+
+def sample(a, b, i, d_voc):
+    # i goes from 1 to n_ctx - 3
+    # randomly fill the sequence with tokens not equal to a
+    seq = t.randint(low=0, high=d_voc - 1, size=(i + 3,))
+    seq = seq + (seq >= a).int()
+
+    # fill the last position with a
+    seq[-1] = a
+
+    # pick where the induction pattern starts
+    m = t.randint(low=0, high=i, size=(1,)).item()
+
+    # place a at position m + 1, immediately followed by b
+    seq[m + 1] = a
+    seq[m + 2] = b
+    return seq
+
+
+def sample_acc_and_loss(model, batch_size=15000):
+    d_vocab = model.W_E.shape[0]
+    n_ctx = model.W_pos.shape[0]
+
+    acc = 0
+    loss = 0
+
+    loss_CE = t.nn.CrossEntropyLoss()
+
+    # compute the probability of each sequence length
+    sample_seq_length = t.arange(1, n_ctx - 3)
+    prob_sample_seq_len = t.tensor([i * (d_vocab - 1) ** i for i in sample_seq_length])
+    prob_sample_seq_len = prob_sample_seq_len / prob_sample_seq_len.sum()
+
+    # sample the sequence lengths
+    sampled = sample_seq_length[
+        torch.multinomial(prob_sample_seq_len, num_samples=batch_size, replacement=True)
+    ]
+
+    # sample a for every batch element
+    sample_a = t.randint(0, d_vocab, (batch_size,))
+
+    with t.no_grad():
+        for i in range(batch_size):
+            a = sample_a[i].item()
+
+            # sample b unequal to a
+            b = t.randint(0, d_vocab - 1, (1,)).item()
+            b = b + (b >= a)
+            length = sampled[i]
+
+            # sample a sequence
+            seq = sample(a, b, length, d_vocab)
+
+            # measure accuracy and loss at the final position
+            logit = model(seq).squeeze()[-1]
+            acc += logit.argmax() == b
+            loss += loss_CE(logit.unsqueeze(0), t.tensor([b]))
+
+    return acc / batch_size, loss / batch_size
+
 # %%
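
The new sample / sample_acc_and_loss helpers estimate induction accuracy by Monte Carlo: each sampled sequence ends in a token a and contains exactly one earlier a immediately followed by a token b, so a model with a working induction head should predict b at the final position. A minimal usage sketch (the model name and batch size below are illustrative, assuming a model whose W_E has shape [d_vocab, d_model] and whose W_pos has shape [n_ctx, d_model]):

    acc, loss = sample_acc_and_loss(model, batch_size=1000)
    print(f"accuracy: {acc.item():.4f}, loss: {loss.item():.4f}")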

gbmi/exp_indhead/finetuned_model.pth

120 KB
Binary file not shown.

gbmi/exp_indhead/induction_head_results.py

Lines changed: 32 additions & 16 deletions
@@ -791,6 +791,7 @@ def sample_acc_and_loss(model, batch_size=15000):

 W_U = ein.array(lambda i, j: i == j, sizes=[d_model, d_voc]).float().to(device)

+raw_terms = [W_U, W_K_1, W_K_0, W_Q_0, W_Q_1, W_V_0, W_V_1, W_E, W_O_0, W_O_1, W_pos]

 index_0_d = (
     ein.array(
@@ -1013,22 +1014,24 @@ def plot_diag(data, i):


 # %%
-def put_in_model(model):
-    model.W_U.data = W_U
-    model.blocks[0].attn.W_K.data[0] = W_K_0
-    model.blocks[1].attn.W_K.data[0] = W_K_1
-    model.blocks[0].attn.W_Q.data[0] = W_Q_0
-    model.blocks[1].attn.W_Q.data[0] = W_Q_1
-    model.blocks[0].attn.W_V.data[0] = W_V_0
-    model.blocks[1].attn.W_V.data[0] = W_V_1
-
-    model.W_E.data = W_E
-    model.blocks[0].attn.W_O.data[0] = W_O_0
-    model.blocks[1].attn.W_O.data[0] = W_O_1
-    model.W_pos.data = W_pos
-
-
-put_in_model(model_2)
+
+
+def put_in_model(model, raw):
+    # raw follows the order of raw_terms:
+    # [W_U, W_K_1, W_K_0, W_Q_0, W_Q_1, W_V_0, W_V_1, W_E, W_O_0, W_O_1, W_pos]
+    model.W_U.data = raw[0]
+    model.blocks[0].attn.W_K.data[0] = raw[2]
+    model.blocks[1].attn.W_K.data[0] = raw[1]
+    model.blocks[0].attn.W_Q.data[0] = raw[3]
+    model.blocks[1].attn.W_Q.data[0] = raw[4]
+    model.blocks[0].attn.W_V.data[0] = raw[5]
+    model.blocks[1].attn.W_V.data[0] = raw[6]
+
+    model.W_E.data = raw[7]
+    model.blocks[0].attn.W_O.data[0] = raw[8]
+    model.blocks[1].attn.W_O.data[0] = raw[9]
+    model.W_pos.data = raw[10]
+
+
+put_in_model(model_2, raw_terms)
 correct_terms = terms(model_2)
 correct_terms = tuple(term.clone().detach() for term in correct_terms)
@@ -1123,4 +1126,17 @@ def get_graphs(fun, model):
     return term_dic


+# %%
+def noise(M, v):
+    return M + torch.randn_like(M) * v
+
+
+def add_noise(model, v):
+    new_raw_terms = []
+    for i in range(len(raw_terms)):
+        new_raw_terms.append(noise(raw_terms[i].detach().clone(), v))
+        new_raw_terms[i].requires_grad = True
+    put_in_model(model, new_raw_terms)
+
+
 # %%
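
With the refactor, the weight surgery is reusable: put_in_model now takes the list of raw weight tensors explicitly, and add_noise writes a freshly perturbed, trainable copy of raw_terms into the model. A sketch of a noise sweep built from these helpers, assuming the model_2 and sample_acc_and_loss already defined in this file (the noise scales are illustrative):

    for v in (1e-3, 1e-2, 1e-1):
        add_noise(model_2, v)                     # overwrite weights with noisy copies
        acc, loss = sample_acc_and_loss(model_2)  # re-measure accuracy and loss
        print(v, acc.item(), loss.item())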

gbmi/exp_indhead/noise_bound.py

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ def armin(

 # %%
 def noise(M, v):
-    return M + (torch.rand_like(M) - 0.5) * 2 * v
+    return M + torch.randn_like(M) * v


 W_E = ein.array(lambda i, j: i == j, sizes=[d_voc, d_model]).float().to(device)
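
Note the replacement is not a like-for-like change of scale: the old uniform perturbation and the new Gaussian one differ in spread at the same v. A side-by-side comparison (illustrative, for any tensor M and scale v):

    old = M + (torch.rand_like(M) - 0.5) * 2 * v  # U[-v, v], std = v / sqrt(3) ≈ 0.58 v
    new = M + torch.randn_like(M) * v             # N(0, v^2), std = v, unbounded support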

gbmi/exp_indhead/term.pt

20.3 KB
Binary file not shown.
