# model.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
import math


class BaseModel(nn.Module):
    """Shared base: parameter counting and weight initialization."""

    def __init__(self):
        super().__init__()

    def get_num_params(self):
        return sum(p.numel() for p in self.parameters())

    def _init_weights(self, module):
        # GPT-style init: N(0, 0.02) for linear weights, zeros for biases.
        if isinstance(module, nn.Linear):
            nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(self, x):
        raise NotImplementedError


class Linear_Reg_Plane(BaseModel):
    """Linear regression model with one hidden layer"""

    def __init__(self, input_dim, n_hid, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, n_hid)
        self.fc2 = nn.Linear(n_hid, output_dim)
        self.apply(self._init_weights)
        print(f"number of parameters: {self.get_num_params()/1e6:.6f} M")

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return self.fc2(x)
class Heads_Reg(BaseModel):
    """Regression model with multi-head between-heads attention"""

    def __init__(self, input_dim, n_embd, n_head, output_dim):
        super().__init__()
        assert n_embd % n_head == 0, "n_embd must be divisible by n_head"
        self.n_embd = n_embd
        self.n_head = n_head
        self.embed = nn.Linear(input_dim, n_embd)
        self.pred = nn.Linear(n_embd, output_dim)
        self.apply(self._init_weights)
        print(f"number of parameters: {self.get_num_params()/1e6:.6f} M")

    def get_wei(self, x):
        x = self.embed(x)
        x = rearrange(x, 'B (nh hs) -> B nh hs', nh=self.n_head)
        # Scale by sqrt(head_size) so the returned weights match what
        # scaled_dot_product_attention computes in forward.
        wei = x @ x.transpose(-2, -1) / math.sqrt(x.size(-1))
        wei = F.softmax(wei, dim=-1)
        return wei

    def forward(self, x):
        x = self.embed(x)
        # Split the embedding into heads; attention then mixes the heads
        # of each sample rather than positions of a sequence.
        x = rearrange(x, 'B (nh hs) -> B nh hs', nh=self.n_head)
        attention = F.scaled_dot_product_attention(x, x, x)
        attention = rearrange(attention, 'B nh hs -> B (nh hs)')
        return self.pred(attention)
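

def _demo_heads_reg():
    """Minimal shape sketch for Heads_Reg; the hyperparameters here are
    illustrative, not taken from the original file. With n_embd=32 and
    n_head=4, each sample's 32-dim embedding is split into 4 heads of
    size 8, and attention mixes the 4 heads within each sample."""
    model = Heads_Reg(input_dim=10, n_embd=32, n_head=4, output_dim=1)
    x = torch.randn(16, 10)        # (batch, input_dim)
    y = model(x)                   # -> (16, 1)
    wei = model.get_wei(x)         # -> (16, 4, 4) head-to-head weights
    assert y.shape == (16, 1) and wei.shape == (16, 4, 4)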
class Batchs_Reg_Plane(BaseModel):
    """Regression model with between-batches attention"""

    def __init__(self, input_dim, n_embd, n_batchs, output_dim):
        super().__init__()
        self.n_embd = n_embd
        self.n_batchs = n_batchs
        self.embed = nn.Linear(input_dim, n_embd)
        self.pred = nn.Linear(n_embd, output_dim)
        self.apply(self._init_weights)
        print(f"number of parameters: {self.get_num_params()/1e6:.6f} M")

    def get_wei(self, x):
        assert x.shape[0] % self.n_batchs == 0, "batch size must be divisible by n_batchs"
        x = self.embed(x)
        # Group the batch into n_batchs sub-batches; attention runs across them.
        x = rearrange(x, '(nB Bs) d -> Bs nB d', nB=self.n_batchs)
        # Same 1/sqrt(d) scaling that scaled_dot_product_attention uses in forward.
        wei = x @ x.transpose(-2, -1) / math.sqrt(self.n_embd)
        wei = F.softmax(wei, dim=-1)
        return wei

    def forward(self, x):
        assert x.shape[0] % self.n_batchs == 0, "batch size must be divisible by n_batchs"
        x = self.embed(x)
        x = rearrange(x, '(nB Bs) d -> Bs nB d', nB=self.n_batchs)
        attention = F.scaled_dot_product_attention(x, x, x)
        attention = rearrange(attention, 'Bs nB d -> (nB Bs) d')
        return self.pred(attention)
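

def _demo_batchs_reg():
    """Minimal shape sketch for Batchs_Reg_Plane; the hyperparameters here
    are illustrative, not taken from the original file. A batch of 16 is
    grouped into n_batchs=4 sub-batches of 4 samples each, and attention
    mixes corresponding samples across the sub-batches."""
    model = Batchs_Reg_Plane(input_dim=10, n_embd=32, n_batchs=4, output_dim=1)
    x = torch.randn(16, 10)        # batch size 16 is divisible by n_batchs=4
    y = model(x)                   # -> (16, 1)
    wei = model.get_wei(x)         # -> (4, 4, 4): (Bs, nB, nB) weights
    assert y.shape == (16, 1) and wei.shape == (4, 4, 4)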
class Linear_Reg_Gaussian(BaseModel):
    """Linear regression model with two hidden layers"""

    def __init__(self, input_dim, n_embd, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, n_embd)
        self.fc2 = nn.Linear(n_embd, n_embd)
        self.fc3 = nn.Linear(n_embd, output_dim)
        self.apply(self._init_weights)
        print(f"number of parameters: {self.get_num_params()/1e6:.6f} M")

    def forward(self, x):
        # torch.tanh replaces the deprecated F.tanh.
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        return self.fc3(x)
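

if __name__ == "__main__":
    # Smoke test (hyperparameters are illustrative, not from the original
    # file): instantiate each model and check output shapes on random data.
    x = torch.randn(16, 10)
    for model in (
        Linear_Reg_Plane(input_dim=10, n_hid=32, output_dim=1),
        Heads_Reg(input_dim=10, n_embd=32, n_head=4, output_dim=1),
        Batchs_Reg_Plane(input_dim=10, n_embd=32, n_batchs=4, output_dim=1),
        Linear_Reg_Gaussian(input_dim=10, n_embd=32, output_dim=1),
    ):
        assert model(x).shape == (16, 1)
    _demo_heads_reg()
    _demo_batchs_reg()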