import torch
import torch.nn as nn


class robustlog(nn.Module):
    """Bidirectional LSTM with additive attention for binary log anomaly detection."""

    def __init__(self, input_size, hidden_size, num_layers, num_keys=2, device='cpu'):
        super(robustlog, self).__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size,
                            hidden_size,
                            num_layers,
                            batch_first=True,
                            bidirectional=True,
                            dropout=0.5)
        self.num_directions = 2
        self.fc1 = nn.Linear(hidden_size * self.num_directions, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_keys)  # num_keys output classes (default 2)
        # Attention parameters. nn.Parameter (instead of the deprecated Variable)
        # registers them with the module so they follow .to(device) and appear
        # in model.parameters().
        self.attention_size = self.hidden_size
        self.w_omega = nn.Parameter(
            torch.zeros(self.hidden_size * self.num_directions, self.attention_size))
        self.u_omega = nn.Parameter(torch.zeros(self.attention_size))
        self.sequence_length = 100

    def attention_net(self, lstm_output, device):
        # lstm_output: (batch, seq_len, hidden_size * num_directions)
        output_reshape = lstm_output.reshape(
            -1, self.hidden_size * self.num_directions)
        attn_tanh = torch.tanh(torch.mm(output_reshape, self.w_omega.to(device)))
        attn_hidden_layer = torch.mm(
            attn_tanh, self.u_omega.to(device).reshape(-1, 1))
        # Normalize over the time dimension to get one attention weight per step.
        exps = torch.exp(attn_hidden_layer).reshape(-1, self.sequence_length)
        alphas = exps / torch.sum(exps, 1).reshape(-1, 1)
        alphas_reshape = alphas.reshape(-1, self.sequence_length, 1)
        # Attention-weighted sum of the LSTM outputs over time.
        attn_output = torch.sum(lstm_output * alphas_reshape, 1)
        return attn_output

    def forward(self, features):
        inp = features  # (batch, seq_len, input_size) semantic vectors
        # inp = features[2]
        self.sequence_length = inp.shape[1]
        out, _ = self.lstm(inp)
        out = self.attention_net(out, self.device)
        out = self.fc1(out)
        out = self.fc2(out)
        return out


if __name__ == "__main__":
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = robustlog(300, 10, 2, device=device).to(device)
    inp = torch.ones((64, 20, 300)).to(device)
    output = model(inp)
    print(output.shape)  # torch.Size([64, 2])
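    # Hedged sketch of a training step: the original script stops at the forward
    # pass, so the 0/1 labels and the loss below are illustrative assumptions,
    # not part of the source. The two logits are scored with cross-entropy.
    labels = torch.zeros(64, dtype=torch.long, device=device)  # hypothetical all-"normal" batch
    criterion = nn.CrossEntropyLoss()
    loss = criterion(output, labels)
    print(loss.item())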