import torch
import torch.nn as nn


class robustlog(nn.Module):
    """Bidirectional LSTM with additive attention for log anomaly detection."""

    def __init__(self, input_size, hidden_size, num_layers, num_keys=2, device='cpu'):
        super(robustlog, self).__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size,
                            hidden_size,
                            num_layers,
                            batch_first=True,
                            bidirectional=True,
                            dropout=0.5)
        self.num_directions = 2
        self.fc1 = nn.Linear(hidden_size * self.num_directions, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_keys)

        # Attention parameters. Registered as nn.Parameter so they are trained and
        # moved with the module (torch.autograd.Variable is deprecated); small random
        # init avoids the degenerate all-zero start of the original Variables.
        self.attention_size = self.hidden_size
        self.w_omega = nn.Parameter(
            torch.randn(self.hidden_size * self.num_directions, self.attention_size) * 0.1)
        self.u_omega = nn.Parameter(torch.randn(self.attention_size) * 0.1)
        self.sequence_length = 100  # overwritten per batch in forward()

    def attention_net(self, lstm_output):
        # lstm_output: (batch, seq_len, hidden_size * num_directions)
        output_reshape = lstm_output.reshape(-1, self.hidden_size * self.num_directions)

        # Additive attention: score every time step, then normalise over the sequence.
        attn_tanh = torch.tanh(torch.mm(output_reshape, self.w_omega))
        attn_hidden_layer = torch.mm(attn_tanh, self.u_omega.reshape(-1, 1))
        exps = torch.exp(attn_hidden_layer).reshape(-1, self.sequence_length)
        alphas = exps / torch.sum(exps, 1).reshape(-1, 1)
        alphas_reshape = alphas.reshape(-1, self.sequence_length, 1)

        # Attention-weighted sum over time -> (batch, hidden_size * num_directions)
        attn_output = torch.sum(lstm_output * alphas_reshape, 1)
        return attn_output

    def forward(self, features):
        inp = features
        # inp = features[2]
        self.sequence_length = inp.shape[1]
        out, _ = self.lstm(inp)        # (batch, seq_len, hidden_size * 2)
        out = self.attention_net(out)  # (batch, hidden_size * 2)
        out = self.fc1(out)
        out = self.fc2(out)            # (batch, num_keys) logits
        return out


if __name__ == "__main__":
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = robustlog(input_size=300, hidden_size=10, num_layers=2, device=device).to(device)
    inp = torch.ones((64, 20, 300)).to(device)
    output = model(inp)
    print(output.shape)  # torch.Size([64, 2])
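
    # Illustrative sketch only: one supervised training step with cross-entropy on
    # the (batch, num_keys) logits above. The random labels and the Adam optimizer
    # are assumptions for demonstration, not part of the original script.
    labels = torch.randint(0, 2, (64,)).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss = criterion(output, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(loss.item())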