nlp - Computing gradients w.r.t. the values of embedding vectors in PyTorch

Tags: nlp, lstm, chatbot, pytorch, word-embedding

I am trying to train a dual-encoder LSTM model for a chatbot with PyTorch.

I have defined two classes: the Encoder class defines the LSTM itself, and the DualEncoder class applies the Encoder to both the context and the response utterances I am trying to train on:

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch import autograd

class Encoder(nn.Module):

    def __init__(self, 
                 input_size, 
                 hidden_size, 
                 vocab_size, 
                 num_layers = 1, 
                 num_directions = 1, 
                 dropout = 0, 
                 bidirectional = False,
                 rnn_type = 'lstm'): 

                 super(Encoder, self).__init__()

                 self.input_size = input_size
                 self.hidden_size = hidden_size
                 self.vocab_size = vocab_size
                 self.num_layers = num_layers
                 self.num_directions = num_directions
                 self.dropout = dropout
                 self.bidirectional = bidirectional

                 self.embedding = nn.Embedding(vocab_size, input_size, sparse = False, padding_idx = 0)
                 self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=False, dropout = dropout, bidirectional=False).cuda()

                 self.init_weights()

    def init_weights(self):
        init.orthogonal(self.lstm.weight_ih_l0)

        init.uniform(self.lstm.weight_hh_l0, a=-0.01, b=0.01)

        embedding_weights = torch.FloatTensor(self.vocab_size, self.input_size).cuda()
        init.uniform(embedding_weights, a = -0.25, b= 0.25)

        id_to_vec, emb_dim = create_id_to_vec('/data/train_shuffled_onethousand.csv','/data/glove.6B.100d.txt')

        for id, vec in id_to_vec.items():
            embedding_weights[id] = vec

        del self.embedding.weight
        self.embedding.weight = nn.Parameter(embedding_weights)
        self.embedding.weight.requires_grad = True

        #self.embedding.weight.data.copy_(torch.from_numpy(self.embedding_weights))

    def forward(self, inputs):
        embeddings = self.embedding(inputs)
        outputs, hiddens = self.lstm(embeddings)
        return outputs, hiddens
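Aside on the initialization above: deleting self.embedding.weight and re-wrapping it in nn.Parameter works, but the commented-out copy_ line hints at the more common in-place pattern, which keeps the originally registered Parameter untouched. A minimal sketch under the same assumptions (id_to_vec maps token ids to FloatTensor GloVe vectors, as produced by the question's own helper; load_pretrained_embeddings is a hypothetical name):

# Sketch: fill a weight matrix with pretrained vectors, then copy it into the
# existing embedding Parameter in place (no del / nn.Parameter re-wrapping).
def load_pretrained_embeddings(embedding, id_to_vec, vocab_size, emb_dim):
    weights = torch.FloatTensor(vocab_size, emb_dim)
    init.uniform(weights, a=-0.25, b=0.25)   # random init for ids without a vector
    for token_id, vec in id_to_vec.items():
        weights[token_id] = vec              # overwrite with the GloVe vector
    embedding.weight.data.copy_(weights)     # weight stays the same Parameter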

#%%

class DualEncoder(nn.Module):

    def __init__(self, encoder):
         super(DualEncoder, self).__init__()
         self.encoder = encoder
         self.number_of_layers = 1
         #h_0 (num_layers * num_directions, batch, hidden_size): 
         #tensor containing the initial hidden state for each element in the batch.
         #dual_hidden_size = self.encoder.hidden_size * self.encoder.num_directions

         M = torch.FloatTensor(self.encoder.hidden_size, self.encoder.hidden_size).cuda()

         init.normal(M)

         self.M = nn.Parameter(M, requires_grad = True)

    def forward(self, contexts, responses):
        #output (seq_len, batch, hidden_size * num_directions): 
        #tensor containing the output features (h_t) from the last layer 
        #of the RNN, for each t. 

        #h_n (num_layers * num_directions, batch, hidden_size): 
        #tensor containing the hidden state for t=seq_len
        context_out, context_hn = self.encoder(contexts)

        response_out, response_hn = self.encoder(responses)

        scores_list = []

        y_preds = None

        for e in range(999):  # hard-coded to the number of training examples
            context_h = context_out[e][-1].view(1, self.encoder.hidden_size)
            response_h = response_out[e][-1].view(self.encoder.hidden_size,1)


            dot_var = torch.mm(torch.mm(context_h, self.M), response_h)[0][0]

            # NOTE: taking .data detaches the score from the autograd graph,
            # so no gradient can flow back through M or the encoders from here.
            dot_tensor = dot_var.data
            dot_tensor = dot_tensor.cuda()  # .cuda() is not in-place; reassign

            score = torch.sigmoid(dot_tensor)
            scores_list.append(score)

        y_preds_tensor = torch.stack(scores_list).cuda()  
        y_preds = autograd.Variable(y_preds_tensor).cuda()

        return y_preds 
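Note on the scoring loop above: because each score is computed from dot_var.data, it is detached from the autograd graph, so even with a correctly initialized embedding no gradient would reach M or the encoders. A sketch of a graph-preserving variant of the loop, under the same shape assumptions the question's indexing makes:

# Sketch: the same loop, but every step stays a Variable so that
# loss.backward() can reach self.M, the LSTM, and the embedding weights.
scores_list = []
for e in range(context_out.size(0)):   # batch size read from the tensor
    context_h = context_out[e][-1].view(1, self.encoder.hidden_size)
    response_h = response_out[e][-1].view(self.encoder.hidden_size, 1)
    dot_var = torch.mm(torch.mm(context_h, self.M), response_h)[0][0]
    scores_list.append(torch.sigmoid(dot_var))   # sigmoid on the Variable itself
y_preds = torch.stack(scores_list)               # 1-D Variable of scores
return y_preds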

#%% TRAINING

torch.backends.cudnn.enabled = False
#%%
vocab = create_vocab('/data/train_shuffled_onethousand.csv')
vocab_len = len(vocab)
emb_dim = get_emb_dim('/data/glove.6B.100d.txt')
#%%

encoder_model = Encoder(
        input_size = emb_dim,
        hidden_size = 300,
        vocab_size = vocab_len)

encoder_model.cuda()
#%%
dual_encoder = DualEncoder(encoder_model)

dual_encoder.cuda()
#%%
loss_func = torch.nn.BCELoss()

loss_func.cuda()

learning_rate = 0.001
epochs = 100
#batch_size = 50

optimizer = optim.Adam(dual_encoder.parameters(),
                       lr = learning_rate)
#%%
for i in range(epochs):

    context_matrix, response_matrix, y = make_matrices('/data/train_shuffled_onethousand.csv')

    # requires_grad=True on these integer index matrices is what triggers
    # the assertion below: Embedding cannot differentiate w.r.t. its indices.
    context_matrix = autograd.Variable(context_matrix, requires_grad=True).cuda()

    response_matrix = autograd.Variable(response_matrix, requires_grad=True).cuda()

    y_label = y.cuda()

    y_preds = dual_encoder(context_matrix, response_matrix)

    loss = loss_func(y_preds, y_label)

    if i % 10 == 0:
        print("Epoch: ", i, ", Loss: ", loss.data[0])

    #evaluation metrics...

    dual_encoder.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm(dual_encoder.parameters(), 10)

    optimizer.step()

This produces the following error:

result = self.forward(*input, **kwargs)
  File "all_scripts.py", line 258, in forward
    context_out, context_hn = self.encoder(contexts)
  File "/usr/local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 325, in __call__
    result = self.forward(*input, **kwargs)
  File "all_scripts.py", line 229, in forward
    embeddings = self.embedding(inputs)
  File "/usr/local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 325, in __call__
    result = self.forward(*input, **kwargs)
  File "/usr/local/lib/python3.6/site-packages/torch/nn/modules/sparse.py", line 103, in forward
    self.scale_grad_by_freq, self.sparse
  File "/usr/local/lib/python3.6/site-packages/torch/nn/_functions/thnn/sparse.py", line 40, in forward
    assert not ctx.needs_input_grad[0], "Embedding doesn't " \
AssertionError: Embedding doesn't compute the gradient w.r.t. the indices

I do understand why the problem occurs (of course it makes no sense to compute a gradient with respect to the indices). What I don't understand is how to adjust the code so that it computes the gradient with respect to the values of the embedding vectors instead.
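To make that concrete: the gradient in question lives on self.embedding.weight, which is already registered as an nn.Parameter with requires_grad = True; the integer indices themselves never carry gradients. A sketch of the desired end state, assuming the encoder_model defined above:

# Goal, made concrete: after a successful loss.backward(), the gradient
# w.r.t. the embedding values accumulates on the weight Parameter ...
print(encoder_model.embedding.weight.grad)   # should be populated, not None
# ... while the integer index inputs legitimately have no gradient at all.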

Any help is greatly appreciated!

(See also the thread in the PyTorch forums.)

Best answer

After some extensive tweaking, the code now runs. The problem was not only with the embedding initialization. See my github repo for the improved code.
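For readers who want the gist without the repo: a plausible minimal fix consistent with the error above (a sketch, not necessarily the exact code in the repo) involves two changes. First, wrap the index matrices without requires_grad=True; second, apply the sigmoid to the Variable itself rather than to its detached .data, so the loss can backpropagate into M, the LSTM, and the embedding weights:

# In the training loop: indices are lookups and need no gradient flag.
context_matrix = autograd.Variable(context_matrix).cuda()
response_matrix = autograd.Variable(response_matrix).cuda()

# In DualEncoder.forward: keep each score on the autograd graph.
dot_var = torch.mm(torch.mm(context_h, self.M), response_h)[0][0]
scores_list.append(torch.sigmoid(dot_var))   # no .data, gradient can flow
# torch.stack(scores_list) is then already a Variable; no re-wrapping needed.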

Regarding "nlp - Computing gradients w.r.t. the values of embedding vectors in PyTorch", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/48128934/
