Source code for src.core.bert.model

import torch
import torch.nn as nn

from core.transformer.model import EncoderLayer, PositionalEncoding


class BERTPretrainModel(nn.Module):
    """
    BERT Pretrain Model (masked language modeling + next sentence prediction)

    :param config_dict: Config params dictionary
    :type config_dict: dict
    """

    def __init__(self, config_dict):
        super(BERTPretrainModel, self).__init__()
        embed_dim = config_dict["model"]["d_model"]
        num_vocab = config_dict["dataset"]["num_vocab"]
        num_layers = config_dict["model"]["num_layers"]
        dropout = config_dict["model"]["dropout"]

        self.embed_layer = nn.Embedding(num_vocab, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.positional_encoding = PositionalEncoding(config_dict)
        # Wrap the encoder stack in nn.ModuleList so the layers are registered
        # as submodules and their parameters appear in model.parameters().
        self.encoder_layers = nn.ModuleList(
            [EncoderLayer(config_dict) for _ in range(num_layers)]
        )
        self.classifier_layer = nn.LazyLinear(num_vocab)
        self.nsp_classifier_layer = nn.LazyLinear(1)
    def forward(self, tokens):
        """
        Forward propagation

        :param tokens: Input tokens
        :type tokens: torch.Tensor (num_samples, seq_len)
        :return: Predicted token logits, NSP output
        :rtype: tuple (torch.Tensor [num_samples, seq_len, num_vocab], torch.Tensor [num_samples, 1])
        """
        tokens_embed = self.dropout(self.positional_encoding(self.embed_layer(tokens)))

        enc_output = tokens_embed
        for layer in self.encoder_layers:
            enc_output = layer(enc_output)

        # Vocabulary logits for masked language modeling (softmax left to the loss).
        output = self.classifier_layer(enc_output)
        # output = nn.Softmax(dim=-1)(output)

        # NSP head: sigmoid over a linear projection of the classifier output
        # at the first sequence position.
        nsp_output = torch.sigmoid(self.nsp_classifier_layer(output[:, 0, :]))

        return output, nsp_output
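A minimal sketch, not part of the module, of how the two tensors returned by BERTPretrainModel.forward could be turned into the usual pretraining losses: cross-entropy over the vocabulary for the masked-token logits and binary cross-entropy for the sigmoid NSP output of shape (num_samples, 1). The mlm_labels and nsp_labels tensors (and the -100 ignore index) are assumptions for illustration.

import torch
import torch.nn as nn

# Hypothetical inputs:
# output:     (num_samples, seq_len, num_vocab) raw vocabulary logits
# nsp_output: (num_samples, 1) sigmoid probabilities
# mlm_labels: (num_samples, seq_len) target ids, -100 at positions that are not masked
# nsp_labels: (num_samples, 1) 1.0 if the second segment follows the first, else 0.0
def pretrain_loss(output, nsp_output, mlm_labels, nsp_labels):
    mlm_criterion = nn.CrossEntropyLoss(ignore_index=-100)
    nsp_criterion = nn.BCELoss()

    # CrossEntropyLoss expects (N, C) logits and (N,) targets,
    # so fold the sequence dimension into the batch dimension.
    mlm_loss = mlm_criterion(
        output.reshape(-1, output.size(-1)), mlm_labels.reshape(-1)
    )
    nsp_loss = nsp_criterion(nsp_output, nsp_labels.float())
    return mlm_loss + nsp_loss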
class BERTFinetuneModel(nn.Module):
    """
    BERT Finetune Model (start/end span prediction)

    :param config_dict: Config params dictionary
    :type config_dict: dict
    """

    def __init__(self, config_dict):
        super(BERTFinetuneModel, self).__init__()
        embed_dim = config_dict["model"]["d_model"]
        num_vocab = config_dict["dataset"]["num_vocab"]
        num_layers = config_dict["model"]["num_layers"]
        dropout = config_dict["model"]["dropout"]
        d_model = config_dict["model"]["d_model"]
        self.seq_len = config_dict["dataset"]["seq_len"]

        self.embed_layer = nn.Embedding(num_vocab, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.positional_encoding = PositionalEncoding(config_dict)
        self.encoder_layers = nn.ModuleList(
            [EncoderLayer(config_dict) for _ in range(num_layers)]
        )

        # Trainable start/end vectors for span prediction. nn.Parameter registers
        # them with the module; the deprecated Variable wrapper would leave them
        # outside model.parameters(), so they would never be updated.
        self.start = nn.Parameter(nn.init.xavier_uniform_(torch.empty(d_model, 1)))
        self.end = nn.Parameter(nn.init.xavier_uniform_(torch.empty(d_model, 1)))
    def forward(self, tokens):
        """
        Forward propagation

        :param tokens: Input tokens
        :type tokens: torch.Tensor (num_samples, seq_len)
        :return: Encoded inputs, start and end id log-probabilities
        :rtype: tuple (torch.Tensor [num_samples, seq_len, d_model], torch.Tensor [num_samples, seq_len - seq_len // 2], torch.Tensor [num_samples, seq_len - seq_len // 2])
        """
        tokens_embed = self.dropout(self.positional_encoding(self.embed_layer(tokens)))

        enc_output = tokens_embed
        for layer in self.encoder_layers:
            enc_output = layer(enc_output)

        # Score only the second (context) half of the sequence for the answer span.
        cxt_enc_output = enc_output[:, self.seq_len // 2 :, :]

        # squeeze(-1) removes only the trailing singleton dimension, so a batch
        # of size 1 is not accidentally squeezed away as well.
        start_muls = torch.matmul(cxt_enc_output, self.start).squeeze(-1)
        start_probs = nn.LogSoftmax(dim=1)(start_muls)

        end_muls = torch.matmul(cxt_enc_output, self.end).squeeze(-1)
        end_probs = nn.LogSoftmax(dim=1)(end_muls)

        return enc_output, start_probs, end_probs
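A sketch, under assumptions not stated in the source, of consuming the finetune outputs: since start_probs and end_probs are log-probabilities over the context half of the sequence, a negative log-likelihood loss and a greedy span prediction follow directly. The start_labels and end_labels tensors (gold indices into that context half) are hypothetical.

import torch
import torch.nn as nn

# Hypothetical inputs:
# start_probs, end_probs:   (num_samples, seq_len - seq_len // 2) log-probabilities
#                           returned by BERTFinetuneModel.forward
# start_labels, end_labels: (num_samples,) gold start/end indices within the context half
def finetune_loss_and_spans(start_probs, end_probs, start_labels, end_labels):
    criterion = nn.NLLLoss()  # inputs are already log-softmaxed
    loss = criterion(start_probs, start_labels) + criterion(end_probs, end_labels)

    # Greedy span prediction: take the most likely start and end independently.
    pred_start = start_probs.argmax(dim=1)
    pred_end = end_probs.argmax(dim=1)
    return loss, pred_start, pred_end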