How do I add a CNN layer on top of BERT?
data-mining
nlp
cnn
pytorch
transfer-learning
bert
2021-09-22 05:54:08
2 Answers
You can use HuggingFace's BertModel (transformers) as the base layer of your model and build on top of it, just like you would build any other neural network in Pytorch. The other HuggingFace Bert models are built the same way. For reference, you can look at their TokenClassification code here. This is not mentioned much in the documentation, but I think this is how it should be done.
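For example, here is a minimal sketch of what building on top of BertModel could look like; the Conv1d layer, the channel sizes, and the pooling choice are illustrative assumptions of mine, not taken from the linked TokenClassification code:

import torch
import torch.nn as nn
import transformers

class BertCNN(nn.Module):
    # Sketch: BERT encoder followed by a 1-D convolution over the token dimension.
    # num_classes, out_channels and kernel_size are illustrative, not prescribed.
    def __init__(self, num_classes=2):
        super().__init__()
        self.bert = transformers.BertModel.from_pretrained('bert-base-uncased')
        self.conv = nn.Conv1d(in_channels=768, out_channels=256, kernel_size=3, padding=1)
        self.fc = nn.Linear(256, num_classes)

    def forward(self, input_ids, attention_mask=None):
        hidden = self.bert(input_ids, attention_mask=attention_mask)[0]  # (batch, seq_len, 768)
        conv_out = torch.relu(self.conv(hidden.permute(0, 2, 1)))        # Conv1d expects (batch, channels, seq_len)
        pooled = torch.max(conv_out, dim=2).values                       # max-pool over the token dimension
        return self.fc(pooled)

Built this way, the BERT weights are updated together with the CNN during training, unlike the feature-extraction approach in the code further below.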
For Tensorflow, however, you would need to convert the Bert model into a Keras layer. I have not really gone through or tried this myself, but I think this blog post explains it well.
BERT:
import numpy as np
import torch
import transformers

tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = transformers.BertModel.from_pretrained('bert-base-uncased')

max_seq = 100

def tokenize_text(df, max_seq):
    # Tokenize each text and truncate to max_seq tokens
    return [tokenizer.encode(text)[:max_seq] for text in df['text']]

def pad_text(tokenized_text, max_seq):
    # Right-pad every sequence with zeros up to max_seq
    return np.array([el + [0] * (max_seq - len(el)) for el in tokenized_text])

def tokenize_and_pad_text(df, max_seq):
    tokenized_text = tokenize_text(df, max_seq)
    padded_text = pad_text(tokenized_text, max_seq)
    return torch.tensor(padded_text)

def targets_to_tensor(df):
    return torch.tensor(df['label'].values, dtype=torch.float32)

# small_train, small_valid and small_test are DataFrames with 'text' and 'label' columns
train_indices = tokenize_and_pad_text(small_train, max_seq)
val_indices = tokenize_and_pad_text(small_valid, max_seq)
test_indices = tokenize_and_pad_text(small_test, max_seq)

with torch.no_grad():  # extract BERT embeddings as fixed features
    x_train = bert_model(train_indices)[0]
    x_val = bert_model(val_indices)[0]
    x_test = bert_model(test_indices)[0]

y_train = targets_to_tensor(small_train)
y_val = targets_to_tensor(small_valid)
y_test = targets_to_tensor(small_test)
CNN:
import time
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score
from torch.autograd import Variable

class KimCNN(nn.Module):
    def __init__(self, embed_num, embed_dim, class_num, kernel_num, kernel_sizes, dropout, static):
        super(KimCNN, self).__init__()
        V = embed_num
        D = embed_dim
        C = class_num
        Co = kernel_num
        Ks = kernel_sizes

        self.static = static
        self.embed = nn.Embedding(V, D)
        self.convs1 = nn.ModuleList([nn.Conv2d(1, Co, (K, D)) for K in Ks])
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(len(Ks) * Co, C)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        if self.static:
            x = Variable(x)
        x = x.unsqueeze(1)  # (N, Ci, W, D)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...] * len(Ks)
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...] * len(Ks)
        x = torch.cat(x, 1)
        x = self.dropout(x)  # (N, len(Ks) * Co)
        logit = self.fc1(x)  # (N, C)
        output = self.sigmoid(logit)
        return output
n_epochs = 50
batch_size = 10
lr = 0.01
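# Assumed instantiation (not shown in the original snippet): with bert-base
# embeddings, embed_num is the sequence length and embed_dim is 768.
# class_num, kernel_num, kernel_sizes and dropout are illustrative values.
model = KimCNN(
    embed_num=x_train.shape[1],
    embed_dim=x_train.shape[2],
    class_num=1,
    kernel_num=3,
    kernel_sizes=[2, 3, 4],
    dropout=0.5,
    static=True,
)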
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_fn = nn.BCELoss()
def generate_batch_data(x, y, batch_size):
    # Yield successive (x_batch, y_batch, batch_index) triples
    i, batch = 0, 0
    for batch, i in enumerate(range(0, len(x) - batch_size, batch_size), 1):
        x_batch = x[i : i + batch_size]
        y_batch = y[i : i + batch_size]
        yield x_batch, y_batch, batch
    if i + batch_size < len(x):
        yield x[i + batch_size :], y[i + batch_size :], batch + 1
    if batch == 0:
        yield x, y, 1
train_losses, val_losses = [], []

for epoch in range(n_epochs):
    start_time = time.time()
    train_loss = 0

    model.train(True)
    for x_batch, y_batch, batch in generate_batch_data(x_train, y_train, batch_size):
        y_pred = model(x_batch)
        y_batch = y_batch.unsqueeze(1)
        optimizer.zero_grad()
        loss = loss_fn(y_pred, y_batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= batch
    train_losses.append(train_loss)
    elapsed = time.time() - start_time

    model.eval()  # disable dropout for deterministic output
    with torch.no_grad():  # deactivate autograd engine to reduce memory usage and speed up computations
        val_loss, batch = 0, 1
        for x_batch, y_batch, batch in generate_batch_data(x_val, y_val, batch_size):
            y_pred = model(x_batch)
            y_batch = y_batch.unsqueeze(1)
            loss = loss_fn(y_pred, y_batch)
            val_loss += loss.item()
        val_loss /= batch
        val_losses.append(val_loss)

    print(
        "Epoch %d Train loss: %.2f. Validation loss: %.2f. Elapsed time: %.2fs."
        % (epoch + 1, train_losses[-1], val_losses[-1], elapsed)
    )
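After training, the held-out embeddings can be scored with the same batching helper; a minimal sketch, assuming binary 0/1 labels (roc_auc_score was already imported above):

model.eval()
with torch.no_grad():
    y_preds = []
    for x_batch, y_batch, batch in generate_batch_data(x_test, y_test, batch_size):
        y_preds.append(model(x_batch))
    y_pred_test = torch.cat(y_preds)  # shape (len(x_test), 1)

print(roc_auc_score(y_test.numpy(), y_pred_test.numpy().ravel()))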
This blog post explains nicely how to use a CNN together with BERT in Pytorch.