Hi All, I am using model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased") for text classification. On what dataset is this XLNetForSequenceClassification pre-trained?
Thanks in advance
Hi @sru,

```python
model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased")
```

This line instantiates XLNet for a classification task by loading the pre-trained xlnet-base-cased language model and adding a classification head (a linear layer) on top of it.
Note that it is not trained for classification; the classification head is randomly initialised. You should fine-tune it on your dataset for classification.
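A tiny sketch (my own example, not from the docs) that makes this concrete; the attribute name `logits_proj` is how the head is named in the transformers XLNet implementation, as far as I know:

```python
# Sketch: the encoder weights come from the checkpoint, but the classification head
# is a fresh linear layer, so its outputs are meaningless until you fine-tune it.
from transformers import XLNetForSequenceClassification

model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased")
# Loading typically logs a warning that some weights (the classification head)
# were not found in the checkpoint and were newly initialised.
print(model.logits_proj)  # the randomly initialised nn.Linear head
```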
Thanks for your response.
So the classification head (linear layer) is randomly initialized.
We can use this model for multi-class classification, right?
Thanks in advance.
Which loss function is used in this model?
Yes, you can just set the `num_labels` argument to the number of classes that you have:

```python
model = AutoModelForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=3)
```

If you pass the labels during the forward pass, they will be used to calculate the loss: CrossEntropyLoss when num_labels is greater than one, otherwise MSELoss for a regression problem.
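For example, a small illustrative sketch (my own, assuming a recent transformers version where the loss is returned first when labels are passed):

```python
# Illustrative sketch: with num_labels=3 and integer class labels, passing `labels`
# makes the model compute a CrossEntropyLoss internally and return it as outputs[0].
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
model = AutoModelForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=3)

inputs = tokenizer("an example sentence", return_tensors="pt")
labels = torch.tensor([2])                # a class index in [0, num_labels)
outputs = model(**inputs, labels=labels)
loss, logits = outputs[0], outputs[1]     # loss first, then the per-class logits
print(loss.item(), logits.shape)          # scalar loss, logits of shape (1, 3)
```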
Thanks for your response.
So for multi-class classification we need to use AutoModelForSequenceClassification, not XLNetForSequenceClassification?
Is my understanding correct?
There is no difference. AutoModel(ForXXX) automatically selects the right model class depending on the checkpoint that you request. Because you ask for xlnet-base-cased, it will know to load XLNetForSequenceClassification. You can use either.
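As a quick check (my own snippet, not from the thread):

```python
# Sketch: the Auto class dispatches to the XLNet-specific class for this checkpoint,
# so both spellings load the same architecture.
from transformers import AutoModelForSequenceClassification, XLNetForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=3)
print(type(model).__name__)  # XLNetForSequenceClassification
assert isinstance(model, XLNetForSequenceClassification)
```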
@BramVanroy thanks
I am using the model below:

```python
xlnet_model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=3)
```

It is not classifying correctly:
```
              precision    recall  f1-score   support

           0       0.88      1.00      0.93      1746
           1       0.00      0.00      0.00       181
           2       0.00      0.00      0.00        68
```
Well, did you fine-tune the model…? It isn't going to miraculously work.
@BramVanroy
Thanks a lot for your response.
I did fine-tune the model, but it's not working; it is unable to identify label 2 at all.
Are those validation or test scores? What does your training loop look like?
These are test scores.
Training loop:
```python
def train_epoch(xlnet_model, data_loader, optimizer, device, scheduler, n_examples):
    xlnet_model = xlnet_model.train()
    losses = []
    acc = 0
    counter = 0

    for d in data_loader:
        input_ids = d["input_ids"].reshape(-1, 512).to(device)
        attention_mask = d["attention_mask"].to(device)
        labels = d["labels"].to(device)

        outputs = xlnet_model(input_ids=input_ids, token_type_ids=None,
                              attention_mask=attention_mask, labels=labels)
        loss = outputs[0]
        logits = outputs[1]

        _, prediction = torch.max(logits, dim=1)
        labels = labels.cpu().detach().numpy()
        prediction = prediction.cpu().detach().numpy()
        accuracy = metrics.accuracy_score(labels, prediction)
        acc += accuracy
        losses.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(xlnet_model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        counter = counter + 1

    return acc / counter, np.mean(losses)
```
```python
def process():
    # load data
    data = pd.read_csv('/corpus_p.csv')
    # data = shuffle(data)

    # split dataset into train, validation and test
    train_data, test_data = train_test_split(data, test_size=0.5, random_state=101)
    valid_data, test_data = train_test_split(test_data, test_size=0.5, random_state=101)

    # tokenizer
    PRE_TRAINED_MODEL_NAME = 'xlnet-base-cased'
    tokenizer = XLNetTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

    # loaders
    BATCH_SIZE = 4
    train_data_loader = dataloader(train_data, tokenizer, MAX_LEN, BATCH_SIZE)
    val_data_loader = dataloader(valid_data, tokenizer, MAX_LEN, BATCH_SIZE)
    test_data_loader = dataloader(test_data, tokenizer, MAX_LEN, BATCH_SIZE)

    # create the model (using the pre-trained XLNet model for text classification)
    xlnet_model = XLNetForSequenceClassification.from_pretrained('xlnet-base-cased', num_labels=2)
    xlnet_model = xlnet_model.to(device)
    print("xlnet")

    # optimizer
    optimizer = AdamW(xlnet_model.parameters(), lr=2e-5, weight_decay=0.01, correct_bias=False)
    EPOCHS = 9
    best_accuracy = 0

    param_optimizer = list(xlnet_model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}]
    optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5)

    total_steps = len(train_data_loader) * EPOCHS
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps)
    print("end")

    for epoch in range(EPOCHS):
        print(f'Epoch {epoch + 1}/{EPOCHS}')
        print('-' * 10)
        train_acc, train_loss = train_epoch(xlnet_model, train_data_loader, optimizer, device, scheduler, len(train_data))
        print(f'Train loss {train_loss} Train accuracy {train_acc}')
        val_acc, val_loss = eval_model(xlnet_model, val_data_loader, device, len(valid_data))
        print(f'Val loss {val_loss} Val accuracy {val_acc}')
        if val_acc > best_accuracy:
            torch.save(xlnet_model.state_dict(), './xlnet_model.bin')
            best_accuracy = val_acc

    xlnet_model.load_state_dict(torch.load('./xlnet_model.bin'))
    test_acc, test_loss = eval_model(xlnet_model, test_data_loader, device, len(test_data))
    print('Test Accuracy :', test_acc)
    print('Test Loss :', test_loss)

    y_review_texts, y_pred, y_pred_probs, y_test = get_predictions(xlnet_model, test_data_loader)
    # print(metrics.confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))


if __name__ == "__main__":
    process()
```
Please put in an effort and format your code correctly. This is not very readable.
I don't immediately see what is wrong here. As a rule of thumb, try overfitting one sample (just train for a couple of epochs on just one sample). You should get close to 100% accuracy as the model fully overfits. If that does not happen, then something is wrong in your loop/model set-up.
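For instance, a rough sketch of that check (reusing the names from your code such as xlnet_model, tokenizer, device and AdamW; I'm assuming a recent tokenizer, so the padding arguments may differ from yours):

```python
# Rough sketch of the "overfit one sample" sanity check (assumes xlnet_model,
# tokenizer, device and AdamW are already set up as in the code in this thread).
sample = tokenizer.encode_plus("one single training sentence", max_length=128,
                               truncation=True, padding="max_length",
                               return_tensors="pt")
input_ids = sample["input_ids"].to(device)
attention_mask = sample["attention_mask"].to(device)
label = torch.tensor([2]).to(device)          # pick any valid class index

sanity_optimizer = AdamW(xlnet_model.parameters(), lr=3e-5)
xlnet_model.train()
for step in range(50):
    sanity_optimizer.zero_grad()
    outputs = xlnet_model(input_ids=input_ids, attention_mask=attention_mask, labels=label)
    loss = outputs[0]
    loss.backward()
    sanity_optimizer.step()

# After a few dozen steps the loss should be near 0 and the prediction should
# equal `label`; if not, something in the model/loop set-up is wrong.
print(loss.item(), outputs[1].argmax(dim=1))
```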
training loop
```python
def train_epoch(xlnet_model, data_loader, optimizer, device, scheduler, n_examples):
    xlnet_model = xlnet_model.train()
    losses = []
    acc = 0
    counter = 0

    for d in data_loader:
        input_ids = d["input_ids"].reshape(-1, 128).to(device)
        attention_mask = d["attention_mask"].to(device)
        labels = d["labels"].to(device)

        outputs = xlnet_model(input_ids=input_ids, token_type_ids=None,
                              attention_mask=attention_mask, labels=labels)
        loss = outputs[0]
        logits = outputs[1]

        _, prediction = torch.max(logits, dim=1)
        labels = labels.cpu().detach().numpy()
        prediction = prediction.cpu().detach().numpy()
        accuracy = metrics.accuracy_score(labels, prediction)
        acc += accuracy
        losses.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(xlnet_model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        counter = counter + 1

    return acc / counter, np.mean(losses)
```
@BramVanroy training loop
That's not better. To format your code correctly, use the "preformatted text" button in the editor or, better yet, wrap the code in triple backticks ``` before and after the code.
But even so, as I said, I can't do anything with this code.
```python
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
# from pytorch_transformers import XLNetForSequenceClassification, XLNetTokenizer, AdamW, WarmupLinearSchedule
from transformers import XLNetModel, XLNetTokenizer, get_linear_schedule_with_warmup
from transformers import AdamW
from transformers import XLNetForSequenceClassification
from tqdm import tqdm, trange
from sklearn import metrics
import pandas as pd
import io
from sklearn.utils import shuffle
import numpy as np
import matplotlib.pyplot as plt
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, confusion_matrix
import time
import torch.nn.functional as F

MAX_LEN = 128
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Classification(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = str(self.texts[item])
        label = self.labels[item]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            pad_to_max_length=False,
            return_attention_mask=True,
            return_tensors='pt',
        )
        # pad/truncate the encoded ids and attention mask to MAX_LEN
        input_ids = pad_sequences(encoding['input_ids'], maxlen=MAX_LEN, dtype=torch.Tensor,
                                  truncating="post", padding="post")
        input_ids = input_ids.astype(dtype='int64')
        input_ids = torch.tensor(input_ids)
        attention_mask = pad_sequences(encoding['attention_mask'], maxlen=MAX_LEN, dtype=torch.Tensor,
                                       truncating="post", padding="post")
        attention_mask = attention_mask.astype(dtype='int64')
        attention_mask = torch.tensor(attention_mask)
        return {
            'review_text': text,
            'input_ids': input_ids,
            'attention_mask': attention_mask.flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# dataloader creation from a dataframe with `text` and `label` columns
def dataloader(df, tokenizer, max_len, batch_size):
    loader = Classification(
        texts=df.text.to_numpy(),
        labels=df.label.to_numpy(),
        tokenizer=tokenizer,
        max_len=max_len
    )
    return DataLoader(
        loader,
        batch_size=batch_size,
    )

# training
def train_epoch(xlnet_model, data_loader, optimizer, device, scheduler, n_examples):
    xlnet_model = xlnet_model.train()
    losses = []
    acc = 0
    counter = 0

    for d in data_loader:
        input_ids = d["input_ids"].reshape(-1, 128).to(device)
        attention_mask = d["attention_mask"].to(device)
        labels = d["labels"].to(device)

        outputs = xlnet_model(input_ids=input_ids, token_type_ids=None,
                              attention_mask=attention_mask, labels=labels)
        loss = outputs[0]
        logits = outputs[1]

        _, prediction = torch.max(logits, dim=1)
        labels = labels.cpu().detach().numpy()
        prediction = prediction.cpu().detach().numpy()
        accuracy = metrics.accuracy_score(labels, prediction)
        acc += accuracy
        losses.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(xlnet_model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        counter = counter + 1

    return acc / counter, np.mean(losses)

# validation
def eval_model(xlnet_model, data_loader, device, n_examples):
    xlnet_model = xlnet_model.eval()
    losses = []
    acc = 0
    counter = 0

    with torch.no_grad():
        for d in data_loader:
            input_ids = d["input_ids"].reshape(-1, 128).to(device)
            attention_mask = d["attention_mask"].to(device)
            labels = d["labels"].to(device)

            outputs = xlnet_model(input_ids=input_ids, token_type_ids=None,
                                  attention_mask=attention_mask, labels=labels)
            loss = outputs[0]
            logits = outputs[1]

            _, prediction = torch.max(logits, dim=1)
            labels = labels.cpu().detach().numpy()
            prediction = prediction.cpu().detach().numpy()
            accuracy = metrics.accuracy_score(labels, prediction)
            acc += accuracy
            losses.append(loss.item())
            counter = counter + 1

    return acc / counter, np.mean(losses)

# prediction
def get_predictions(xlnet_model, data_loader):
    xlnet_model = xlnet_model.eval()
    review_texts = []
    predictions = []
    prediction_probs = []
    real_values = []

    with torch.no_grad():
        for d in data_loader:
            texts = d["review_text"]
            input_ids = d["input_ids"].reshape(-1, 128).to(device)
            attention_mask = d["attention_mask"].to(device)
            labels = d["labels"].to(device)

            outputs = xlnet_model(input_ids=input_ids, token_type_ids=None,
                                  attention_mask=attention_mask, labels=labels)
            loss = outputs[0]
            logits = outputs[1]

            _, preds = torch.max(logits, dim=1)
            probs = F.softmax(logits, dim=1)

            review_texts.extend(texts)
            predictions.extend(preds)
            prediction_probs.extend(probs)
            real_values.extend(labels)

    predictions = torch.stack(predictions).cpu()
    prediction_probs = torch.stack(prediction_probs).cpu()
    real_values = torch.stack(real_values).cpu()
    return review_texts, predictions, prediction_probs, real_values

# main method
def process():
    # load data
    data = pd.read_csv('/corpus_p.csv')
    data = shuffle(data)

    # split dataset into train, validation and test
    train_data, test_data = train_test_split(data, test_size=0.5, random_state=101)
    valid_data, test_data = train_test_split(test_data, test_size=0.5, random_state=101)

    # tokenizer
    PRE_TRAINED_MODEL_NAME = 'xlnet-base-cased'
    tokenizer = XLNetTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

    # loaders
    BATCH_SIZE = 8
    train_data_loader = dataloader(train_data, tokenizer, MAX_LEN, BATCH_SIZE)
    val_data_loader = dataloader(valid_data, tokenizer, MAX_LEN, BATCH_SIZE)
    test_data_loader = dataloader(test_data, tokenizer, MAX_LEN, BATCH_SIZE)

    # create the model (using the pre-trained XLNet model for text classification)
    xlnet_model = XLNetForSequenceClassification.from_pretrained('xlnet-base-cased', num_labels=3)
    xlnet_model = xlnet_model.to(device)
    print("xlnet")

    # optimizer
    optimizer = AdamW(xlnet_model.parameters(), lr=2e-5, weight_decay=0.01, correct_bias=False)
    EPOCHS = 9
    best_accuracy = 0

    param_optimizer = list(xlnet_model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}]
    optimizer = AdamW(optimizer_grouped_parameters, lr=3e-5)

    total_steps = len(train_data_loader) * EPOCHS
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps)
    print("end")

    for epoch in range(EPOCHS):
        print(f'Epoch {epoch + 1}/{EPOCHS}')
        print('-' * 10)
        train_acc, train_loss = train_epoch(xlnet_model, train_data_loader, optimizer, device, scheduler, len(train_data))
        print(f'Train loss {train_loss} Train accuracy {train_acc}')
        val_acc, val_loss = eval_model(xlnet_model, val_data_loader, device, len(valid_data))
        print(f'Val loss {val_loss} Val accuracy {val_acc}')
        if val_acc > best_accuracy:
            torch.save(xlnet_model.state_dict(), './xlnet_model.bin')
            best_accuracy = val_acc

    xlnet_model.load_state_dict(torch.load('./xlnet_model.bin'))
    test_acc, test_loss = eval_model(xlnet_model, test_data_loader, device, len(test_data))
    print('Test Accuracy :', test_acc)
    print('Test Loss :', test_loss)

    y_review_texts, y_pred, y_pred_probs, y_test = get_predictions(xlnet_model, test_data_loader)
    # print(metrics.confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

if __name__ == "__main__":
    process()
```
@BramVanroy I updated the code, thanks a lot for your response.
Can someone guide me on which part I am doing wrong?
Did you try following this tutorial that uses XLNetForSequenceClassification?