Below is the training script I have been using to fine-tune the model ibm-research/MoLFormer-XL-both-10pct on a tiny custom dataset (five training and six validation examples, shown in full below). I think the error comes from saving the model's tensors after training:

Error:
You are trying to save a non contiguous tensor: `base_model.encoder.layer.0.attention.self.feature_map.weight` which is not allowed. It either means you are trying to save tensors which are reference of each other in which case it's recommended to save only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to pack it before saving.

To tackle this I added a for loop that calls .contiguous() on every parameter right after loading the base model, but to no avail. My guess is that the save_strategy argument in TrainingArguments is the culprit: setting it to "no" avoids the error, but then nothing gets stored in ./results, which defeats the purpose of saving.
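In case it is relevant, one thing I have not tried yet is moving the .contiguous() loop from load time to save time, along the lines of the sketch below (ContiguousTrainer is just a name I made up, and I am assuming save_model(output_dir, _internal_call) matches the Trainer signature in recent transformers versions). I have also seen save_safetensors=False mentioned as a way to fall back to torch.save, which tolerates non-contiguous tensors.

from transformers import Trainer

class ContiguousTrainer(Trainer):
    # Hypothetical workaround: pack every parameter right before a
    # checkpoint is written, as the error message itself suggests.
    def save_model(self, output_dir=None, _internal_call=False):
        for param in self.model.parameters():
            param.data = param.data.contiguous()
        super().save_model(output_dir, _internal_call)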
Please, any help would be appreciated.
import torch
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments
from datasets import Dataset
import torch.nn as nn
# Load model and tokenizer
model_name = "ibm-research/MoLFormer-XL-both-10pct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
base_model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

# Attempted fix: pack every parameter so it is contiguous in memory before training
for param in base_model.parameters():
    if not param.is_contiguous():
        param.data = param.data.contiguous()
descriptor_list = [
"alcoholic", "aldehydic", "alliaceous", "almond", "amber", "animal", "anisic", "apple", "apricot", "aromatic", "balsamic", "banana", "beefy", "bergamot", "berry", "bitter", "black currant", "brandy", "burnt", "buttery", "cabbage", "camphoreous", "caramellic", "cedar", "celery", "chamomile", "cheesy", "cherry", "chocolate", "cinnamon", "citrus", "clean", "clove", "cocoa", "coconut", "coffee", "cognac", "cooked", "cooling", "cortex", "coumarinic", "creamy", "cucumber", "dairy", "dry", "earthy", "ethereal", "fatty", "fermented", "fishy", "floral", "fresh", "fruit skin", "fruity", "garlic", "gassy", "geranium", "grape", "grapefruit", "grassy", "green", "hawthorn", "hay", "hazelnut", "herbal", "honey", "hyacinth", "jasmin", "juicy", "ketonic", "lactonic", "lavender", "leafy", "leathery", "lemon", "lily", "malty", "meaty", "medicinal", "melon", "metallic", "milky", "mint", "muguet", "mushroom", "musk", "musty", "natural", "nutty", "odorless", "oily", "onion", "orange", "orangeflower", "orris", "ozone", "peach", "pear", "phenolic", "pine", "pineapple", "plum", "popcorn", "potato", "powdery", "pungent", "radish", "raspberry", "ripe", "roasted", "rose", "rummy", "sandalwood", "savory", "sharp", "smoky", "soapy", "solvent", "sour", "spicy", "strawberry", "sulfurous", "sweaty", "sweet", "tea", "terpenic", "tobacco", "tomato", "tropical", "vanilla", "vegetable", "vetiver", "violet", "warm", "waxy", "weedy", "winey", "woody"
]
label_map = {desc: i for i, desc in enumerate(descriptor_list)}
# Training dataset
train_data = [
    ("CC(O)CN", "fishy"),
    ("CCC(=O)C(=O)O", "fatty;lactonic;sweet;caramellic;creamy"),
    ("O=C(O)CCc1ccccc1", "rose;floral;fatty;sweet;musk;cinnamon;balsamic"),
    ("OCc1ccc(O)cc1", "medicinal;phenolic;fruity;nutty;bitter;sweet;almond;coconut"),
    ("O=Cc1ccc(O)cc1", "phenolic;woody;nutty;vanilla;hay;metallic;sweet;almond;honey;balsamic")
]
# Validation dataset
val_data = [
    ("O=C(O)c1ccc(O)cc1", "nutty;phenolic"),
    ("CC(=O)O", "pungent;sharp;sour"),
    ("CC=O", "fresh;fruity;nutty;alcoholic;pungent;aldehydic;musty;ethereal"),
    ("CC(=O)C(C)O", "fatty;sweet;milky;dairy;buttery;creamy"),
    ("CC(C)=O", "pear;apple;ethereal;solvent"),
    ("O=C(O)CCCCC(=O)O", "sour")
]
def encode_data(data):
    inputs, masks, labels = [], [], []
    for smiles, descriptor_str in data:
        tokenized = tokenizer(smiles, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
        # Multi-hot label vector over the full descriptor vocabulary
        label_tensor = torch.zeros(len(label_map))
        for d in descriptor_str.split(";"):
            if d in label_map:
                label_tensor[label_map[d]] = 1
        inputs.append(tokenized["input_ids"].squeeze().tolist())
        masks.append(tokenized["attention_mask"].squeeze().tolist())
        labels.append(label_tensor.tolist())
    return Dataset.from_dict({"input_ids": inputs, "attention_mask": masks, "labels": labels})
# Create datasets
dataset = encode_data(train_data)
val_dataset = encode_data(val_data)
# Define Multi-Label Model
class MultiLabelModel(nn.Module):
    def __init__(self, base_model, num_labels):
        super().__init__()
        self.base_model = base_model
        self.classifier = nn.Linear(base_model.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        logits = self.classifier(outputs.last_hidden_state[:, 0, :])  # CLS token output
        logits = logits.contiguous()
        if labels is not None:
            loss_fn = nn.BCEWithLogitsLoss()
            loss = loss_fn(logits, labels)
            return {"loss": loss, "logits": logits}
        return {"logits": logits}
num_labels = len(label_map)
model = MultiLabelModel(base_model, num_labels)
# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=10,
    logging_dir="./logs",
    logging_steps=1,
    # save_strategy="epoch",  # enabling this is what triggers the non-contiguous tensor error
)
# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=val_dataset,
)
trainer.train()
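If it helps with debugging, a quick check like this after trainer.train() should list exactly which parameters are still non-contiguous at save time (the traceback points at base_model.encoder.layer.0.attention.self.feature_map.weight):

# Diagnostic sketch: report any parameters that safetensors would refuse to serialize
for name, param in model.named_parameters():
    if not param.is_contiguous():
        print(name, tuple(param.shape))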