MoLFormer model training error

Below is the training script I have been using to train the model
ibm-research/MoLFormer-XL-both-10pct
on my custom dataset of 6 rows of training data and 6 rows of validation data.

The error, I think, arises when the model's tensors are saved after training.

Error:

You are trying to save a non contiguous tensor: `base_model.encoder.layer.0.attention.self.feature_map.weight` which is not allowed. It either means you are trying to save tensors which are reference of each other in which case it's recommended to save only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to pack it before saving.

To tackle this I added the for loop that makes the parameters contiguous, but to no avail; my guess is that the save_strategy argument in TrainingArguments is the culprit.

I did change it to "no", but then nothing gets stored in the results directory as it should.
Any help would be appreciated.


import torch
from transformers import AutoTokenizer, AutoModel, Trainer, TrainingArguments
from datasets import Dataset
import torch.nn as nn

# Load model and tokenizer
model_name = "ibm-research/MoLFormer-XL-both-10pct"
tokenizer = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True)
base_model = AutoModel.from_pretrained(model_name,trust_remote_code=True)
for param in base_model.parameters():
    if not param.is_contiguous():
        param.data = param.data.contiguous()


descriptor_list = [
    "alcoholic", "aldehydic", "alliaceous", "almond", "amber", "animal", "anisic", "apple", "apricot", "aromatic", "balsamic", "banana", "beefy", "bergamot", "berry", "bitter", "black currant", "brandy", "burnt", "buttery", "cabbage", "camphoreous", "caramellic", "cedar", "celery", "chamomile", "cheesy", "cherry", "chocolate", "cinnamon", "citrus", "clean", "clove", "cocoa", "coconut", "coffee", "cognac", "cooked", "cooling", "cortex", "coumarinic", "creamy", "cucumber", "dairy", "dry", "earthy", "ethereal", "fatty", "fermented", "fishy", "floral", "fresh", "fruit skin", "fruity", "garlic", "gassy", "geranium", "grape", "grapefruit", "grassy", "green", "hawthorn", "hay", "hazelnut", "herbal", "honey", "hyacinth", "jasmin", "juicy", "ketonic", "lactonic", "lavender", "leafy", "leathery", "lemon", "lily", "malty", "meaty", "medicinal", "melon", "metallic", "milky", "mint", "muguet", "mushroom", "musk", "musty", "natural", "nutty", "odorless", "oily", "onion", "orange", "orangeflower", "orris", "ozone", "peach", "pear", "phenolic", "pine", "pineapple", "plum", "popcorn", "potato", "powdery", "pungent", "radish", "raspberry", "ripe", "roasted", "rose", "rummy", "sandalwood", "savory", "sharp", "smoky", "soapy", "solvent", "sour", "spicy", "strawberry", "sulfurous", "sweaty", "sweet", "tea", "terpenic", "tobacco", "tomato", "tropical", "vanilla", "vegetable", "vetiver", "violet", "warm", "waxy", "weedy", "winey", "woody"
]
label_map = {desc: i for i, desc in enumerate(descriptor_list)}

# Training dataset
train_data = [
    ("CC(O)CN", "fishy"),
    ("CCC(=O)C(=O)O", "fatty;lactonic;sweet;caramellic;creamy"),
    ("O=C(O)CCc1ccccc1", "rose;floral;fatty;sweet;musk;cinnamon;balsamic"),
    ("OCc1ccc(O)cc1", "medicinal;phenolic;fruity;nutty;bitter;sweet;almond;coconut"),
    ("O=Cc1ccc(O)cc1", "phenolic;woody;nutty;vanilla;hay;metallic;sweet;almond;honey;balsamic")
]

# Validation dataset
val_data = [
    ("O=C(O)c1ccc(O)cc1", "nutty;phenolic"),
    ("CC(=O)O", "pungent;sharp;sour"),
    ("CC=O", "fresh;fruity;nutty;alcoholic;pungent;aldehydic;musty;ethereal"),
    ("CC(=O)C(C)O", "fatty;sweet;milky;dairy;buttery;creamy"),
    ("CC(C)=O", "pear;apple;ethereal;solvent"),
    ("O=C(O)CCCCC(=O)O", "sour")
]

def encode_data(data):
    inputs, masks, labels = [], [], []
    for smiles, descriptor_str in data:
        tokenized = tokenizer(smiles, truncation=True, padding="max_length", max_length=128, return_tensors="pt")
        label_tensor = torch.zeros(len(label_map))
        for d in descriptor_str.split(";"):
            if d in label_map:
                label_tensor[label_map[d]] = 1
        inputs.append(tokenized["input_ids"].squeeze().tolist())
        masks.append(tokenized["attention_mask"].squeeze().tolist())
        labels.append(label_tensor.tolist())
    return Dataset.from_dict({"input_ids": inputs, "attention_mask": masks, "labels": labels})

# Create datasets
dataset = encode_data(train_data)
val_dataset = encode_data(val_data)

# Define Multi-Label Model
class MultiLabelModel(nn.Module):
    def __init__(self, base_model, num_labels):
        super().__init__()
        self.base_model = base_model
        self.classifier = nn.Linear(base_model.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        logits = self.classifier(outputs.last_hidden_state[:, 0, :])  # CLS token output
        logits = logits.contiguous()
        if labels is not None:
            loss_fn = nn.BCEWithLogitsLoss()
            loss = loss_fn(logits, labels)
            return {"loss": loss, "logits": logits}
        return {"logits": logits}

num_labels = len(label_map)
model = MultiLabelModel(base_model, num_labels)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=10,
    logging_dir="./logs",
    logging_steps=1,
    #################save_strategy = "epoch"
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=val_dataset
)
trainer.train()

@John6666 can you please help with this? 🙏


It’s probably possible to get around this, but it would be even better if you could find out why it’s no longer contiguous and fix it.

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=10,
    logging_dir="./logs",
    logging_steps=1,
    save_safetensors=False, # added
    #################save_strategy = "epoch"
)
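
To narrow down which tensors are actually non-contiguous, a quick diagnostic sketch (it only inspects the model and changes nothing) is to walk over the base model's parameters and buffers:

for name, tensor in list(base_model.named_parameters()) + list(base_model.named_buffers()):
    if not tensor.is_contiguous():
        print(name, tuple(tensor.shape), tensor.stride())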

Hmm…


To address the issue of non-contiguous tensors in your PyTorch model, follow these structured steps to ensure all tensors are contiguous before saving and during operations:

Step 1: Identify Sources of Non-Contiguous Tensors

Check your model’s attention layer or any operations that might produce non-contiguous tensors. For example, transposing, slicing, or reshaping operations can make tensors non-contiguous.
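
As a generic illustration (not specific to MoLFormer), a transpose returns a view that shares storage with the original tensor and therefore reports as non-contiguous until it is packed:

w = torch.randn(4, 8)
wt = w.t()                               # a view with swapped strides, same storage
print(wt.is_contiguous())                # False
print(wt.contiguous().is_contiguous())   # True: .contiguous() copies into packed memory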

Step 2: Modify the Model’s Forward Pass

After any operation that could result in a non-contiguous tensor, explicitly make the tensor contiguous. For instance, after the base model’s output:

outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
last_hidden_state = outputs.last_hidden_state
# Ensure contiguous
last_hidden_state = last_hidden_state.contiguous()
logits = self.classifier(last_hidden_state[:, 0, :])
logits = logits.contiguous()

Step 3: Add a save_pretrained Method That Writes Contiguous Tensors

Since MultiLabelModel is a plain nn.Module and has no save_pretrained of its own to override, give it one that makes every tensor in the state dictionary contiguous before writing to disk. Modify your MultiLabelModel class as follows:

class MultiLabelModel(nn.Module):
    def __init__(self, base_model, num_labels):
        super().__init__()
        self.base_model = base_model
        self.classifier = nn.Linear(base_model.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state
        last_hidden_state = last_hidden_state.contiguous()  # Ensure contiguous
        logits = self.classifier(last_hidden_state[:, 0, :])
        logits = logits.contiguous()  # Ensure contiguous

        if labels is not None:
            loss_fn = nn.BCEWithLogitsLoss()
            loss = loss_fn(logits, labels)
            return {"loss": loss, "logits": logits}
        return {"logits": logits}

    def save_pretrained(self, save_directory):
        import os
        os.makedirs(save_directory, exist_ok=True)
        # nn.Module has no save_pretrained of its own, so write the weights
        # directly, making every tensor contiguous first.
        state_dict = {k: v.contiguous() for k, v in self.state_dict().items()}
        torch.save(state_dict, os.path.join(save_directory, "pytorch_model.bin"))
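
With that method in place, a manual save after training might look like this (the directory name is just an example):

model.save_pretrained("./results/manual_save")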

Step 4: Configure Checkpoint Saving in TrainingArguments

Make sure your training arguments actually save checkpoints. Setting save_strategy to "epoch" saves a checkpoint at the end of each epoch.

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=10,
    logging_dir="./logs",
    logging_steps=1,
    save_strategy="epoch",
    save_steps=100,  # ignored here; save_steps only applies when save_strategy="steps"
    save_total_limit=2,
)

Step 5: Save Only the Weights (Optional)

If saving the entire model through the Trainer causes issues, consider saving only the weights yourself:

torch.save(model.state_dict(), "./results/model_weights.pt")

Ensure that any custom save methods check and make tensors contiguous before saving.

Summary

By ensuring that all tensors are contiguous during the forward pass and before saving, you can prevent the non-contiguous tensor error. Modifying the save_pretrained method to enforce contiguity and adjusting training arguments to save models correctly are key steps in resolving this issue.

Thanks @John6666,
using the save_safetensors=False argument helped me overcome the error,
but making the tensors contiguous explicitly still resulted in the same error. Where could the problem lie?


It seems that this may be caused by the Accelerate library, but I don’t know if this has been resolved yet…

state_dict = accelerator.get_state_dict(model)
model.save_pretrained(output_path, state_dict=state_dict)
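
For completeness, here is a sketch of that idea applied to the script above, assuming a transformers version recent enough that the Trainer exposes an .accelerator attribute and that the safetensors package is installed (the output directory is arbitrary):

import os
from safetensors.torch import save_file

output_path = "./results/final"
os.makedirs(output_path, exist_ok=True)

# Pull the full state dict through Accelerate, then pack every tensor
# contiguously before writing it out with safetensors.
state_dict = trainer.accelerator.get_state_dict(trainer.model)
state_dict = {k: v.contiguous() for k, v in state_dict.items()}
save_file(state_dict, os.path.join(output_path, "model.safetensors"))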

okkk thank you very much @John6666
