So this worked for me. I imported

import torch
import torch.nn as nn
from transformers.modeling_utils import PreTrainedModel, PretrainedConfig

and then defined my class:
class TransformerLanguageModel(PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        # token and position embeddings
        self.token_embedding_table = nn.Embedding(config.vocab_size, config.hidden_size)
        self.position_embedding_table = nn.Embedding(config.block_size, config.hidden_size)
        # encoder-decoder transformer stack
        self.transformer = nn.Transformer(
            d_model=config.hidden_size,
            nhead=config.num_attention_heads,
            num_encoder_layers=config.num_hidden_layers,
            num_decoder_layers=config.num_hidden_layers,
            dim_feedforward=4 * config.hidden_size,
            dropout=config.hidden_dropout_prob,
            activation='gelu'
        )
        self.ln1 = nn.LayerNorm(config.hidden_size)
        self.ln2 = nn.LayerNorm(config.hidden_size)
        # projection back to vocabulary logits
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size)
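One optional refinement (a sketch, not something the steps below require): if you later want from_pretrained to rebuild the config on its own, you can give the model its own PretrainedConfig subclass and point config_class at it. The class name TransformerLMConfig, the model_type string, and the default values below are just placeholders, not part of the original code.

class TransformerLMConfig(PretrainedConfig):
    model_type = "transformer_lm"  # placeholder identifier, pick your own

    def __init__(self, vocab_size=1000, hidden_size=384, num_attention_heads=6,
                 num_hidden_layers=6, hidden_dropout_prob=0.2, block_size=256, **kwargs):
        # placeholder defaults; they are overridden by whatever you pass in
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_hidden_layers = num_hidden_layers
        self.hidden_dropout_prob = hidden_dropout_prob
        self.block_size = block_size
        super().__init__(**kwargs)

# then, inside the model class:
#     config_class = TransformerLMConfig

If you skip this, everything below still works; you just have to pass the config in by hand when loading.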
After that, you have to create a configuration object:
config = PretrainedConfig(
    vocab_size=1000,              # specify your vocabulary size
    hidden_size=n_embd,           # use your embedding dimension
    num_attention_heads=n_head,
    num_hidden_layers=n_layer,
    hidden_dropout_prob=dropout,
    block_size=block_size
)
model = TransformerLanguageModel(config)
model.to(device)
Now you can save the model:
model.save_pretrained('./path_to_model/')
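To load it back later, from_pretrained works on the same directory. Since the class above does not set a config_class, passing the config in explicitly is the safer route. A minimal sketch, assuming the same config and device variables as above:

loaded_model = TransformerLanguageModel.from_pretrained('./path_to_model/', config=config)
loaded_model.to(device)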