Way to fine-tune a pre-trained model and get the embeddings

I need the embedding of a sentence. Before using a BERT model, I want to fine-tune it on my specific domain and then get the embeddings (the pooler_output from AutoModel).
This was my plan:

  1. Fine-tune the masked language model on the specific domain
  2. Load this fine-tuned model into AutoModel and get the embeddings

However, the embeddings I get from the second step don't seem to be correct.
(I did a simple check: for a particular sentence, I computed the cosine similarity between the embedding from step 2 alone and the embedding from step 2 preceded by step 1; see the comparison code at the end. This similarity was low, which I think it shouldn't be, since I only fine-tuned on a small dataset of 500 data points.)

Code -


# step 1: fine-tune BERT with masked language modelling on the domain data
import pandas as pd
from transformers import AutoModelForMaskedLM
bert_maskedML = AutoModelForMaskedLM.from_pretrained('bert-base-uncased')

from transformers import AutoTokenizer
bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

import datasets
from datasets import load_dataset
df = pd.read_csv('/kaggle/input/inputs-n500/Regression_inputs_n500.csv')
dataset = load_dataset("csv", data_files='.fintech_inputs_n500.csv', split=datasets.Split.TRAIN)

# tokenize; truncation added so long rows don't exceed the model's max length
tok_oup = dataset.map(lambda x: bert_tokenizer(x['text'], padding='max_length', truncation=True), batched=True)
tok_oup = tok_oup.remove_columns('text')
tok_oup = tok_oup.remove_columns('Unnamed: 0')
tok_oup.set_format("torch", columns=["input_ids", "token_type_ids", "attention_mask"])
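For reference, a quick sanity check of one tokenized row (this snippet is my own addition; column names follow the map call above):

# sanity check: inspect the first tokenized example
sample = tok_oup[0]
print(sample['input_ids'].shape)  # should be the model's max length (512,)
print(bert_tokenizer.decode(sample['input_ids'][:20]))  # first tokens back as text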


from transformers import DataCollatorForLanguageModeling

# randomly selects 15% of input tokens for the MLM objective
data_collator = DataCollatorForLanguageModeling(
    tokenizer=bert_tokenizer, mlm=True, mlm_probability=0.15
)
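To see the collator in action, here is a small sketch (again my own addition) that collates a couple of examples and counts the [MASK] tokens:

# sketch: apply the collator to two tokenized examples and inspect the masking
batch = data_collator([tok_oup[i] for i in range(2)])
masked = (batch['input_ids'] == bert_tokenizer.mask_token_id)
print(masked.sum().item(), "tokens masked in this batch")
print(batch['labels'][0][:20])  # -100 everywhere except positions chosen for prediction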

from transformers import Trainer, TrainingArguments
training_args = TrainingArguments("test-trainer3")

trainer = Trainer(
    model=bert_maskedML,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tok_oup,
)

trainer.train()
trainer.save_model('./MaskedLM')
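Just to confirm what ends up on disk (my own check, path as above):

import os
print(os.listdir('./MaskedLM'))  # expect config.json plus the model weights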

# step 2: load the fine-tuned checkpoint into AutoModel
from transformers import AutoModel
bertMasked_auto = AutoModel.from_pretrained('/kaggle/working/MaskedLM')


# step 2 without step 1: the plain pre-trained model, for comparison
from transformers import AutoModel
bert_auto = AutoModel.from_pretrained('bert-base-uncased')
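Before the similarity check, a quick sketch (my own debugging addition) to see which parameters actually differ between the two models:

# sketch: locate parameters that differ between the fine-tuned and base model
for (name, p_ft), (_, p_base) in zip(bertMasked_auto.named_parameters(),
                                     bert_auto.named_parameters()):
    max_diff = (p_ft - p_base).abs().max().item()
    if max_diff > 1e-6:
        print(name, round(max_diff, 4))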


# comparison: cosine similarity between the two pooler outputs for one sentence
import torch

inputs = ["no you say that if i make a late payment there is no late fee"]
inputs = bert_tokenizer(inputs, padding='max_length', return_tensors='pt')

with torch.no_grad():
    bert_masked_predctn = bertMasked_auto(**inputs)
    bert_auto_predctn = bert_auto(**inputs)

from torch.nn import CosineSimilarity
cos = CosineSimilarity(dim=0, eps=1e-6)
auto_pooler = bert_auto_predctn['pooler_output']
Masked_auto_pooler = bert_masked_predctn['pooler_output']
cos(auto_pooler[0], Masked_auto_pooler[0])
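For completeness, here is a sketch of the same comparison using mean-pooled last_hidden_state instead of pooler_output (a common alternative sentence embedding; the pooling helper is my own addition):

# sketch: mean-pool token embeddings, ignoring padding positions
def mean_pool(model_output, attention_mask):
    mask = attention_mask.unsqueeze(-1).float()           # (batch, seq, 1)
    summed = (model_output.last_hidden_state * mask).sum(dim=1)
    return summed / mask.sum(dim=1)                       # average over real tokens

emb_base = mean_pool(bert_auto_predctn, inputs['attention_mask'])
emb_ft = mean_pool(bert_masked_predctn, inputs['attention_mask'])
print(cos(emb_base[0], emb_ft[0]))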