pre-train bert
HI
i am trying to pre_train bert in my own corpus
but got this errors
!pip install accelerate -U
!pip install transformers
import torchvision, torchaudio
from accelerate import Accelerator
from transformers import DataCollatorForLanguageModeling,TextDataset,TrainingArguments
from transformers import BertTokenizer, BertForPreTraining
tokenizer = BertTokenizer.from_pretrained('Jaafer/code-search-net-tokenizer')
model = BertForPreTraining.from_pretrained('bert-base-uncased')
!pip install accelerate -U
!pip install transformers
import torchvision, torchaudio
from accelerate import Accelerator
from transformers import DataCollatorForLanguageModeling,TextDataset,TrainingArguments
from transformers import BertTokenizer, BertForPreTraining
tokenizer = BertTokenizer.from_pretrained('Jaafer/code-search-net-tokenizer')
model = BertForPreTraining.from_pretrained('bert-base-uncased')
%%time
data="/kaggle/input/ontology/ontology.txt"
from transformers import LineByLineTextDataset
dataset = LineByLineTextDataset(
tokenizer=tokenizer,
file_path=data,
block_size=128,
)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=True, mlm_probability=0.15)
model.to(device)
from transformers import Trainer, TrainingArguments
training_args = TrainingArguments(
output_dir="./EsperBERTo",
overwrite_output_dir=True,
num_train_epochs=1,
per_gpu_train_batch_size=64,
save_steps=10_000,
save_total_limit=2,
prediction_loss_only=True,
)
trainer = Trainer(
model=model,
args=training_args,
data_collator=data_collator,
train_dataset=dataset,
)
trainer.train()