Optimizer got an empty parameter list when using DeepSpeed

Can somebody please point out what I'm doing wrong?

Here is the error message:

> optimizer got an empty parameter list

And this is the training code:

import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(
    "DeepESP/gpt2-spanish-medium",
    bos_token="<startoftext>", eos_token="<endoftext>", pad_token="<pad>",
)
model = AutoModelForCausalLM.from_pretrained("DeepESP/gpt2-spanish-medium")
model.resize_token_embeddings(len(tokenizer))  # account for the added special tokens

batch_size = 1  # must equal "train_batch_size" in the DeepSpeed config below

training_args = transformers.TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    logging_steps=200,
    save_strategy="epoch",
    per_device_eval_batch_size=batch_size,
    per_device_train_batch_size=batch_size,
    warmup_steps=100,
    logging_dir="log",
    weight_decay=0.01,
    fp16=True,
    deepspeed="./deepspeed_config.json",
)

def fn_collator(data):
    # stack the per-example tensors into a batch; for causal LM the
    # labels are the input_ids themselves
    return {
        "input_ids": torch.stack([f[0] for f in data]),
        "attention_mask": torch.stack([f[1] for f in data]),
        "labels": torch.stack([f[0] for f in data]),
    }

model.requires_grad_(False)  # freezes every parameter in the model

transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    data_collator=fn_collator,
).train()
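For what it's worth, the parameters the Trainer will hand to the optimizer can be inspected with plain PyTorch (nothing DeepSpeed-specific):

```python
# Parameters that still require gradients; the Trainer builds the
# optimizer from exactly these. After model.requires_grad_(False)
# above, this prints 0.
trainable = [n for n, p in model.named_parameters() if p.requires_grad]
print(f"trainable parameters: {len(trainable)}")
```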

The DeepSpeed configuration (`./deepspeed_config.json`):

{
    "train_batch_size": 1,
    "fp16": {
        "enabled": "auto"
    },
    "zero_optimization": {
        "stage": 1,
        "offload_optimizer": {
         "device": "cpu",
         "pin_memory": true
        },
        "allgather_partitions": true,
        "allgather_bucket_size": 2e8,
        "reduce_scatter": true,
        "reduce_bucket_size": 2e8,
        "overlap_comm": true
    },
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",
            "betas": "auto",
            "eps": "auto",
            "weight_decay": "auto"
        }
    },
    "scheduler": {
        "type": "WarmupLR",
        "params": {
        "warmup_min_lr": 0,
        "warmup_max_lr": 3e-5,
        "warmup_num_steps": 500
     }
   }
}
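As a side note, DeepSpeed requires that `train_batch_size` equal the per-device micro batch size times gradient accumulation steps times world size. A minimal sketch of that consistency check (assuming world size 1 and no gradient accumulation, as in my run):

```python
import json

with open("./deepspeed_config.json") as f:
    ds_cfg = json.load(f)

# DeepSpeed asserts:
#   train_batch_size == micro_batch_per_gpu * grad_accum_steps * world_size
world_size = 1   # single process (WORLD_SIZE=1 below)
grad_accum = 1   # TrainingArguments default
assert ds_cfg["train_batch_size"] == batch_size * grad_accum * world_size
```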
And this is how I set up the single-process distributed environment (I use the gloo backend because NCCL is not available on Windows):

import os
import deepspeed

os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "9994"
os.environ["RANK"] = "0"
os.environ["LOCAL_RANK"] = "0"
os.environ["WORLD_SIZE"] = "1"

deepspeed.init_distributed(dist_backend="gloo")
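To confirm the process group actually comes up, standard `torch.distributed` calls can be used:

```python
import torch.distributed as dist

# Expect: available=True, initialized=True, backend=gloo
print(f"available={dist.is_available()}, "
      f"initialized={dist.is_initialized()}, "
      f"backend={dist.get_backend()}")
```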

Environment: Windows 10, AMD Ryzen, RTX 2070 Super.