Fine-tuning DistilBERT for multi-label classification: logits/labels shape mismatch

I’m trying to fine-tune DistilBERT for multi-label classification with Keras, but I’m running into several problems. To debug my code, I replicated it with the 20 Newsgroups dataset from sklearn and treated it as a multi-label problem (one-hot targets with binary cross-entropy instead of categorical cross-entropy). This is my code:

# load sklearn, pandas and numpy
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

import pandas as pd
import numpy as np

# deep learning stuff
import tensorflow as tf
from tensorflow import keras
from transformers import (
    AutoTokenizer,
    TFAutoModelForSequenceClassification
)

# download dataset
categories = ['alt.atheism', 'soc.religion.christian','comp.graphics', 'sci.med']
twenty_train = fetch_20newsgroups(
    subset='train', 
    categories=categories, 
    shuffle=True, 
    random_state=42
)
data = twenty_train.data
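# one-hot encode the integer class ids so every document gets a 4-dimensional 0/1 target vector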
target = OneHotEncoder(sparse=False).fit_transform(twenty_train.target.reshape(-1, 1))

model = TFAutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-cased", 
    from_pt=True, 
    num_labels=len(categories),
    problem_type="multi_label_classification"
)
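# binary cross-entropy on the raw logits: one independent sigmoid per label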
loss = keras.losses.BinaryCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(
    learning_rate=5e-05,
    epsilon=1e-08,
    decay=0.01,
    clipnorm=1.0)
model.compile(optimizer=optimizer, loss=loss)
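# tokenize all documents, padding/truncating to the model's maximum length (512 tokens)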
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
inputs = tokenizer(data, padding="max_length", truncation=True)
train_features = {x: inputs[x] for x in tokenizer.model_input_names}
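# build a tf.data.Dataset of (features, one-hot target) pairs, one element per document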
train_tf = tf.data.Dataset.from_tensor_slices((train_features, target))
model.fit(
    train_tf,
    batch_size=64,
    epochs=10
)

When I try to train I get this error message: ValueError: `logits` and `labels` must have the same shape, received ((512, 4) vs (4, 1)). What am I doing wrong?
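
The only thing I can think of is how I'm feeding the data: as far as I understand, the batch_size passed to fit() does not batch a tf.data.Dataset, and from_tensor_slices yields one unbatched example at a time (input_ids of shape (512,) and a label of shape (4,)). Below is a minimal sketch of what I think the batched version would look like; the .batch(64) call and dropping batch_size from fit() are my guesses, so please correct me if this is the wrong direction:

# sketch: batch the dataset explicitly instead of passing batch_size to fit()
train_tf = tf.data.Dataset.from_tensor_slices((train_features, target)).batch(64)
model.fit(
    train_tf,
    epochs=10
)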