Huggingface distilbert-base-uncased-finetuned-sst-2-english runs out of ram with only a few kb?

My dataset is only 10 thousand sentences. I run it in batches of 100, and clear the memory on each run. I manually slice the sentences to only 50 characters. After running for 32 minutes, it crashes… On google colab with 25 gigs of ram.

I must be doing something terribly wrong.

I’m using the out-of-the-box model and tokenizer.

 def eval_model(model, tokenizer_, X, y, batchsize, maxlength):
    assert len(X) == len(y)
    labels = ["negative", "positive"]
    correctCounter = 0
    
    epochs = int(np.ceil(len(dev_sent) / batchsize))
    accuracies = []
    for i in range(epochs):
      print(f"Epoch {i}")
      # slicing up the data into batches
      X_ = X[i:((i+1)*100)]
      X_ = [x[:maxlength] for x in X_] # make sure sentences are only of maxlength 
      y_ = y[i:((i+1)*100)]

      encoded_input = tokenizer(X_, return_tensors='pt', padding=True,  truncation=True, max_length=maxlength)
      output = model(**encoded_input)
      scores = output[0][0].detach().numpy()
      scores = softmax(scores)
      for i, scores in enumerate([softmax(logit) for logit in output["logits"].detach().numpy()]):
        print("--------------------")
        print(f'Sentence no. {len(accuracies)+1}')
        print("Sentence: " + X_[i])
        print("Score: " + str(scores))
        ranking = np.argsort(scores)
        print(f"Ranking: {ranking}")
        pred = labels[np.argmax(np.argsort(scores))]
        print(f"Prediction: {pred}, annotation: {y_[i]}")
        if pred == y_[i]: 
          print("SUCCES!")
          correctCounter += 1
        else: print("FAILURE!")
      # garbage collection (to not run out of ram... Which is shouldn't, it's just a few kb, but it does.... ?!)
      del(encoded_input)
      del(output)
      del(scores)
      gc.collect()
      accuracies.append(correctCounter / len(y_))
      #print(f"current accuracy: {np.mean(np.asarray(accuracies))}")
    return np.mean(np.asarray(accuracies))

  task='sentiment'
  MODEL = f"distilbert-base-uncased-finetuned-sst-2-english"

  tokenizer = AutoTokenizer.from_pretrained(MODEL)
  model = AutoModelForSequenceClassification.from_pretrained(MODEL)
  model.save_pretrained(MODEL)
  tokenizer.save_pretrained(MODEL)

  accuracy = eval_model(model, tokenizer, dev_sent, dev_sentiment, 100, 50)