Interesting Results - Near perfect F1 and convergence when using smaller data sets. Thoughts?

Hey everyone! Semi-new to Transformers, Huggingface, and Transfer learning, so when I got these interesting results, I thought it would be best to come here to see if anyone can give any insights.

The first result that I would love to get everyones thoughts on is how accurate a model is being. There is no way that this is possible, right? It has too good to be true. The results are seen in the first image below, and

The second, the picture below, show’s the same data set, but trying with a smaller training (fine-tuning) size. It converges around the 0.7 for F1, which also doesn’t make any sense to me given the success of the full data set. The full data set does have about 1,000 more observations, but I wouldn’t believe that this would make a difference.

def load_dataset(tweets_filepath):
  LABEL2INDEX = {'love': 0, 'anger': 1, 'sadness': 2, 'happy': 3, 'fear': 4} # Label string to index
  INDEX2LABEL = {0: 'love', 1: 'anger', 2: 'sadness', 3: 'happy', 4: 'fear'} # Index to label string
  NUM_LABELS = 5 # Number of label
  df = pd.read_csv(tweets_filepath) # Read tsv file with pandas
  #df.columns = ['text','sentiment'] # Rename the columns
  df.columns = ['Sentiment','Input'] # Rename the columns
  df['Sentiment'] = df['Sentiment'].apply(lambda lab: LABEL2INDEX[lab]) # Convert string label into index
  return df
def train_test_valid_SPLIT(df):
  train_70 = df.sample(frac = 0.7)

  df_30 = df.drop(train_70.index)

  valid_15 = df_30.sample(frac = 0.5)

  test_15 = df.drop(valid_15.index) 

  return train_70, valid_15, test_15
def model_full(df_data_path, dense_1, dense_2, dropout, learning_rate, epoch, batch_size, DECAY):
  checkpoint_filepath = '/content/drive/MyDrive/DOE_Experiment_V2'

  df = load_dataset(df_data_path)
  train_70, valid_15, test_15 = train_test_valid_SPLIT(df)

  y_train = to_categorical(train_70.Sentiment)
  y_valid = to_categorical(valid_15.Sentiment)
  y_test = to_categorical(test_15.Sentiment)

  # Tokenize the input (takes some time) 
  # here tokenizer using from bert-base-cased
  x_train = tokenizer(
      text=train_70.Input.tolist(),
      add_special_tokens=True,
      max_length=70,
      truncation=True,
      padding=True, 
      return_tensors='tf',
      return_token_type_ids = False,
      return_attention_mask = True,
      verbose = True)
  x_valid = tokenizer(
      text=valid_15.Input.tolist(),
      add_special_tokens=True,
      max_length=70,
      truncation=True,
      padding=True, 
      return_tensors='tf',
      return_token_type_ids = False,
      return_attention_mask = True,
      verbose = True)
  x_test = tokenizer(
      text=test_15.Input.tolist(),
      add_special_tokens=True,
      max_length=70,
      truncation=True,
      padding=True, 
      return_tensors='tf',
      return_token_type_ids = False,
      return_attention_mask = True,
      verbose = True)

  input_ids = x_train['input_ids']
  attention_mask = x_train['attention_mask']

  max_len = 70
  input_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
  input_mask = Input(shape=(max_len,), dtype=tf.int32, name="attention_mask")
  embeddings = bert(input_ids,attention_mask = input_mask)[0] 
  out = tf.keras.layers.GlobalMaxPool1D()(embeddings)
  out = Dense(dense_1, activation='relu')(out)
  out = tf.keras.layers.Dropout(dropout)(out)
  out = Dense(dense_2,activation = 'relu')(out)
  y = Dense(5,activation = 'sigmoid')(out)
  model = tf.keras.Model(inputs=[input_ids, input_mask], outputs=y)
  model.layers[2].trainable = True

  LEARNING_RATE = learning_rate

  #3e-6
  NB_START_EPOCHS = epoch
  BATCH_SIZE = batch_size

  optimizer = Adam(
      learning_rate=learning_rate, # this learning rate is for bert model , taken from huggingface website 
      epsilon=1e-08,
      decay=DECAY,
      clipnorm=1.0)
  
  # Set loss and metrics
  loss = CategoricalCrossentropy(from_logits = True)
  metric = CategoricalAccuracy('balanced_accuracy')
  # Compile the model
  model.compile(
      optimizer = optimizer,
      loss = loss, 
      metrics = metric)

  model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
                            filepath=checkpoint_filepath,
                            save_weights_only=False,
                            monitor='val_accuracy',
                            mode='max',
                            save_best_only=True)
    
  optimizer = tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE)

  model.compile(optimizer=optimizer
                  , loss='categorical_crossentropy'
                  , metrics=['accuracy'])
    
  history = model.fit({'input_ids':x_train['input_ids'],'attention_mask':x_train['attention_mask']},
                       y_train,
                       epochs=NB_START_EPOCHS,
                       batch_size=BATCH_SIZE,
                       validation_data=({'input_ids':x_test['input_ids'],'attention_mask':x_test['attention_mask']}, y_test),
                       callbacks=[model_checkpoint_callback],
                       shuffle=True)
  


  model.load_weights(checkpoint_filepath)
  #INDEX2LABEL = {0: 'love', 1: 'anger', 2: 'sadness', 3: 'happy', 4: 'fear'} # Index to label string
  label_names = ['love', 'anger', 'sadness', 'happy', 'fear']

  predicted_raw = model.predict({'input_ids':x_test['input_ids'],'attention_mask':x_test['attention_mask']})
  y_predicted = np.argmax(predicted_raw, axis = 1)
  #y_true = y_test.Sentiment
  #y_true = y_test
  y_true = test_15.Sentiment
  #y_prob = model.predict(X_valid)
  #prediction_ints = np.zeros_like(y_prob)
  #prediction_ints[np.arange(len(y_prob)), y_prob.argmax(1)] = 1
  #prediction = np.where(prediction_ints==1)[1]
  
  print(classification_report(y_true, y_predicted, target_names=label_names, digits=4))

  return history
df_data_path = '/content/drive/MyDrive/DOE_Experiment_V2/Twitter_Emotion_Dataset.csv'
checkpoint_filepath = '/content/drive/MyDrive/DOE_Experiment_V2'
m_history = model_full(df_data_path, dense_1 = 128, dense_2 = 32, dropout = .1, learning_rate = 3e-6, epoch = 50 , batch_size = 64, DECAY = .005)
delete_saved_model()
Epoch 1/50
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?
WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss`argument?
49/49 [==============================] - ETA: 0s - loss: 0.8712 - accuracy: 0.7037WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 145s 2s/step - loss: 0.8712 - accuracy: 0.7037 - val_loss: 0.2962 - val_accuracy: 0.9479
Epoch 2/50
49/49 [==============================] - ETA: 0s - loss: 0.2812 - accuracy: 0.9406WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 114s 2s/step - loss: 0.2812 - accuracy: 0.9406 - val_loss: 0.1729 - val_accuracy: 0.9594
Epoch 3/50
49/49 [==============================] - ETA: 0s - loss: 0.1831 - accuracy: 0.9555WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 114s 2s/step - loss: 0.1831 - accuracy: 0.9555 - val_loss: 0.1186 - val_accuracy: 0.9733
Epoch 4/50
49/49 [==============================] - ETA: 0s - loss: 0.1391 - accuracy: 0.9659WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 115s 2s/step - loss: 0.1391 - accuracy: 0.9659 - val_loss: 0.0955 - val_accuracy: 0.9778
Epoch 5/50
49/49 [==============================] - ETA: 0s - loss: 0.1071 - accuracy: 0.9724WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 114s 2s/step - loss: 0.1071 - accuracy: 0.9724 - val_loss: 0.0759 - val_accuracy: 0.9824
Epoch 6/50
49/49 [==============================] - ETA: 0s - loss: 0.0833 - accuracy: 0.9799WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 115s 2s/step - loss: 0.0833 - accuracy: 0.9799 - val_loss: 0.0609 - val_accuracy: 0.9848
Epoch 7/50
49/49 [==============================] - ETA: 0s - loss: 0.0590 - accuracy: 0.9867WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 115s 2s/step - loss: 0.0590 - accuracy: 0.9867 - val_loss: 0.0557 - val_accuracy: 0.9866
Epoch 8/50
49/49 [==============================] - ETA: 0s - loss: 0.0488 - accuracy: 0.9873WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 116s 2s/step - loss: 0.0488 - accuracy: 0.9873 - val_loss: 0.0515 - val_accuracy: 0.9882
Epoch 9/50
49/49 [==============================] - 59s 1s/step - loss: 0.0420 - accuracy: 0.9906 - val_loss: 0.0535 - val_accuracy: 0.9874
Epoch 10/50
49/49 [==============================] - ETA: 0s - loss: 0.0366 - accuracy: 0.9906WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 117s 2s/step - loss: 0.0366 - accuracy: 0.9906 - val_loss: 0.0456 - val_accuracy: 0.9898
Epoch 11/50
49/49 [==============================] - 60s 1s/step - loss: 0.0266 - accuracy: 0.9925 - val_loss: 0.0484 - val_accuracy: 0.9896
Epoch 12/50
49/49 [==============================] - 63s 1s/step - loss: 0.0234 - accuracy: 0.9948 - val_loss: 0.0493 - val_accuracy: 0.9896
Epoch 13/50
49/49 [==============================] - ETA: 0s - loss: 0.0247 - accuracy: 0.9932WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 118s 2s/step - loss: 0.0247 - accuracy: 0.9932 - val_loss: 0.0463 - val_accuracy: 0.9904
Epoch 14/50
49/49 [==============================] - ETA: 0s - loss: 0.0201 - accuracy: 0.9955WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 116s 2s/step - loss: 0.0201 - accuracy: 0.9955 - val_loss: 0.0445 - val_accuracy: 0.9914
Epoch 15/50
49/49 [==============================] - 59s 1s/step - loss: 0.0177 - accuracy: 0.9961 - val_loss: 0.0477 - val_accuracy: 0.9906
Epoch 16/50
49/49 [==============================] - 61s 1s/step - loss: 0.0166 - accuracy: 0.9961 - val_loss: 0.0440 - val_accuracy: 0.9912
Epoch 17/50
49/49 [==============================] - 62s 1s/step - loss: 0.0170 - accuracy: 0.9961 - val_loss: 0.0419 - val_accuracy: 0.9914
Epoch 18/50
49/49 [==============================] - 63s 1s/step - loss: 0.0096 - accuracy: 0.9974 - val_loss: 0.0456 - val_accuracy: 0.9906
Epoch 19/50
49/49 [==============================] - 63s 1s/step - loss: 0.0124 - accuracy: 0.9968 - val_loss: 0.0436 - val_accuracy: 0.9914
Epoch 20/50
49/49 [==============================] - ETA: 0s - loss: 0.0094 - accuracy: 0.9971WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 120s 2s/step - loss: 0.0094 - accuracy: 0.9971 - val_loss: 0.0425 - val_accuracy: 0.9920
Epoch 21/50
49/49 [==============================] - 60s 1s/step - loss: 0.0072 - accuracy: 0.9977 - val_loss: 0.0444 - val_accuracy: 0.9917
Epoch 22/50
49/49 [==============================] - 61s 1s/step - loss: 0.0066 - accuracy: 0.9987 - val_loss: 0.0447 - val_accuracy: 0.9917
Epoch 23/50
49/49 [==============================] - 62s 1s/step - loss: 0.0074 - accuracy: 0.9981 - val_loss: 0.0474 - val_accuracy: 0.9909
Epoch 24/50
49/49 [==============================] - ETA: 0s - loss: 0.0071 - accuracy: 0.9977WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 120s 2s/step - loss: 0.0071 - accuracy: 0.9977 - val_loss: 0.0442 - val_accuracy: 0.9928
Epoch 25/50
49/49 [==============================] - 60s 1s/step - loss: 0.0051 - accuracy: 0.9981 - val_loss: 0.0449 - val_accuracy: 0.9925
Epoch 26/50
49/49 [==============================] - 61s 1s/step - loss: 0.0060 - accuracy: 0.9987 - val_loss: 0.0482 - val_accuracy: 0.9920
Epoch 27/50
49/49 [==============================] - 62s 1s/step - loss: 0.0052 - accuracy: 0.9987 - val_loss: 0.0500 - val_accuracy: 0.9917
Epoch 28/50
49/49 [==============================] - 63s 1s/step - loss: 0.0028 - accuracy: 0.9990 - val_loss: 0.0454 - val_accuracy: 0.9928
Epoch 29/50
49/49 [==============================] - 64s 1s/step - loss: 0.0055 - accuracy: 0.9984 - val_loss: 0.0469 - val_accuracy: 0.9920
Epoch 30/50
49/49 [==============================] - 63s 1s/step - loss: 0.0026 - accuracy: 0.9987 - val_loss: 0.0444 - val_accuracy: 0.9928
Epoch 31/50
49/49 [==============================] - 64s 1s/step - loss: 0.0027 - accuracy: 0.9997 - val_loss: 0.0532 - val_accuracy: 0.9920
Epoch 32/50
49/49 [==============================] - 64s 1s/step - loss: 0.0028 - accuracy: 0.9987 - val_loss: 0.0504 - val_accuracy: 0.9920
Epoch 33/50
49/49 [==============================] - 63s 1s/step - loss: 0.0033 - accuracy: 0.9994 - val_loss: 0.0513 - val_accuracy: 0.9925
Epoch 34/50
49/49 [==============================] - ETA: 0s - loss: 0.0022 - accuracy: 0.9997WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 118s 2s/step - loss: 0.0022 - accuracy: 0.9997 - val_loss: 0.0499 - val_accuracy: 0.9930
Epoch 35/50
49/49 [==============================] - 60s 1s/step - loss: 0.0018 - accuracy: 0.9997 - val_loss: 0.0542 - val_accuracy: 0.9925
Epoch 36/50
49/49 [==============================] - 61s 1s/step - loss: 8.6645e-04 - accuracy: 0.9997 - val_loss: 0.0579 - val_accuracy: 0.9917
Epoch 37/50
49/49 [==============================] - ETA: 0s - loss: 0.0028 - accuracy: 0.9987WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 118s 2s/step - loss: 0.0028 - accuracy: 0.9987 - val_loss: 0.0504 - val_accuracy: 0.9936
Epoch 38/50
49/49 [==============================] - 60s 1s/step - loss: 7.3073e-04 - accuracy: 0.9997 - val_loss: 0.0515 - val_accuracy: 0.9922
Epoch 39/50
49/49 [==============================] - 61s 1s/step - loss: 0.0015 - accuracy: 0.9987 - val_loss: 0.0574 - val_accuracy: 0.9917
Epoch 40/50
49/49 [==============================] - ETA: 0s - loss: 0.0019 - accuracy: 0.9990WARNING:absl:Found untraced functions such as embeddings_layer_call_fn, embeddings_layer_call_and_return_conditional_losses, encoder_layer_call_fn, encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn while saving (showing 5 of 420). These functions will not be directly callable after loading.
49/49 [==============================] - 120s 2s/step - loss: 0.0019 - accuracy: 0.9990 - val_loss: 0.0499 - val_accuracy: 0.9941
Epoch 41/50
49/49 [==============================] - 60s 1s/step - loss: 4.2783e-04 - accuracy: 1.0000 - val_loss: 0.0515 - val_accuracy: 0.9928
Epoch 42/50
49/49 [==============================] - 61s 1s/step - loss: 0.0012 - accuracy: 0.9994 - val_loss: 0.0551 - val_accuracy: 0.9930
Epoch 43/50
49/49 [==============================] - 62s 1s/step - loss: 5.2802e-04 - accuracy: 0.9997 - val_loss: 0.0530 - val_accuracy: 0.9928
Epoch 44/50
49/49 [==============================] - 63s 1s/step - loss: 0.0026 - accuracy: 0.9994 - val_loss: 0.0598 - val_accuracy: 0.9920
Epoch 45/50
49/49 [==============================] - 63s 1s/step - loss: 9.5365e-04 - accuracy: 0.9994 - val_loss: 0.0632 - val_accuracy: 0.9909
Epoch 46/50
49/49 [==============================] - 63s 1s/step - loss: 0.0034 - accuracy: 0.9997 - val_loss: 0.0535 - val_accuracy: 0.9925
Epoch 47/50
49/49 [==============================] - 63s 1s/step - loss: 1.7547e-04 - accuracy: 1.0000 - val_loss: 0.0565 - val_accuracy: 0.9930
Epoch 48/50
49/49 [==============================] - 63s 1s/step - loss: 1.4855e-04 - accuracy: 1.0000 - val_loss: 0.0517 - val_accuracy: 0.9936
Epoch 49/50
49/49 [==============================] - 64s 1s/step - loss: 4.2302e-04 - accuracy: 0.9997 - val_loss: 0.0596 - val_accuracy: 0.9930
Epoch 50/50
49/49 [==============================] - 63s 1s/step - loss: 6.3817e-04 - accuracy: 0.9997 - val_loss: 0.0544 - val_accuracy: 0.9939
117/117 [==============================] - 22s 160ms/step
              precision    recall  f1-score   support

        love     0.9981    0.9872    0.9926       546
       anger     0.9957    0.9957    0.9957       929
     sadness     0.9929    0.9988    0.9959       846
       happy     0.9942    0.9942    0.9942       855
        fear     0.9894    0.9912    0.9903       565

    accuracy                         0.9941      3741
   macro avg     0.9941    0.9934    0.9937      3741
weighted avg     0.9941    0.9941    0.9941      3741

It would only let me post one image. The second image I reference above is below. Thanks in advance for any help anyone can provide!