When I train a model(code like this ,it is sample,the code is very similar to TFBertForSequenceClassification,just for deffient parameter ‘training’)
class BertFC(tf.keras.Model):
def __init__(self):
super(BertFC, self).__init__()
self.bert = TFBertModel.from_pretrained('bert-base-chinese', return_dict=True)
self.dropout = tf.keras.layers.Dropout(0.1)
self.dense = tf.keras.layers.Dense(20, activation='softmax')
def call(self, inputs):
idx, attn, ids = inputs
hidden = self.bert(idx, attention_mask=attn, token_type_ids=ids, training=False)
temp = hidden[1]
temp = self.dropout(temp, training=True)
out = self.dense(temp)
return out
the learning rate is like this:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
def __init__(self, d_model, warmup_steps=1000):
super(CustomSchedule, self).__init__()
self.d_model = tf.cast(d_model, tf.float32)
self.warmup_steps = warmup_steps
def __call__(self, step):
arg1 = tf.math.rsqrt(step)
arg2 = step * (self.warmup_steps ** -1.5)
return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)
but when I train it , the acc first will rise,and then decline like this
I know about the saddle point,but the loss rise for a long time and seem like never decline
what wrong about this?(train data is current)