I am trying to save a model (tensorflow-based) on S3 that I created which is essentially a finetuned version of the pretrained distilbert model. I am using the command
model.save_pretrained(model_path, save_models=True)
and getting the following error:
RuntimeError: Dirty entry flush destroy failed (file write failed: time = Mon Jan 2 02:19:33 2023
, filename = '/dbfs/mnt/spock-root/MODELS/ONE_LAYER_BASELINE/tf_model.h5', file descriptor = 115, errno = 95, error message = 'Operation not supported', buf = 0x11ff9c200, total write size = 4096, bytes this sub-write = 4096, bytes actually written = 18446744073709551615, offset = 0)
Here is the definition of my model class:
from typing import Optional, Tuple, Union

import numpy as np
import tensorflow as tf

from transformers.modeling_tf_outputs import (
    TFBaseModelOutput,
    TFMaskedLMOutput,
    TFMultipleChoiceModelOutput,
    TFQuestionAnsweringModelOutput,
    TFTokenClassifierOutput,
)

# NOTE(review): `TFMultiClassClassifierOutput` does not exist in `transformers`;
# alias the closest built-in output type (loss/logits/hidden_states/attentions)
# so the class body's references resolve. Replace with a custom ModelOutput
# subclass if additional fields are ever needed.
from transformers.modeling_tf_outputs import (
    TFSequenceClassifierOutput as TFMultiClassClassifierOutput,
)
from transformers.modeling_tf_utils import get_initializer, unpack_inputs
from transformers.models.distilbert.modeling_tf_distilbert import (
    TFDistilBertMainLayer,
    TFDistilBertPreTrainedModel,
)
class TFDistilBertForMultilabelClassification(TFDistilBertPreTrainedModel):
    """DistilBERT with a multi-label classification head.

    Pools the [CLS] token of the DistilBERT encoder output, optionally passes
    it through a dense "pre-classifier" layer, applies dropout, and projects
    to `config.num_labels` independent logits trained with element-wise
    binary cross-entropy (one sigmoid per label, not a softmax).
    """

    def __init__(self, config, *inputs, **kwargs):
        # BUG FIX: the constructor was named `init`, so it was never invoked
        # by Python, and `super().init(...)` would have raised AttributeError.
        super().__init__(config, *inputs, **kwargs)
        self.num_labels = config.num_labels
        self.config = config
        # BUG FIX: `num_dense_layers` was an undefined free variable; read it
        # from the config, defaulting to 1 (pre-classifier enabled) to match
        # the stock TFDistilBertForSequenceClassification architecture.
        self.num_dense_layers = getattr(config, "num_dense_layers", 1)
        self.distilbert = TFDistilBertMainLayer(config, name="distilbert")
        if self.num_dense_layers > 0:
            self.pre_classifier = tf.keras.layers.Dense(
                config.dim,
                kernel_initializer=get_initializer(config.initializer_range),
                activation="relu",
                name="pre_classifier",
            )
        # BUG FIX: `len(label_cols)` referenced an undefined free variable;
        # the number of output units is `config.num_labels`.
        self.classifier = tf.keras.layers.Dense(
            self.num_labels,
            kernel_initializer=get_initializer(config.initializer_range),
            name="classifier",
        )
        self.dropout = tf.keras.layers.Dropout(config.seq_classif_dropout)

    @unpack_inputs
    def call(
        self,
        input_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
        attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
        head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
        inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        labels: Optional[Union[np.ndarray, tf.Tensor]] = None,
        training: Optional[bool] = False,
    ) -> Union[TFMultiClassClassifierOutput, Tuple[tf.Tensor]]:
        r"""
        labels (`tf.Tensor` of shape `(batch_size, num_labels)`, *optional*):
            Multi-hot label indicators in `{0, 1}` for each of the
            `config.num_labels` classes. When provided, an element-wise
            binary cross-entropy loss is computed from the logits.
        """
        distilbert_output = self.distilbert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
            training=training,
        )
        hidden_state = distilbert_output[0]  # (bs, seq_len, dim)
        pooled_output = hidden_state[:, 0]  # [CLS] token -> (bs, dim)
        # BUG FIX: `num_dense_layers` was undefined here too; use the value
        # captured on the instance at construction time.
        if self.num_dense_layers > 0:
            pooled_output = self.pre_classifier(pooled_output)  # (bs, dim)
        pooled_output = self.dropout(pooled_output, training=training)
        logits = self.classifier(pooled_output)  # (bs, num_labels)
        if labels is None:
            loss = None
        else:
            # Per-label sigmoid cross-entropy; the trailing axis is added so
            # BCE is computed element-wise rather than averaged per row.
            loss = tf.keras.losses.binary_crossentropy(
                labels[..., None], logits[..., None], from_logits=True
            )
        if not return_dict:
            output = (logits,) + distilbert_output[1:]
            return ((loss,) + output) if loss is not None else output
        return TFMultiClassClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=distilbert_output.hidden_states,
            attentions=distilbert_output.attentions,
        )

    def freeze_layers_for_finetuning(
        self, num_to_freeze_tf_blocks: Optional[int] = 0
    ) -> None:
        """Freeze the first `num_to_freeze_tf_blocks` transformer blocks.

        Frozen blocks keep their pretrained weights during fine-tuning.
        """
        for i in range(num_to_freeze_tf_blocks):
            self.distilbert.transformer.layer[i].trainable = False

    def serving_output(
        self, output: TFMultiClassClassifierOutput
    ) -> TFMultiClassClassifierOutput:
        """Convert optional tuple fields to tensors for SavedModel serving."""
        hs = (
            tf.convert_to_tensor(output.hidden_states)
            if self.config.output_hidden_states
            else None
        )
        attns = (
            tf.convert_to_tensor(output.attentions)
            if self.config.output_attentions
            else None
        )
        return TFMultiClassClassifierOutput(
            logits=output.logits, hidden_states=hs, attentions=attns
        )