Adding image embeddings to LayoutLM makes the model unconvertible to ONNX
After following the Google Colab notebook, I wanted to convert the .pt model to ONNX. The issue is that the changes made in the notebook prevent the model conversion from working.
New model ------------------------------------
```python
import torch.nn as nn
from transformers.models.layoutlm import LayoutLMModel, LayoutLMConfig
from transformers.modeling_outputs import TokenClassifierOutput
import torchvision
from torchvision.ops import RoIAlign


class LayoutLMForTokenClassification(nn.Module):
    def __init__(self, output_size=(3, 3),
                 spatial_scale=14 / 224,
                 sampling_ratio=2
                 ):
        super().__init__()

        # LayoutLM base model + token classifier
        self.num_labels = len(label2idx)
        self.layoutlm = LayoutLMModel.from_pretrained("microsoft/layoutlm-base-uncased", num_labels=self.num_labels)
        self.dropout = nn.Dropout(self.layoutlm.config.hidden_dropout_prob)
        self.classifier = nn.Linear(self.layoutlm.config.hidden_size, self.num_labels)

        # backbone + roi-align + projection layer
        model = torchvision.models.resnet101(pretrained=True)
        self.backbone = nn.Sequential(*(list(model.children())[:-3]))
        self.roi_align = RoIAlign(output_size, spatial_scale=spatial_scale, sampling_ratio=sampling_ratio)
        self.projection = nn.Linear(in_features=1024 * 3 * 3, out_features=self.layoutlm.config.hidden_size)

    def forward(
        self,
        input_ids,
        bbox,
        attention_mask,
        token_type_ids,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        resized_images=None,  # shape (N, C, H, W), with H = W = 224
        resized_and_aligned_bounding_boxes=None,  # single torch tensor that also contains the batch index for every bbox at image size 224
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels - 1]``.
        """
        return_dict = return_dict if return_dict is not None else self.layoutlm.config.use_return_dict

        # first, forward pass on LayoutLM
        outputs = self.layoutlm(
            input_ids=input_ids,
            bbox=bbox,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]

        # next, send resized images of shape (batch_size, 3, 224, 224) through backbone to get feature maps of images
        # shape (batch_size, 1024, 14, 14)
        feature_maps = self.backbone(resized_images)

        # next, use roi align to get feature maps of individual (resized and aligned) bounding boxes
        # shape (batch_size*seq_len, 1024, 3, 3)
        device = input_ids.device
        resized_bounding_boxes_list = []
        for i in resized_and_aligned_bounding_boxes:
            resized_bounding_boxes_list.append(i.float().to(device))
        feat_maps_bboxes = self.roi_align(input=feature_maps,
                                          # we pass in a list of tensors
                                          # We have also added -0.5 for the first two coordinates and +0.5 for the last two coordinates,
                                          # see https://stackoverflow.com/questions/60060016/why-does-roi-align-not-seem-to-work-in-pytorch
                                          rois=resized_bounding_boxes_list
                                          )

        # next, reshape + project to same dimension as LayoutLM.
        batch_size = input_ids.shape[0]
        seq_len = input_ids.shape[1]
        feat_maps_bboxes = feat_maps_bboxes.view(batch_size, seq_len, -1)  # shape (batch_size, seq_len, 1024*3*3)
        projected_feat_maps_bboxes = self.projection(feat_maps_bboxes)  # shape (batch_size, seq_len, hidden_size)

        # add those to the sequence_output - shape (batch_size, seq_len, hidden_size)
        sequence_output += projected_feat_maps_bboxes

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            if attention_mask is not None:
                active_loss = attention_mask.view(-1) == 1
                active_logits = logits.view(-1, self.num_labels)[active_loss]
                active_labels = labels.view(-1)[active_loss]
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
```
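Note that this wrapper subclasses plain `nn.Module`, so as far as I can tell it never gets a top-level `config` attribute of its own; only the inner `LayoutLMModel` carries one. A quick check (assuming the model has been instantiated as `model` as in the notebook):

```python
# The wrapper itself has no `config`; only the wrapped LayoutLMModel does.
print(hasattr(model, "config"))            # False
print(model.layoutlm.config.hidden_size)   # 768 for layoutlm-base-uncased
```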
ERROR -----
```python
from transformers.onnx import export

def save_onnx(save_path):
    onnx_config = LayoutLMOnnxConfig(model.config)
    export(preprocessor=tokenizer, model=model.cpu(), config=onnx_config, output=Path(save_path), opset=11)
```

```
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
   1207         raise AttributeError("'{}' object has no attribute '{}'".format(
   1208             type(self).__name__, name))
   1209 
   1210     def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:

AttributeError: 'LayoutLMForTokenClassification' object has no attribute 'config'
```
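From the traceback, `transformers.onnx.export` looks up `model.config`, which the wrapper does not define. My assumption (not something the notebook does) is that mirroring the inner config on the wrapper, as sketched below, would get past this particular AttributeError, but the exporter's dummy inputs would still not cover `resized_images` / `resized_and_aligned_bounding_boxes`, so I am not sure the export can succeed as is:

```python
from pathlib import Path
from transformers.models.layoutlm import LayoutLMOnnxConfig
from transformers.onnx import export

# Assumption on my side: expose the inner LayoutLM config on the wrapper so the
# exporter's `model.config` lookup no longer raises AttributeError.
model.config = model.layoutlm.config

onnx_config = LayoutLMOnnxConfig(model.config)
export(preprocessor=tokenizer, model=model.cpu(), config=onnx_config,
       output=Path("/content/data/model/model.onnx"), opset=11)
```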
Reproduction
Step 1. Run this notebook -
Step 2. Run the model conversion code:
```python
from pathlib import Path
from transformers.models.layoutlm import LayoutLMOnnxConfig
from transformers.onnx import export

def save_onnx(save_path):
    onnx_config = LayoutLMOnnxConfig(model.config)
    export(preprocessor=tokenizer, model=model.cpu(), config=onnx_config, output=Path(save_path), opset=11)

print("Save model as ONNX")
save_onnx('/content/data/model/model.onnx')
```
I have also tried this method, but the output is blank:
```python
def save_onnx(save_path):
    configuration = LayoutLMConfig()
    onnx_config = LayoutLMOnnxConfig(configuration)
    export(preprocessor=tokenizer, model=model.cpu(), config=onnx_config, output=Path(save_path), opset=11)
```
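For what it's worth, the stock `LayoutLMOnnxConfig` only seems to declare the standard LayoutLM inputs, so I assume the extra image tensors the custom `forward` needs are never generated during export. A quick way to check (the expected names in the comment are my assumption about the stock config, not something I have verified across versions):

```python
from transformers.models.layoutlm import LayoutLMConfig, LayoutLMOnnxConfig

onnx_config = LayoutLMOnnxConfig(LayoutLMConfig())
print(list(onnx_config.inputs.keys()))
# Expected: ['input_ids', 'bbox', 'attention_mask', 'token_type_ids']
# i.e. no resized_images / resized_and_aligned_bounding_boxes
```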
Please let me know if you need anything else.
Expected behavior
The converted ONNX model is produced in the specified directory.