AttributeError: 'TimmBackbone' object has no attribute 'model_type'

I want to create a hybrid ViT model for image classification using InceptionV3 as the backbone, but I am encountering the following attribute error.

AttributeError                            Traceback (most recent call last)
<ipython-input-7-d3ad4668c999> in <cell line: 16>()
     14 
     15 # Initialize the model with the config
---> 16 model = ViTHybridForImageClassification(config)
     17 #model = ViTHybridModel(config)
     18 

4 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
   1727             if name in modules:
   1728                 return modules[name]
-> 1729         raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
   1730 
   1731     def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:

AttributeError: 'TimmBackbone' object has no attribute 'model_type'

My code is given below. I am using the fastai library together with transformers and timm.

# Script overview: build fastai DataLoaders from an image folder, configure a
# ViTHybrid classifier with an InceptionV3 backbone, and train it with a fastai
# Learner. The imports and the definition of `path` are not shown in this snippet.

# Define a DataBlock with more augmentations
dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=GrandparentSplitter(train_name='train', valid_name='valid'),
    get_y=parent_label,
    # NOTE(review): items are resized to 224 here, but the model config below
    # declares image_size=500 — one of the two probably needs to change.
    item_tfms=Resize(224),
    batch_tfms=aug_transforms(
        mult=2.0,                # NOTE(review): fastai documents `mult` as a multiplier on max_rotate/max_lighting/max_warp, not a count of transforms — confirm
        do_flip=True,            # Flip horizontally
        flip_vert=False,         # Do not flip vertically
        max_rotate=10.0,         # Rotate by a maximum of 10 degrees
        max_zoom=1.1,            # Zoom by a maximum of 10%
        max_lighting=0.2,        # Adjust brightness and contrast
        max_warp=0.2,            # Apply perspective warping
        p_affine=0.75,           # Probability of applying affine transformations
        p_lighting=0.75          # Probability of applying lighting transformations
        
    )
)

# Create DataLoaders with a batch size of 16
dls = dblock.dataloaders(path, bs=16)

# Load InceptionV3 backbone from timm (randomly initialised, feature-extraction mode)
# This object is only used by the manual workaround further down; it is not
# passed to ViTHybridConfig.
inception_backbone = timm.create_model('inception_v3', pretrained=False, features_only=True)

# Set up the configuration for ViTHybrid
config = ViTHybridConfig(
    backbone="inception_v3",  # Specify the backbone by name
    use_timm_backbone=True,   # Use timm backbone
    use_pretrained_backbone=False,  # Do NOT load pretrained backbone weights
    num_labels=4,  # Specify the number of classes for your task
    image_size=500,  # Declared input image size (see the Resize(224) note above)
    patch_size=14, # Patch size; the original note claimed the default is 1 — confirm
    # presumably (batch, channels, height, width) of the backbone's final
    # feature map — TODO confirm against the actual InceptionV3 output
    backbone_featmap_shape=[16, 2048, 14, 14]
)

# Initialize the model with the config
# The traceback in this post points at this line: the constructor raises
# AttributeError: 'TimmBackbone' object has no attribute 'model_type'.
# NOTE(review): the error names 'TimmBackbone', so the constructor appears to
# build its own backbone from `config`. Presumably ViTHybrid only accepts a
# BiT backbone and the AttributeError is raised while it formats its
# "not supported" error message — verify in transformers' modeling_vit_hybrid.py.
model = ViTHybridForImageClassification(config)
#model = ViTHybridModel(config)

# Manually set the backbone in the model (Workaround)
# This line is never reached while the constructor above raises, so it cannot
# work around the error as written.
model.vit.embeddings.patch_embeddings.backbone = inception_backbone

# Define Learner
# NOTE(review): Hugging Face models usually return an output object rather than
# a raw logits tensor; the loss and metric here may need `.logits` — confirm.
learn = Learner(dls, model, loss_func=CrossEntropyLossFlat(), metrics=accuracy)

# Train with fit_one_cycle; the fine_tune variant is left commented out.
# No callbacks are added here.
#learn.fine_tune(5)
learn.fit_one_cycle(10, 1e-3)
1 Like