I have created an AutoModelForAudioClassification from SEWConfig. My goal is to feed it the [batchsize, 9600] input I obtained from other layers of my model class.
When I pass a random input, eg AutoModelForAudioClassification.from_config(SEWConfig())(torch.randn((128, 9600))), it runs fine.
But, when I use it within a class, it throws RuntimeError: you can only change requires_grad flags of leaf variables.
.
I can’t find any other forum similar to this issue.
My code is below:
class CNNTrans(nn.Module):
def __init__(self, transformer, n_features):
super(CNNTrans, self).__init__()
self.n_features=n_features
cnns=nn.Sequential(
nn.Conv1d(1, 2, kernel_size=3, stride=1),
nn.BatchNorm1d(2),
nn.ReLU(),
nn.Conv1d(2, 8, kernel_size=3, stride=1),
nn.BatchNorm1d(8),
nn.ReLU(),
nn.Conv1d(8, 24, kernel_size=3, stride=1),
nn.BatchNorm1d(24),
nn.ReLU(),
nn.Conv1d(24, 48, kernel_size=4, stride=1),
nn.BatchNorm1d(48),
nn.ReLU(),
nn.Conv1d(48, 96, kernel_size=4, stride=1),
nn.BatchNorm1d(96),
nn.ReLU(),
)
self.layers=nn.ModuleDict()
for i in range(self.n_features):
self.layers[f'feature_{i}']=copy.deepcopy(cnns)
self.cnn_dropout=nn.Dropout(0.1)
self.trans=transformer
def forward(self, x):
outp=[]
for i in range(self.n_features):
outp.append(self.layers[f'feature_{i}'](x[:,i:i+1,:]))
x=torch.concat(outp, dim=1).squeeze()
x=self.trans(x) # Throws Error. the shape of x here is [BatchSize, 9600]
return x.logits
For transformer argument, I pass the AutoModelForAudioClassification as explained above.
What works?
model.trans(torch.randn((32, 9600))
What doesn’t work?
x=self.trans(x) # x[32, 9600]
Full trace:
/usr/local/lib/python3.7/dist-packages/transformers/models/sew/modeling_sew.py in forward(self, input_values, attention_mask, output_attentions, output_hidden_states, return_dict, labels)
1180 output_attentions=output_attentions,
1181 output_hidden_states=output_hidden_states,
-> 1182 return_dict=return_dict,
1183 )
1184
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/transformers/models/sew/modeling_sew.py in forward(self, input_values, attention_mask, mask_time_indices, output_attentions, output_hidden_states, return_dict)
931 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
932
--> 933 extract_features = self.feature_extractor(input_values)
934 extract_features = extract_features.transpose(1, 2)
935 extract_features = self.layer_norm(extract_features)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1109 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1110 return forward_call(*input, **kwargs)
1111 # Do not call functions when jit is used
1112 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/transformers/models/sew/modeling_sew.py in forward(self, input_values)
365 # make sure hidden_states require grad for gradient_checkpointing
366 if self._requires_grad and self.training:
--> 367 hidden_states.requires_grad = True
368
369 for conv_layer in self.conv_layers:
RuntimeError: you can only change requires_grad flags of leaf variables.
Just for a check, I have also tried loading models with other configs but everything behaves the same.
At this point, I don’t know if it’s a bug or am I doing something wrong here. Any help would be appreciated.