I am using the off-the-shelf HubertForCTC model with just one change, `vocab_size=60`, and it is not working (it predicts every token as the pad token), while it works with `vocab_size=32`. What could be the solution?
```python
import torch
import torch.nn as nn
from transformers import HubertForCTC

class ASR(nn.Module):
    def __init__(self, vocab_size=32, ignore_mismatched_sizes=True, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # ignore_mismatched_sizes lets from_pretrained drop the checkpoint's
        # 32-class CTC head and randomly initialise a new one of the given size.
        self.hubert_ctc = HubertForCTC.from_pretrained(
            "facebook/hubert-large-ls960-ft",
            vocab_size=vocab_size,
            ignore_mismatched_sizes=ignore_mismatched_sizes,
        )

    def forward(self, input_values, attention_mask=None, labels=None, **kwargs):
        if attention_mask is None:
            attention_mask = torch.ones_like(input_values)
        out = self.hubert_ctc(
            input_values,
            attention_mask=attention_mask,
            labels=labels,
            return_dict=True,
        )
        return out.loss, out.logits
```
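For reference, here is a minimal sanity check (my own addition, not part of the training code) to confirm the head really was resized; `lm_head` is the CTC output layer inside `HubertForCTC`:

```python
model = ASR(vocab_size=60)

# The CTC head should now project to 60 classes
# (in_features is 1024 for the large checkpoint).
print(model.hubert_ctc.lm_head)
print(model.hubert_ctc.config.vocab_size)   # expect 60

# The pad token doubles as the CTC blank, so this id must match
# the tokenizer that produced the labels.
print(model.hubert_ctc.config.pad_token_id)
```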
I have checked the logits:

```python
predicted_ids = torch.argmax(logits, dim=-1)
dec = self.processor.batch_decode(predicted_ids, skip_special_tokens=True)
```
All the predicted IDs come out as zero (the pad ID), but the same setup works perfectly fine with `vocab_size=32`.
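To rule out a decoding artifact, I also looked at the raw probabilities instead of just the argmax. A quick sketch of that check (assuming `logits` and `labels` come from the forward pass above):

```python
probs = torch.softmax(logits, dim=-1)   # (batch, frames, vocab)

# Average probability mass on id 0, the pad/blank token.
print(probs[..., 0].mean())

# The five classes the model favours overall.
print(probs.mean(dim=(0, 1)).topk(5))

# Labels outside [0, vocab_size) would silently corrupt the CTC loss,
# so the label range is worth verifying too.
print(labels.min(), labels.max())
```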