Model input shape doesn't match

Hello,

with the following code:

from diffusers import UNet1DModel
import torch
import torch.nn as nn
class ClassConditionedUned(nn.Module):
    """1-D UNet wrapper that conditions on an embedded ELA vector.

    The conditioning vector is passed through a small MLP
    (``Linear(num_ela, 32) -> ReLU -> Linear(32, class_emb_size)``), repeated
    along the last axis of the input and concatenated onto the input as extra
    channels before being handed to a ``UNet1DModel``.

    NOTE(review): as posted, this snippet reproduces the channel-mismatch
    ``RuntimeError`` quoted further down in the thread; it is kept unchanged
    so the traceback still matches the code.
    """

    def __init__(self, num_ela: int = 8, class_emb_size: int = 4) -> None:
        """Build the conditioning MLP and the underlying UNet.

        Args:
            num_ela: Size of the last dimension of the conditioning vector
                fed to the first ``nn.Linear``.
            class_emb_size: Output width of the conditioning MLP, i.e. the
                number of channels appended to the input in ``forward``.
        """
        super().__init__()
        # Conditioning MLP: num_ela -> 32 -> class_emb_size.
        self.class_emb = nn.Sequential(
            nn.Linear(num_ela, 32),
            nn.ReLU(),
            nn.Linear(32, class_emb_size)
        )
        self.model = UNet1DModel(
            sample_size=512,
            # One data channel plus the appended conditioning channels.
            # NOTE(review): the traceback in this thread reports 21 channels
            # arriving at a layer built for 5 (= 1 + class_emb_size), and the
            # thread's resolution was to add 16 to this value. Presumably the
            # UNet concatenates a 16-channel timestep embedding onto its
            # input -- confirm against the linked diffusers issue.
            in_channels=1+class_emb_size,
            out_channels=1,
            layers_per_block=1,  
            block_out_channels = (32, 32, 64),   
            down_block_types = ("DownBlock1DNoSkip", "DownBlock1D", "AttnDownBlock1D"),
            up_block_types = ("AttnUpBlock1D", "UpBlock1D", "UpBlock1DNoSkip"),   
        )
        
    def forward(self, x: torch.Tensor, t, ela_vec: torch.Tensor):
        """Concatenate the embedded condition onto ``x`` and run the UNet.

        Args:
            x: 3-D input tensor; its shape is unpacked as ``(bs, ch, h)``.
            t: Timestep argument forwarded unchanged to the UNet.
            ela_vec: Conditioning tensor passed to the conditioning MLP;
                assumed to be ``(bs, num_ela)`` -- TODO confirm.

        Returns:
            The ``.sample`` attribute of the UNet's output.
        """
        bs, ch, h = x.shape
        class_cond = self.class_emb(ela_vec) # Map to embedding dimension
        # Reshape to (bs, C, 1) and repeat along the last axis to length h so
        # it can be stacked with x along the channel axis.
        class_cond = class_cond.view(bs, -1, 1).expand(-1, -1, h)
        net_input = torch.cat((x, class_cond), 1)
        # Debug output: shape of the tensor handed to the UNet.
        print(net_input.shape)
        return self.model(net_input, t).sample

# Minimal reproduction: a single sample with 1 channel and length 512.
model = ClassConditionedUned()
x = torch.randn(1, 1, 512)
t = torch.randint(0, 1000, (1,))  # one random integer timestep in [0, 1000)
ela_vec = torch.rand(1, 8)  # normalized ELA vectors

# Inference only, so gradient tracking is disabled; this call is the one
# shown in the traceback quoted below.
with torch.no_grad():
    out = model(x, t, ela_vec)

I get this error:
out = model(x, t, ela_vec)
^^^^^^^^^^^^^^^^^^^^
RuntimeError: Given groups=1, weight of size [32, 5, 1], expected input[1, 21, 512] to have 5 channels, but got 21 channels instead

What am I doing wrong?

Thank you in advance

1 Like

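This solved my issue: https://github.com/huggingface/diffusers/issues/2967#issuecomment-1500800012
I had to add 16 to the input channels, i.e. `in_channels=1+class_emb_size+16`, which matches the 21 (= 5 + 16) channels reported in the error.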

1 Like

This topic was automatically closed 12 hours after the last reply. New replies are no longer allowed.