about input channels of vitstr

I am reproducing the vit-tiny model but ran into a problem when testing on IIIT5k: it reports a mismatched number of channels. I found that the input images are in RGB mode (3 channels), but the vitstr model expects only one input channel. Why does this happen? I'm looking forward to your reply.
Thank you

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
/tmp/ipykernel_53850/3528011160.py in <module>
    296     opt.num_gpu = torch.cuda.device_count()
    297 
--> 298     test(opt)

/tmp/ipykernel_53850/3528011160.py in test(opt)
    253     with torch.no_grad():
    254 
--> 255         _, accuracy_by_best_model, norm_ED_by_best_model, _, _, _, infer_time, length_of_data = validation(
    256             model, criterion, test_loader, converter, opt)
    257         print(f'{accuracy_by_best_model:0.3f}')

/tmp/ipykernel_53850/3528011160.py in validation(model, criterion, evaluation_loader, converter, opt)
     39         start_time = time.time()
     40 
---> 41         preds = model(image, seqlen=converter.batch_max_length,is_train=False)
     42         _, preds_index = preds.topk(1, dim=-1, largest=True, sorted=True)
     43         forward_time = time.time() - start_time

~/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/model.py in forward(self, input, is_train, seqlen)
     44 
     45         """ Prediction stage """
---> 46         prediction = self.vitstr(input, seqlen=seqlen)
     47 
     48         return prediction

~/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/modules/Vistr.py in forward(self, x, seqlen)
     73 
     74     def forward(self, x, seqlen: int =25):
---> 75         x = self.forward_features(x)
     76         x = x[:, :seqlen]
     77 

~/modules/Vistr.py in forward_features(self, x)
     59     def forward_features(self, x):
     60         B = x.shape[0]
---> 61         x = self.patch_embed(x)
     62 
     63         cls_tokens = self.cls_token.expand(B, -1, -1)  # stole cls_tokens impl from Phil Wang, thanks

~/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/miniconda3/lib/python3.8/site-packages/timm/models/layers/patch_embed.py in forward(self, x)
     33         _assert(H == self.img_size[0], f"Input image height ({H}) doesn't match model ({self.img_size[0]}).")
     34         _assert(W == self.img_size[1], f"Input image width ({W}) doesn't match model ({self.img_size[1]}).")
---> 35         x = self.proj(x)
     36         if self.flatten:
     37             x = x.flatten(2).transpose(1, 2)  # BCHW -> BNC

~/miniconda3/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~/miniconda3/lib/python3.8/site-packages/torch/nn/modules/conv.py in forward(self, input)
    421 
    422     def forward(self, input: Tensor) -> Tensor:
--> 423         return self._conv_forward(input, self.weight)
    424 
    425 class Conv3d(_ConvNd):

~/miniconda3/lib/python3.8/site-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight)
    417                             weight, self.bias, self.stride,
    418                             _pair(0), self.dilation, self.groups)
--> 419         return F.conv2d(input, weight, self.bias, self.stride,
    420                         self.padding, self.dilation, self.groups)
    421 

RuntimeError: Given groups=1, weight of size [192, 1, 16, 16], expected input[16, 3, 224, 224] to have 1 channels, but got 3 channels instead

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

about input channels of vitstr #28

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

about input channels of vitstr #28

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions