diff --git a/yolox/data/datasets/voc.py b/yolox/data/datasets/voc.py index 214a33b..425983f 100644 --- a/yolox/data/datasets/voc.py +++ b/yolox/data/datasets/voc.py @@ -144,7 +144,7 @@ def pull_item(self, index): target = self.load_anno(index) - img_info = (width, height) + img_info = (height, width) return img, target, img_info, index diff --git a/yolox/models/yolo_head.py b/yolox/models/yolo_head.py index d0a08f7..8a0ccec 100644 --- a/yolox/models/yolo_head.py +++ b/yolox/models/yolo_head.py @@ -205,7 +205,7 @@ def get_output_and_grid(self, output, k, stride): n_ch = 5 + self.num_classes hsize, wsize = output.shape[-2:] if grid.shape[2:4] != output.shape[2:4]: - xv, yv = meshgrid(F.arange(hsize), F.arange(wsize)) + xv, yv = meshgrid(F.arange(wsize), F.arange(hsize)) grid = F.stack((xv, yv), 2).reshape(1, 1, hsize, wsize, 2) self.grids[k] = grid @@ -223,7 +223,7 @@ def decode_outputs(self, outputs): grids = [] strides = [] for (hsize, wsize), stride in zip(self.hw, self.strides): - xv, yv = meshgrid(F.arange(hsize), F.arange(wsize)) + xv, yv = meshgrid(F.arange(wsize), F.arange(hsize)) grid = F.stack((xv, yv), 2).reshape(1, -1, 2) grids.append(grid) shape = grid.shape[:2]