diff --git a/preprocess/humanparsing/datasets/datasets.py b/preprocess/humanparsing/datasets/datasets.py deleted file mode 100644 index 433f15a..0000000 --- a/preprocess/humanparsing/datasets/datasets.py +++ /dev/null @@ -1,201 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : datasets.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import os -import numpy as np -import random -import torch -import cv2 -from torch.utils import data -from utils.transforms import get_affine_transform - - -class LIPDataSet(data.Dataset): - def __init__(self, root, dataset, crop_size=[473, 473], scale_factor=0.25, - rotation_factor=30, ignore_label=255, transform=None): - self.root = root - self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] - self.crop_size = np.asarray(crop_size) - self.ignore_label = ignore_label - self.scale_factor = scale_factor - self.rotation_factor = rotation_factor - self.flip_prob = 0.5 - self.transform = transform - self.dataset = dataset - - list_path = os.path.join(self.root, self.dataset + '_id.txt') - train_list = [i_id.strip() for i_id in open(list_path)] - - self.train_list = train_list - self.number_samples = len(self.train_list) - - def __len__(self): - return self.number_samples - - def _box2cs(self, box): - x, y, w, h = box[:4] - return self._xywh2cs(x, y, w, h) - - def _xywh2cs(self, x, y, w, h): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > self.aspect_ratio * h: - h = w * 1.0 / self.aspect_ratio - elif w < self.aspect_ratio * h: - w = h * self.aspect_ratio - scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) - return center, scale - - def __getitem__(self, index): - train_item = self.train_list[index] - - im_path = os.path.join(self.root, self.dataset + '_images', train_item + '.jpg') - parsing_anno_path = os.path.join(self.root, self.dataset + '_segmentations', train_item + '.png') - - im = cv2.imread(im_path, cv2.IMREAD_COLOR) - h, w, _ = im.shape - parsing_anno = np.zeros((h, w), dtype=np.long) - - # Get person center and scale - person_center, s = self._box2cs([0, 0, w - 1, h - 1]) - r = 0 - - if self.dataset != 'test': - # Get pose annotation - parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE) - if self.dataset == 'train' or self.dataset == 'trainval': - sf = self.scale_factor - rf = self.rotation_factor - s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) - r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0 - - if random.random() <= self.flip_prob: - im = im[:, ::-1, :] - parsing_anno = parsing_anno[:, ::-1] - person_center[0] = im.shape[1] - person_center[0] - 1 - right_idx = [15, 17, 19] - left_idx = [14, 16, 18] - for i in range(0, 3): - right_pos = np.where(parsing_anno == right_idx[i]) - left_pos = np.where(parsing_anno == left_idx[i]) - parsing_anno[right_pos[0], right_pos[1]] = left_idx[i] - parsing_anno[left_pos[0], left_pos[1]] = right_idx[i] - - trans = get_affine_transform(person_center, s, r, self.crop_size) - input = cv2.warpAffine( - im, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - - if self.transform: - input = self.transform(input) - - meta = { - 'name': train_item, - 'center': person_center, - 'height': h, - 'width': w, - 
'scale': s, - 'rotation': r - } - - if self.dataset == 'val' or self.dataset == 'test': - return input, meta - else: - label_parsing = cv2.warpAffine( - parsing_anno, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(255)) - - label_parsing = torch.from_numpy(label_parsing) - - return input, label_parsing, meta - - -class LIPDataValSet(data.Dataset): - def __init__(self, root, dataset='val', crop_size=[473, 473], transform=None, flip=False): - self.root = root - self.crop_size = crop_size - self.transform = transform - self.flip = flip - self.dataset = dataset - self.root = root - self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] - self.crop_size = np.asarray(crop_size) - - list_path = os.path.join(self.root, self.dataset + '_id.txt') - val_list = [i_id.strip() for i_id in open(list_path)] - - self.val_list = val_list - self.number_samples = len(self.val_list) - - def __len__(self): - return len(self.val_list) - - def _box2cs(self, box): - x, y, w, h = box[:4] - return self._xywh2cs(x, y, w, h) - - def _xywh2cs(self, x, y, w, h): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > self.aspect_ratio * h: - h = w * 1.0 / self.aspect_ratio - elif w < self.aspect_ratio * h: - w = h * self.aspect_ratio - scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) - - return center, scale - - def __getitem__(self, index): - val_item = self.val_list[index] - # Load training image - im_path = os.path.join(self.root, self.dataset + '_images', val_item + '.jpg') - im = cv2.imread(im_path, cv2.IMREAD_COLOR) - h, w, _ = im.shape - # Get person center and scale - person_center, s = self._box2cs([0, 0, w - 1, h - 1]) - r = 0 - trans = get_affine_transform(person_center, s, r, self.crop_size) - input = cv2.warpAffine( - im, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - input = self.transform(input) - flip_input = input.flip(dims=[-1]) - if self.flip: - batch_input_im = torch.stack([input, flip_input]) - else: - batch_input_im = input - - meta = { - 'name': val_item, - 'center': person_center, - 'height': h, - 'width': w, - 'scale': s, - 'rotation': r - } - - return batch_input_im, meta diff --git a/preprocess/humanparsing/datasets/target_generation.py b/preprocess/humanparsing/datasets/target_generation.py deleted file mode 100644 index 8524db4..0000000 --- a/preprocess/humanparsing/datasets/target_generation.py +++ /dev/null @@ -1,40 +0,0 @@ -import torch -from torch.nn import functional as F - - -def generate_edge_tensor(label, edge_width=3): - label = label.type(torch.cuda.FloatTensor) - if len(label.shape) == 2: - label = label.unsqueeze(0) - n, h, w = label.shape - edge = torch.zeros(label.shape, dtype=torch.float).cuda() - # right - edge_right = edge[:, 1:h, :] - edge_right[(label[:, 1:h, :] != label[:, :h - 1, :]) & (label[:, 1:h, :] != 255) - & (label[:, :h - 1, :] != 255)] = 1 - - # up - edge_up = edge[:, :, :w - 1] - edge_up[(label[:, :, :w - 1] != label[:, :, 1:w]) - & (label[:, :, :w - 1] != 255) - & (label[:, :, 1:w] != 255)] = 1 - - # upright - edge_upright = edge[:, :h - 1, :w - 1] - edge_upright[(label[:, :h - 1, :w - 1] != label[:, 1:h, 1:w]) - & (label[:, :h - 1, :w - 1] != 255) - & (label[:, 1:h, 1:w] != 255)] = 1 - - # bottomright - edge_bottomright = edge[:, :h - 1, 1:w] - edge_bottomright[(label[:, :h - 1, 1:w] != label[:, 1:h, :w - 1]) - & 
(label[:, :h - 1, 1:w] != 255) - & (label[:, 1:h, :w - 1] != 255)] = 1 - - kernel = torch.ones((1, 1, edge_width, edge_width), dtype=torch.float).cuda() - with torch.no_grad(): - edge = edge.unsqueeze(1) - edge = F.conv2d(edge, kernel, stride=1, padding=1) - edge[edge!=0] = 1 - edge = edge.squeeze() - return edge diff --git a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/human_to_coco.py b/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/human_to_coco.py deleted file mode 100644 index 8eccb3a..0000000 --- a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/human_to_coco.py +++ /dev/null @@ -1,166 +0,0 @@ -import argparse -import datetime -import json -import os -from PIL import Image -import numpy as np - -import pycococreatortools - - -def get_arguments(): - parser = argparse.ArgumentParser(description="transform mask annotation to coco annotation") - parser.add_argument("--dataset", type=str, default='CIHP', help="name of dataset (CIHP, MHPv2 or VIP)") - parser.add_argument("--json_save_dir", type=str, default='../data/msrcnn_finetune_annotations', - help="path to save coco-style annotation json file") - parser.add_argument("--use_val", type=bool, default=False, - help="use train+val set for finetuning or not") - parser.add_argument("--train_img_dir", type=str, default='../data/instance-level_human_parsing/Training/Images', - help="train image path") - parser.add_argument("--train_anno_dir", type=str, - default='../data/instance-level_human_parsing/Training/Human_ids', - help="train human mask path") - parser.add_argument("--val_img_dir", type=str, default='../data/instance-level_human_parsing/Validation/Images', - help="val image path") - parser.add_argument("--val_anno_dir", type=str, - default='../data/instance-level_human_parsing/Validation/Human_ids', - help="val human mask path") - return parser.parse_args() - - -def main(args): - INFO = { - "description": args.split_name + " Dataset", - "url": "", - "version": "", - "year": 2019, - "contributor": "xyq", - "date_created": datetime.datetime.utcnow().isoformat(' ') - } - - LICENSES = [ - { - "id": 1, - "name": "", - "url": "" - } - ] - - CATEGORIES = [ - { - 'id': 1, - 'name': 'person', - 'supercategory': 'person', - }, - ] - - coco_output = { - "info": INFO, - "licenses": LICENSES, - "categories": CATEGORIES, - "images": [], - "annotations": [] - } - - image_id = 1 - segmentation_id = 1 - - for image_name in os.listdir(args.train_img_dir): - image = Image.open(os.path.join(args.train_img_dir, image_name)) - image_info = pycococreatortools.create_image_info( - image_id, image_name, image.size - ) - coco_output["images"].append(image_info) - - human_mask_name = os.path.splitext(image_name)[0] + '.png' - human_mask = np.asarray(Image.open(os.path.join(args.train_anno_dir, human_mask_name))) - human_gt_labels = np.unique(human_mask) - - for i in range(1, len(human_gt_labels)): - category_info = {'id': 1, 'is_crowd': 0} - binary_mask = np.uint8(human_mask == i) - annotation_info = pycococreatortools.create_annotation_info( - segmentation_id, image_id, category_info, binary_mask, - image.size, tolerance=10 - ) - if annotation_info is not None: - coco_output["annotations"].append(annotation_info) - - segmentation_id += 1 - image_id += 1 - - if not os.path.exists(args.json_save_dir): - os.makedirs(args.json_save_dir) - if not args.use_val: - with open('{}/{}_train.json'.format(args.json_save_dir, args.split_name), 'w') as output_json_file: - json.dump(coco_output, output_json_file) 
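A quick way to sanity-check the COCO-style JSON written by `human_to_coco.py` is to load it back with `pycocotools`. The sketch below is illustrative only: it assumes `pycocotools` is installed, and the annotation path is hypothetical (note that `main()` reads `args.split_name` while `get_arguments()` only defines `--dataset`, so the real filename depends on how that attribute is resolved).

```python
# Minimal sanity check of the generated COCO-style annotations (sketch).
# Assumes pycocotools is installed; the JSON path below is hypothetical.
from pycocotools.coco import COCO

ann_file = "../data/msrcnn_finetune_annotations/CIHP_train.json"  # hypothetical path
coco = COCO(ann_file)

img_ids = coco.getImgIds()
print(f"{len(img_ids)} images, {len(coco.getAnnIds())} person instances")

# Inspect the first image and its per-person annotations.
first = coco.loadImgs(img_ids[0])[0]
anns = coco.loadAnns(coco.getAnnIds(imgIds=first["id"], catIds=[1]))
for ann in anns:
    print(first["file_name"], "bbox:", ann["bbox"], "area:", ann["area"])
```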
- else: - for image_name in os.listdir(args.val_img_dir): - image = Image.open(os.path.join(args.val_img_dir, image_name)) - image_info = pycococreatortools.create_image_info( - image_id, image_name, image.size - ) - coco_output["images"].append(image_info) - - human_mask_name = os.path.splitext(image_name)[0] + '.png' - human_mask = np.asarray(Image.open(os.path.join(args.val_anno_dir, human_mask_name))) - human_gt_labels = np.unique(human_mask) - - for i in range(1, len(human_gt_labels)): - category_info = {'id': 1, 'is_crowd': 0} - binary_mask = np.uint8(human_mask == i) - annotation_info = pycococreatortools.create_annotation_info( - segmentation_id, image_id, category_info, binary_mask, - image.size, tolerance=10 - ) - if annotation_info is not None: - coco_output["annotations"].append(annotation_info) - - segmentation_id += 1 - image_id += 1 - - with open('{}/{}_trainval.json'.format(args.json_save_dir, args.split_name), 'w') as output_json_file: - json.dump(coco_output, output_json_file) - - coco_output_val = { - "info": INFO, - "licenses": LICENSES, - "categories": CATEGORIES, - "images": [], - "annotations": [] - } - - image_id_val = 1 - segmentation_id_val = 1 - - for image_name in os.listdir(args.val_img_dir): - image = Image.open(os.path.join(args.val_img_dir, image_name)) - image_info = pycococreatortools.create_image_info( - image_id_val, image_name, image.size - ) - coco_output_val["images"].append(image_info) - - human_mask_name = os.path.splitext(image_name)[0] + '.png' - human_mask = np.asarray(Image.open(os.path.join(args.val_anno_dir, human_mask_name))) - human_gt_labels = np.unique(human_mask) - - for i in range(1, len(human_gt_labels)): - category_info = {'id': 1, 'is_crowd': 0} - binary_mask = np.uint8(human_mask == i) - annotation_info = pycococreatortools.create_annotation_info( - segmentation_id_val, image_id_val, category_info, binary_mask, - image.size, tolerance=10 - ) - if annotation_info is not None: - coco_output_val["annotations"].append(annotation_info) - - segmentation_id_val += 1 - image_id_val += 1 - - with open('{}/{}_val.json'.format(args.json_save_dir, args.split_name), 'w') as output_json_file_val: - json.dump(coco_output_val, output_json_file_val) - - -if __name__ == "__main__": - args = get_arguments() - main(args) diff --git a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/pycococreatortools.py b/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/pycococreatortools.py deleted file mode 100644 index 3f3d833..0000000 --- a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/pycococreatortools.py +++ /dev/null @@ -1,114 +0,0 @@ -import re -import datetime -import numpy as np -from itertools import groupby -from skimage import measure -from PIL import Image -from pycocotools import mask - -convert = lambda text: int(text) if text.isdigit() else text.lower() -natrual_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] - - -def resize_binary_mask(array, new_size): - image = Image.fromarray(array.astype(np.uint8) * 255) - image = image.resize(new_size) - return np.asarray(image).astype(np.bool_) - - -def close_contour(contour): - if not np.array_equal(contour[0], contour[-1]): - contour = np.vstack((contour, contour[0])) - return contour - - -def binary_mask_to_rle(binary_mask): - rle = {'counts': [], 'size': list(binary_mask.shape)} - counts = rle.get('counts') - for i, (value, elements) in enumerate(groupby(binary_mask.ravel(order='F'))): - if i == 0 and value == 1: - counts.append(0) 
- counts.append(len(list(elements))) - - return rle - - -def binary_mask_to_polygon(binary_mask, tolerance=0): - """Converts a binary mask to COCO polygon representation - Args: - binary_mask: a 2D binary numpy array where '1's represent the object - tolerance: Maximum distance from original points of polygon to approximated - polygonal chain. If tolerance is 0, the original coordinate array is returned. - """ - polygons = [] - # pad mask to close contours of shapes which start and end at an edge - padded_binary_mask = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0) - contours = measure.find_contours(padded_binary_mask, 0.5) - contours = np.subtract(contours, 1) - for contour in contours: - contour = close_contour(contour) - contour = measure.approximate_polygon(contour, tolerance) - if len(contour) < 3: - continue - contour = np.flip(contour, axis=1) - segmentation = contour.ravel().tolist() - # after padding and subtracting 1 we may get -0.5 points in our segmentation - segmentation = [0 if i < 0 else i for i in segmentation] - polygons.append(segmentation) - - return polygons - - -def create_image_info(image_id, file_name, image_size, - date_captured=datetime.datetime.utcnow().isoformat(' '), - license_id=1, coco_url="", flickr_url=""): - image_info = { - "id": image_id, - "file_name": file_name, - "width": image_size[0], - "height": image_size[1], - "date_captured": date_captured, - "license": license_id, - "coco_url": coco_url, - "flickr_url": flickr_url - } - - return image_info - - -def create_annotation_info(annotation_id, image_id, category_info, binary_mask, - image_size=None, tolerance=2, bounding_box=None): - if image_size is not None: - binary_mask = resize_binary_mask(binary_mask, image_size) - - binary_mask_encoded = mask.encode(np.asfortranarray(binary_mask.astype(np.uint8))) - - area = mask.area(binary_mask_encoded) - if area < 1: - return None - - if bounding_box is None: - bounding_box = mask.toBbox(binary_mask_encoded) - - if category_info["is_crowd"]: - is_crowd = 1 - segmentation = binary_mask_to_rle(binary_mask) - else: - is_crowd = 0 - segmentation = binary_mask_to_polygon(binary_mask, tolerance) - if not segmentation: - return None - - annotation_info = { - "id": annotation_id, - "image_id": image_id, - "category_id": category_info["id"], - "iscrowd": is_crowd, - "area": area.tolist(), - "bbox": bounding_box.tolist(), - "segmentation": segmentation, - "width": binary_mask.shape[1], - "height": binary_mask.shape[0], - } - - return annotation_info diff --git a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/test_human2coco_format.py b/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/test_human2coco_format.py deleted file mode 100644 index 1733918..0000000 --- a/preprocess/humanparsing/mhp_extension/coco_style_annotation_creator/test_human2coco_format.py +++ /dev/null @@ -1,74 +0,0 @@ -import argparse -import datetime -import json -import os -from PIL import Image - -import pycococreatortools - - -def get_arguments(): - parser = argparse.ArgumentParser(description="transform mask annotation to coco annotation") - parser.add_argument("--dataset", type=str, default='CIHP', help="name of dataset (CIHP, MHPv2 or VIP)") - parser.add_argument("--json_save_dir", type=str, default='../data/CIHP/annotations', - help="path to save coco-style annotation json file") - parser.add_argument("--test_img_dir", type=str, default='../data/CIHP/Testing/Images', - help="test image path") - return parser.parse_args() - -args = 
get_arguments() - -INFO = { - "description": args.dataset + "Dataset", - "url": "", - "version": "", - "year": 2020, - "contributor": "yunqiuxu", - "date_created": datetime.datetime.utcnow().isoformat(' ') -} - -LICENSES = [ - { - "id": 1, - "name": "", - "url": "" - } -] - -CATEGORIES = [ - { - 'id': 1, - 'name': 'person', - 'supercategory': 'person', - }, -] - - -def main(args): - coco_output = { - "info": INFO, - "licenses": LICENSES, - "categories": CATEGORIES, - "images": [], - "annotations": [] - } - - image_id = 1 - - for image_name in os.listdir(args.test_img_dir): - image = Image.open(os.path.join(args.test_img_dir, image_name)) - image_info = pycococreatortools.create_image_info( - image_id, image_name, image.size - ) - coco_output["images"].append(image_info) - image_id += 1 - - if not os.path.exists(os.path.join(args.json_save_dir)): - os.mkdir(os.path.join(args.json_save_dir)) - - with open('{}/{}.json'.format(args.json_save_dir, args.dataset), 'w') as output_json_file: - json.dump(coco_output, output_json_file) - - -if __name__ == "__main__": - main(args) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.circleci/config.yml b/preprocess/humanparsing/mhp_extension/detectron2/.circleci/config.yml deleted file mode 100644 index 6c60588..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.circleci/config.yml +++ /dev/null @@ -1,179 +0,0 @@ -# Python CircleCI 2.0 configuration file -# -# Check https://circleci.com/docs/2.0/language-python/ for more details -# -version: 2 - -# ------------------------------------------------------------------------------------- -# Environments to run the jobs in -# ------------------------------------------------------------------------------------- -cpu: &cpu - docker: - - image: circleci/python:3.6.8-stretch - resource_class: medium - -gpu: &gpu - machine: - image: ubuntu-1604:201903-01 - docker_layer_caching: true - resource_class: gpu.small - -# ------------------------------------------------------------------------------------- -# Re-usable commands -# ------------------------------------------------------------------------------------- -install_python: &install_python - - run: - name: Install Python - working_directory: ~/ - command: | - pyenv install 3.6.1 - pyenv global 3.6.1 - -setup_venv: &setup_venv - - run: - name: Setup Virtual Env - working_directory: ~/ - command: | - python -m venv ~/venv - echo ". ~/venv/bin/activate" >> $BASH_ENV - . 
~/venv/bin/activate - python --version - which python - which pip - pip install --upgrade pip - -install_dep: &install_dep - - run: - name: Install Dependencies - command: | - pip install --progress-bar off -U 'git+https://github.com/facebookresearch/fvcore' - pip install --progress-bar off cython opencv-python - pip install --progress-bar off 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' - pip install --progress-bar off torch torchvision - -install_detectron2: &install_detectron2 - - run: - name: Install Detectron2 - command: | - gcc --version - pip install -U --progress-bar off -e .[dev] - python -m detectron2.utils.collect_env - -install_nvidia_driver: &install_nvidia_driver - - run: - name: Install nvidia driver - working_directory: ~/ - command: | - wget -q 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-430.40.run' - sudo /bin/bash ./NVIDIA-Linux-x86_64-430.40.run -s --no-drm - nvidia-smi - -run_unittests: &run_unittests - - run: - name: Run Unit Tests - command: | - python -m unittest discover -v -s tests - -# ------------------------------------------------------------------------------------- -# Jobs to run -# ------------------------------------------------------------------------------------- -jobs: - cpu_tests: - <<: *cpu - - working_directory: ~/detectron2 - - steps: - - checkout - - <<: *setup_venv - - # Cache the venv directory that contains dependencies - - restore_cache: - keys: - - cache-key-{{ .Branch }}-ID-20200425 - - - <<: *install_dep - - - save_cache: - paths: - - ~/venv - key: cache-key-{{ .Branch }}-ID-20200425 - - - <<: *install_detectron2 - - - run: - name: isort - command: | - isort -c -sp . - - run: - name: black - command: | - black --check -l 100 . - - run: - name: flake8 - command: | - flake8 . - - - <<: *run_unittests - - gpu_tests: - <<: *gpu - - working_directory: ~/detectron2 - - steps: - - checkout - - <<: *install_nvidia_driver - - - run: - name: Install nvidia-docker - working_directory: ~/ - command: | - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - distribution=$(. /etc/os-release;echo $ID$VERSION_ID) - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ - sudo tee /etc/apt/sources.list.d/nvidia-docker.list - sudo apt-get update && sudo apt-get install -y nvidia-docker2 - # reload the docker daemon configuration - sudo pkill -SIGHUP dockerd - - - run: - name: Launch docker - working_directory: ~/detectron2/docker - command: | - nvidia-docker build -t detectron2:v0 -f Dockerfile-circleci . 
- nvidia-docker run -itd --name d2 detectron2:v0 - docker exec -it d2 nvidia-smi - - - run: - name: Build Detectron2 - command: | - docker exec -it d2 pip install 'git+https://github.com/facebookresearch/fvcore' - docker cp ~/detectron2 d2:/detectron2 - # This will build d2 for the target GPU arch only - docker exec -it d2 pip install -e /detectron2 - docker exec -it d2 python3 -m detectron2.utils.collect_env - docker exec -it d2 python3 -c 'import torch; assert(torch.cuda.is_available())' - - - run: - name: Run Unit Tests - command: | - docker exec -e CIRCLECI=true -it d2 python3 -m unittest discover -v -s /detectron2/tests - -workflows: - version: 2 - regular_test: - jobs: - - cpu_tests - - gpu_tests - - #nightly_test: - #jobs: - #- gpu_tests - #triggers: - #- schedule: - #cron: "0 0 * * *" - #filters: - #branches: - #only: - #- master diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.clang-format b/preprocess/humanparsing/mhp_extension/detectron2/.clang-format deleted file mode 100644 index a757d4f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.clang-format +++ /dev/null @@ -1,85 +0,0 @@ -AccessModifierOffset: -1 -AlignAfterOpenBracket: AlwaysBreak -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlinesLeft: true -AlignOperands: false -AlignTrailingComments: false -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Empty -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: true -BinPackArguments: false -BinPackParameters: false -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false - IndentBraces: false -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Attach -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: false -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: true -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DerivePointerAlignment: false -DisableFormat: false -ForEachMacros: [ FOR_EACH, FOR_EACH_ENUMERATE, FOR_EACH_KV, FOR_EACH_R, FOR_EACH_RANGE, ] -IncludeCategories: - - Regex: '^<.*\.h(pp)?>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IndentCaseLabels: true -IndentWidth: 2 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: false -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Left -ReflowComments: true -SortIncludes: true -SpaceAfterCStyleCast: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInContainerLiterals: true -SpacesInCStyleCastParentheses: false 
-SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Cpp11 -TabWidth: 8 -UseTab: Never diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.flake8 b/preprocess/humanparsing/mhp_extension/detectron2/.flake8 deleted file mode 100644 index 0cc61b7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.flake8 +++ /dev/null @@ -1,9 +0,0 @@ -# This is an example .flake8 config, used when developing *Black* itself. -# Keep in sync with setup.cfg which is used for source packages. - -[flake8] -ignore = W503, E203, E221, C901, C408, E741 -max-line-length = 100 -max-complexity = 18 -select = B,C,E,F,W,T4,B9 -exclude = build,__init__.py diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/CODE_OF_CONDUCT.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/CODE_OF_CONDUCT.md deleted file mode 100644 index 0f7ad8b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,5 +0,0 @@ -# Code of Conduct - -Facebook has adopted a Code of Conduct that we expect project participants to adhere to. -Please read the [full text](https://code.fb.com/codeofconduct/) -so that you can understand what actions will and will not be tolerated. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/CONTRIBUTING.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/CONTRIBUTING.md deleted file mode 100644 index 81936df..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/CONTRIBUTING.md +++ /dev/null @@ -1,49 +0,0 @@ -# Contributing to detectron2 - -## Issues -We use GitHub issues to track public bugs and questions. -Please make sure to follow one of the -[issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose) -when reporting any issues. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## Pull Requests -We actively welcome your pull requests. - -However, if you're adding any significant features (e.g. > 50 lines), please -make sure to have a corresponding issue to discuss your motivation and proposals, -before sending a PR. We do not always accept new features, and we take the following -factors into consideration: - -1. Whether the same feature can be achieved without modifying detectron2. -Detectron2 is designed so that you can implement many extensions from the outside, e.g. -those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects). -If some part is not as extensible, you can also bring up the issue to make it more extensible. -2. Whether the feature is potentially useful to a large audience, or only to a small portion of users. -3. Whether the proposed solution has a good design / interface. -4. Whether the proposed solution adds extra mental/practical overhead to users who don't - need such feature. -5. Whether the proposed solution breaks existing APIs. - -When sending a PR, please do: - -1. If a PR contains multiple orthogonal changes, split it to several PRs. -2. If you've added code that should be tested, add tests. -3. For PRs that need experiments (e.g. adding a new model or new methods), - you don't need to update model zoo, but do provide experiment results in the description of the PR. -4. If APIs are changed, update the documentation. -5. Make sure your code lints with `./dev/linter.sh`. 
- - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: - -## License -By contributing to detectron2, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/Detectron2-Logo-Horz.svg b/preprocess/humanparsing/mhp_extension/detectron2/.github/Detectron2-Logo-Horz.svg deleted file mode 100644 index eb2d643..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/Detectron2-Logo-Horz.svg +++ /dev/null @@ -1 +0,0 @@ -Detectron2-Logo-Horz \ No newline at end of file diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index 5e8aaa2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,5 +0,0 @@ - -Please select an issue template from -https://github.com/facebookresearch/detectron2/issues/new/choose . - -Otherwise your issue will be closed. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/bugs.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/bugs.md deleted file mode 100644 index 52d2998..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/bugs.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -name: "🐛 Bugs" -about: Report bugs in detectron2 -title: Please read & provide the following - ---- - -## Instructions To Reproduce the 🐛 Bug: - -1. what changes you made (`git diff`) or what code you wrote -``` - -``` -2. what exact command you run: -3. what you observed (including __full logs__): -``` - -``` -4. please simplify the steps as much as possible so they do not require additional resources to - run, such as a private dataset. - -## Expected behavior: - -If there are no obvious error in "what you observed" provided above, -please tell us the expected behavior. - -## Environment: - -Provide your environment information using the following command: -``` -wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py -``` - -If your issue looks like an installation issue / environment issue, -please first try to solve it yourself with the instructions in -https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/config.yml b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index c19e249..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,9 +0,0 @@ -# require an issue template to be chosen -blank_issues_enabled: false - -# Unexpected behaviors & bugs are split to two templates. -# When they are one template, users think "it's not a bug" and don't choose the template. -# -# But the file name is still "unexpected-problems-bugs.md" so that old references -# to this issue template still works. 
-# It's ok since this template should be a superset of "bugs.md" (unexpected behaviors is a superset of bugs) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/feature-request.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/feature-request.md deleted file mode 100644 index dd69a33..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/feature-request.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -name: "\U0001F680Feature Request" -about: Submit a proposal/request for a new detectron2 feature - ---- - -## 🚀 Feature -A clear and concise description of the feature proposal. - - -## Motivation & Examples - -Tell us why the feature is useful. - -Describe what the feature would look like, if it is implemented. -Best demonstrated using **code examples** in addition to words. - -## Note - -We only consider adding new features if they are relevant to many users. - -If you request implementation of research papers -- -we only consider papers that have enough significance and prevalance in the object detection field. - -We do not take requests for most projects in the `projects/` directory, -because they are research code release that is mainly for other researchers to reproduce results. - -Instead of adding features inside detectron2, -you can implement many features by [extending detectron2](https://detectron2.readthedocs.io/tutorials/extend.html). -The [projects/](https://github.com/facebookresearch/detectron2/tree/master/projects/) directory contains many of such examples. - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/questions-help-support.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/questions-help-support.md deleted file mode 100644 index 0811561..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/questions-help-support.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: "❓How to do something?" -about: How to do something using detectron2? What does an API do? - ---- - -## ❓ How to do something using detectron2 - -Describe what you want to do, including: -1. what inputs you will provide, if any: -2. what outputs you are expecting: - -## ❓ What does an API do and how to use it? -Please link to which API or documentation you're asking about from -https://detectron2.readthedocs.io/ - - -NOTE: - -1. Only general answers are provided. - If you want to ask about "why X did not work", please use the - [Unexpected behaviors](https://github.com/facebookresearch/detectron2/issues/new/choose) issue template. - -2. About how to implement new models / new dataloader / new training logic, etc., check documentation first. - -3. We do not answer general machine learning / computer vision questions that are not specific to detectron2, such as how a model works, how to improve your training/make it converge, or what algorithm/methods can be used to achieve X. 
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md deleted file mode 100644 index bafee7a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/ISSUE_TEMPLATE/unexpected-problems-bugs.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -name: "Unexpected behaviors" -about: Run into unexpected behaviors when using detectron2 -title: Please read & provide the following - ---- - -If you do not know the root cause of the problem, and wish someone to help you, please -post according to this template: - -## Instructions To Reproduce the Issue: - -1. what changes you made (`git diff`) or what code you wrote -``` - -``` -2. what exact command you run: -3. what you observed (including __full logs__): -``` - -``` -4. please simplify the steps as much as possible so they do not require additional resources to - run, such as a private dataset. - -## Expected behavior: - -If there are no obvious error in "what you observed" provided above, -please tell us the expected behavior. - -If you expect the model to converge / work better, note that we do not give suggestions -on how to train a new model. -Only in one of the two conditions we will help with it: -(1) You're unable to reproduce the results in detectron2 model zoo. -(2) It indicates a detectron2 bug. - -## Environment: - -Provide your environment information using the following command: -``` -wget -nc -q https://github.com/facebookresearch/detectron2/raw/master/detectron2/utils/collect_env.py && python collect_env.py -``` - -If your issue looks like an installation issue / environment issue, -please first try to solve it yourself with the instructions in -https://detectron2.readthedocs.io/tutorials/install.html#common-installation-issues diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.github/pull_request_template.md b/preprocess/humanparsing/mhp_extension/detectron2/.github/pull_request_template.md deleted file mode 100644 index 4ff5ea5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.github/pull_request_template.md +++ /dev/null @@ -1,9 +0,0 @@ -Thanks for your contribution! - -If you're sending a large PR (e.g., >50 lines), -please open an issue first about the feature / bug, and indicate how you want to contribute. - -Before submitting a PR, please run `dev/linter.sh` to lint the code. - -See https://detectron2.readthedocs.io/notes/contributing.html#pull-requests -about how we handle PRs. 
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/.gitignore b/preprocess/humanparsing/mhp_extension/detectron2/.gitignore deleted file mode 100644 index e85df4c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/.gitignore +++ /dev/null @@ -1,46 +0,0 @@ -# output dir -output -instant_test_output -inference_test_output - - -*.jpg -*.png -*.txt -*.json -*.diff - -# compilation and distribution -__pycache__ -_ext -*.pyc -*.so -detectron2.egg-info/ -build/ -dist/ -wheels/ - -# pytorch/python/numpy formats -*.pth -*.pkl -*.npy - -# ipython/jupyter notebooks -*.ipynb -**/.ipynb_checkpoints/ - -# Editor temporaries -*.swn -*.swo -*.swp -*~ - -# editor settings -.idea -.vscode - -# project dirs -/detectron2/model_zoo/configs -/datasets -/projects/*/datasets -/models diff --git a/preprocess/humanparsing/mhp_extension/detectron2/GETTING_STARTED.md b/preprocess/humanparsing/mhp_extension/detectron2/GETTING_STARTED.md deleted file mode 100644 index acaf13f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/GETTING_STARTED.md +++ /dev/null @@ -1,79 +0,0 @@ -## Getting Started with Detectron2 - -This document provides a brief intro of the usage of builtin command-line tools in detectron2. - -For a tutorial that involves actual coding with the API, -see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -which covers how to run inference with an -existing model, and how to train a builtin model on a custom dataset. - -For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html). - - -### Inference Demo with Pre-trained Models - -1. Pick a model and its config file from - [model zoo](MODEL_ZOO.md), - for example, `mask_rcnn_R_50_FPN_3x.yaml`. -2. We provide `demo.py` that is able to run builtin standard models. Run it with: -``` -cd demo/ -python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --input input1.jpg input2.jpg \ - [--other-options] - --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl -``` -The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation. -This command will run the inference and show visualizations in an OpenCV window. - -For details of the command line arguments, see `demo.py -h` or look at its source code -to understand its behavior. Some common arguments are: -* To run __on your webcam__, replace `--input files` with `--webcam`. -* To run __on a video__, replace `--input files` with `--video-input video.mp4`. -* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`. -* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`. - - -### Training & Evaluation in Command Line - -We provide a script in "tools/{,plain_}train_net.py", that is made to train -all the configs provided in detectron2. -You may want to use it as a reference to write your own training script. - -To train a model with "train_net.py", first -setup the corresponding datasets following -[datasets/README.md](./datasets/README.md), -then run: -``` -cd tools/ -./train_net.py --num-gpus 8 \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml -``` - -The configs are made for 8-GPU training. 
-To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.: -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 -``` - -For most models, CPU training is not supported. - -To evaluate a model's performance, use -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --eval-only MODEL.WEIGHTS /path/to/checkpoint_file -``` -For more options, see `./train_net.py -h`. - -### Use Detectron2 APIs in Your Code - -See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -to learn how to use detectron2 APIs to: -1. run inference with an existing model -2. train a builtin model on a custom dataset - -See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects) -for more ways to build your project on detectron2. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/INSTALL.md b/preprocess/humanparsing/mhp_extension/detectron2/INSTALL.md deleted file mode 100644 index 3985f8a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/INSTALL.md +++ /dev/null @@ -1,184 +0,0 @@ -## Installation - -Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -has step-by-step instructions that install detectron2. -The [Dockerfile](docker) -also installs detectron2 with a few simple commands. - -### Requirements -- Linux or macOS with Python ≥ 3.6 -- PyTorch ≥ 1.4 -- [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. - You can install them together at [pytorch.org](https://pytorch.org) to make sure of this. -- OpenCV, optional, needed by demo and visualization -- pycocotools: `pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` - - -### Build Detectron2 from Source - -gcc & g++ ≥ 5 are required. [ninja](https://ninja-build.org/) is recommended for faster build. -After having them, run: -``` -python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' -# (add --user if you don't have permission) - -# Or, to install it from a local clone: -git clone https://github.com/facebookresearch/detectron2.git -python -m pip install -e detectron2 - -# Or if you are on macOS -# CC=clang CXX=clang++ python -m pip install -e . -``` - -To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the -old build first. You often need to rebuild detectron2 after reinstalling PyTorch. - -### Install Pre-Built Detectron2 (Linux only) -``` -# for CUDA 10.1: -python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html -``` -You can replace cu101 with "cu{100,92}" or "cpu". - -Note that: -1. Such installation has to be used with certain version of official PyTorch release. - See [releases](https://github.com/facebookresearch/detectron2/releases) for requirements. - It will not work with a different version of PyTorch or a non-official build of PyTorch. -2. Such installation is out-of-date w.r.t. master branch of detectron2. It may not be - compatible with the master branch of a research project that uses detectron2 (e.g. those in - [projects](projects) or [meshrcnn](https://github.com/facebookresearch/meshrcnn/)). 
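Whichever install route is used (building from source or the pre-built wheels above), it is worth confirming that detectron2, PyTorch and CUDA agree before debugging anything else. A minimal check, assuming detectron2 is already importable and that the `collect_env` helper referenced throughout this document is available:

```python
# Post-install sanity check (sketch); assumes detectron2 is importable.
import torch
import detectron2
from detectron2.utils.collect_env import collect_env_info

print("detectron2:", detectron2.__version__)
print("torch:", torch.__version__, "| built with CUDA:", torch.version.cuda)
print("CUDA available at runtime:", torch.cuda.is_available())
print(collect_env_info())  # the same report the issue templates ask for
```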
- -### Common Installation Issues - -If you meet issues using the pre-built detectron2, please uninstall it and try building it from source. - -Click each issue for its solutions: - -
- -Undefined torch/aten/caffe2 symbols, or segmentation fault immediately when running the library. - -
- -This usually happens when detectron2 or torchvision is not -compiled with the version of PyTorch you're running. - -Pre-built torchvision or detectron2 has to work with the corresponding official release of pytorch. -If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them -following [pytorch.org](http://pytorch.org). So the versions will match. - -If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases) -to see the corresponding pytorch version required for each pre-built detectron2. - -If the error comes from detectron2 or torchvision that you built manually from source, -remove files you built (`build/`, `**/*.so`) and rebuild it so it can pick up the version of pytorch currently in your environment. - -If you cannot resolve this problem, please include the output of `gdb -ex "r" -ex "bt" -ex "quit" --args python -m detectron2.utils.collect_env` -in your issue. -
- -Undefined C++ symbols (e.g. `GLIBCXX`) or C++ symbols not found. - -
-Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime. - -This often happens with old anaconda. -Try `conda update libgcc`. Then rebuild detectron2. - -The fundamental solution is to run the code with proper C++ runtime. -One way is to use `LD_PRELOAD=/path/to/libstdc++.so`. - -
- -"Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available". - -
-CUDA is not found when building detectron2. -You should make sure - -``` -python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)' -``` - -print valid outputs at the time you build detectron2. - -Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config. -
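To illustrate the CPU fallback mentioned above, here is a minimal inference sketch. It assumes detectron2 is installed together with its model zoo configs; the checkpoint is downloaded on first use and the input is a dummy image.

```python
# CPU-only inference sketch; no GPU build of detectron2 is required.
import numpy as np
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.DEVICE = "cpu"  # the setting this entry describes

predictor = DefaultPredictor(cfg)
outputs = predictor(np.zeros((480, 640, 3), dtype=np.uint8))  # dummy BGR image
print(outputs["instances"].pred_classes)
```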
- -"invalid device function" or "no kernel image is available for execution". - -
-Two possibilities: - -* You build detectron2 with one version of CUDA but run it with a different version. - - To check whether it is the case, - use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. - In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" - to contain cuda libraries of the same version. - - When they are inconsistent, - you need to either install a different build of PyTorch (or build by yourself) - to match your local CUDA installation, or install a different version of CUDA to match PyTorch. - -* Detectron2 or PyTorch/torchvision is not built for the correct GPU architecture (compute compatibility). - - The GPU architecture for PyTorch/detectron2/torchvision is available in the "architecture flags" in - `python -m detectron2.utils.collect_env`. - - The GPU architecture flags of detectron2/torchvision by default matches the GPU model detected - during compilation. This means the compiled code may not work on a different GPU model. - To overwrite the GPU architecture for detectron2/torchvision, use `TORCH_CUDA_ARCH_LIST` environment variable during compilation. - - For example, `export TORCH_CUDA_ARCH_LIST=6.0,7.0` makes it compile for both P100s and V100s. - Visit [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) to find out - the correct compute compatibility number for your device. - -
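A small sketch of how to read the compute capability that the advice above refers to; it assumes a CUDA-enabled PyTorch and at least one visible GPU.

```python
# Report the local GPU's compute capability so TORCH_CUDA_ARCH_LIST can be
# set accordingly before rebuilding detectron2 (sketch; needs a visible GPU).
import torch

if torch.cuda.is_available():
    major, minor = torch.cuda.get_device_capability(0)
    arch = f"{major}.{minor}"
    print("GPU:", torch.cuda.get_device_name(0), "| compute capability:", arch)
    print(f'Suggested flag for rebuilding: TORCH_CUDA_ARCH_LIST="{arch}"')
else:
    print("No CUDA device visible; architecture flags only matter for GPU builds.")
```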
- -Undefined CUDA symbols; cannot open libcudart.so; other nvcc failures. - -
-The version of NVCC you use to build detectron2 or torchvision does -not match the version of CUDA you are running with. -This often happens when using anaconda's CUDA runtime. - -Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. -In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" -to contain cuda libraries of the same version. - -When they are inconsistent, -you need to either install a different build of PyTorch (or build by yourself) -to match your local CUDA installation, or install a different version of CUDA to match PyTorch. -
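The mismatch described above can be surfaced directly by comparing the CUDA version PyTorch was built with against the `nvcc` found through `CUDA_HOME`. A sketch, assuming `nvcc` lives under `CUDA_HOME/bin` when CUDA is installed:

```python
# Compare PyTorch's CUDA build version with the nvcc that would compile detectron2.
import os
import subprocess

import torch
from torch.utils.cpp_extension import CUDA_HOME

print("PyTorch built with CUDA:", torch.version.cuda)
print("CUDA_HOME:", CUDA_HOME)

nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") if CUDA_HOME else None
if nvcc and os.path.exists(nvcc):
    # The release printed here should match torch.version.cuda.
    print(subprocess.run([nvcc, "--version"], capture_output=True, text=True).stdout)
else:
    print("nvcc not found; a CUDA build of detectron2 is not possible in this environment.")
```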
- -"ImportError: cannot import name '_C'". - -
-Please build and install detectron2 following the instructions above. - -If you are running code from detectron2's root directory, `cd` to a different one. -Otherwise you may not import the code that you installed. -
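The shadowing problem described here is easy to diagnose by checking which copy of detectron2 the interpreter actually picked up. A minimal sketch:

```python
# Check whether `import detectron2` resolves to the source tree in the current
# directory instead of the installed package (the situation described above).
import os
import detectron2

print("imported from:", os.path.abspath(detectron2.__file__))
print("current directory:", os.getcwd())
if os.path.abspath(detectron2.__file__).startswith(os.getcwd() + os.sep):
    print("detectron2 is being imported from the current directory; "
          "`cd` elsewhere or install it so the built copy is used.")
```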
- -ONNX conversion segfault after some "TraceWarning". - -
-The ONNX package is compiled with a compiler that is too old. - -Please build and install ONNX from its source code using a compiler -whose version is closer to what's used by PyTorch (available in `torch.__config__.show()`). -
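To locate the compiler string mentioned above without reading the full configuration dump, something like the following can be used; the filter is a heuristic, since the exact lines in `torch.__config__.show()` vary between builds.

```python
# Print the compiler information PyTorch was built with, as a reference point
# for building ONNX from source (the matching lines vary between builds).
import torch

for line in torch.__config__.show().splitlines():
    if "GCC" in line or "compiler" in line.lower():
        print(line.strip())
```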
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/LICENSE b/preprocess/humanparsing/mhp_extension/detectron2/LICENSE deleted file mode 100644 index d483689..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License -Version 2.0, January 2004 -http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - -"License" shall mean the terms and conditions for use, reproduction, -and distribution as defined by Sections 1 through 9 of this document. - -"Licensor" shall mean the copyright owner or entity authorized by -the copyright owner that is granting the License. - -"Legal Entity" shall mean the union of the acting entity and all -other entities that control, are controlled by, or are under common -control with that entity. For the purposes of this definition, -"control" means (i) the power, direct or indirect, to cause the -direction or management of such entity, whether by contract or -otherwise, or (ii) ownership of fifty percent (50%) or more of the -outstanding shares, or (iii) beneficial ownership of such entity. - -"You" (or "Your") shall mean an individual or Legal Entity -exercising permissions granted by this License. - -"Source" form shall mean the preferred form for making modifications, -including but not limited to software source code, documentation -source, and configuration files. - -"Object" form shall mean any form resulting from mechanical -transformation or translation of a Source form, including but -not limited to compiled object code, generated documentation, -and conversions to other media types. - -"Work" shall mean the work of authorship, whether in Source or -Object form, made available under the License, as indicated by a -copyright notice that is included in or attached to the work -(an example is provided in the Appendix below). - -"Derivative Works" shall mean any work, whether in Source or Object -form, that is based on (or derived from) the Work and for which the -editorial revisions, annotations, elaborations, or other modifications -represent, as a whole, an original work of authorship. For the purposes -of this License, Derivative Works shall not include works that remain -separable from, or merely link (or bind by name) to the interfaces of, -the Work and Derivative Works thereof. - -"Contribution" shall mean any work of authorship, including -the original version of the Work and any modifications or additions -to that Work or Derivative Works thereof, that is intentionally -submitted to Licensor for inclusion in the Work by the copyright owner -or by an individual or Legal Entity authorized to submit on behalf of -the copyright owner. For the purposes of this definition, "submitted" -means any form of electronic, verbal, or written communication sent -to the Licensor or its representatives, including but not limited to -communication on electronic mailing lists, source code control systems, -and issue tracking systems that are managed by, or on behalf of, the -Licensor for the purpose of discussing and improving the Work, but -excluding communication that is conspicuously marked or otherwise -designated in writing by the copyright owner as "Not a Contribution." - -"Contributor" shall mean Licensor and any individual or Legal Entity -on behalf of whom a Contribution has been received by Licensor and -subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -copyright license to reproduce, prepare Derivative Works of, -publicly display, publicly perform, sublicense, and distribute the -Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of -this License, each Contributor hereby grants to You a perpetual, -worldwide, non-exclusive, no-charge, royalty-free, irrevocable -(except as stated in this section) patent license to make, have made, -use, offer to sell, sell, import, and otherwise transfer the Work, -where such license applies only to those patent claims licensable -by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) -with the Work to which such Contribution(s) was submitted. If You -institute patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that the Work -or a Contribution incorporated within the Work constitutes direct -or contributory patent infringement, then any patent licenses -granted to You under this License for that Work shall terminate -as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the -Work or Derivative Works thereof in any medium, with or without -modifications, and in Source or Object form, provided that You -meet the following conditions: - -(a) You must give any other recipients of the Work or -Derivative Works a copy of this License; and - -(b) You must cause any modified files to carry prominent notices -stating that You changed the files; and - -(c) You must retain, in the Source form of any Derivative Works -that You distribute, all copyright, patent, trademark, and -attribution notices from the Source form of the Work, -excluding those notices that do not pertain to any part of -the Derivative Works; and - -(d) If the Work includes a "NOTICE" text file as part of its -distribution, then any Derivative Works that You distribute must -include a readable copy of the attribution notices contained -within such NOTICE file, excluding those notices that do not -pertain to any part of the Derivative Works, in at least one -of the following places: within a NOTICE text file distributed -as part of the Derivative Works; within the Source form or -documentation, if provided along with the Derivative Works; or, -within a display generated by the Derivative Works, if and -wherever such third-party notices normally appear. The contents -of the NOTICE file are for informational purposes only and -do not modify the License. You may add Your own attribution -notices within Derivative Works that You distribute, alongside -or as an addendum to the NOTICE text from the Work, provided -that such additional attribution notices cannot be construed -as modifying the License. - -You may add Your own copyright statement to Your modifications and -may provide additional or different license terms and conditions -for use, reproduction, or distribution of Your modifications, or -for any such Derivative Works as a whole, provided Your use, -reproduction, and distribution of the Work otherwise complies with -the conditions stated in this License. - -5. Submission of Contributions. 
Unless You explicitly state otherwise, -any Contribution intentionally submitted for inclusion in the Work -by You to the Licensor shall be under the terms and conditions of -this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify -the terms of any separate license agreement you may have executed -with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade -names, trademarks, service marks, or product names of the Licensor, -except as required for reasonable and customary use in describing the -origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or -agreed to in writing, Licensor provides the Work (and each -Contributor provides its Contributions) on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -implied, including, without limitation, any warranties or conditions -of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A -PARTICULAR PURPOSE. You are solely responsible for determining the -appropriateness of using or redistributing the Work and assume any -risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, -whether in tort (including negligence), contract, or otherwise, -unless required by applicable law (such as deliberate and grossly -negligent acts) or agreed to in writing, shall any Contributor be -liable to You for damages, including any direct, indirect, special, -incidental, or consequential damages of any character arising as a -result of this License or out of the use or inability to use the -Work (including but not limited to damages for loss of goodwill, -work stoppage, computer failure or malfunction, or any and all -other commercial damages or losses), even if such Contributor -has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing -the Work or Derivative Works thereof, You may choose to offer, -and charge a fee for, acceptance of support, warranty, indemnity, -or other liability obligations and/or rights consistent with this -License. However, in accepting such obligations, You may act only -on Your own behalf and on Your sole responsibility, not on behalf -of any other Contributor, and only if You agree to indemnify, -defend, and hold each Contributor harmless for any liability -incurred by, or claims asserted against, such Contributor by reason -of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - -To apply the Apache License to your work, attach the following -boilerplate notice, with the fields enclosed by brackets "[]" -replaced with your own identifying information. (Don't include -the brackets!) The text should be enclosed in the appropriate -comment syntax for the file format. We also recommend that a -file or class name and description of purpose be included on the -same "printed page" as the copyright notice for easier -identification within third-party archives. - -Copyright 2019 - present, Facebook, Inc - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/MODEL_ZOO.md b/preprocess/humanparsing/mhp_extension/detectron2/MODEL_ZOO.md deleted file mode 100644 index 07b81ff..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/MODEL_ZOO.md +++ /dev/null @@ -1,903 +0,0 @@ -# Detectron2 Model Zoo and Baselines - -## Introduction - -This file documents a large collection of baselines trained -with detectron2 in Sep-Oct, 2019. -All numbers were obtained on [Big Basin](https://engineering.fb.com/data-center-engineering/introducing-big-basin-our-next-generation-ai-hardware/) -servers with 8 NVIDIA V100 GPUs & NVLink. The software in use were PyTorch 1.3, CUDA 9.2, cuDNN 7.4.2 or 7.6.3. -You can access these models from code using [detectron2.model_zoo](https://detectron2.readthedocs.io/modules/model_zoo.html) APIs. - -In addition to these official baseline models, you can find more models in [projects/](projects/). - -#### How to Read the Tables -* The "Name" column contains a link to the config file. Running `tools/train_net.py` with this config file - and 8 GPUs will reproduce the model. -* Training speed is averaged across the entire training. - We keep updating the speed with latest version of detectron2/pytorch/etc., - so they might be different from the `metrics` file. - Training speed for multi-machine jobs is not provided. -* Inference speed is measured by `tools/train_net.py --eval-only`, or [inference_on_dataset()](https://detectron2.readthedocs.io/modules/evaluation.html#detectron2.evaluation.inference_on_dataset), - with batch size 1 in detectron2 directly. - Measuring it with your own code will likely introduce other overhead. - Actual deployment in production should in general be faster than the given inference - speed due to more optimizations. -* The *model id* column is provided for ease of reference. - To check downloaded file integrity, any model on this page contains its md5 prefix in its file name. -* Training curves and other statistics can be found in `metrics` for each model. - -#### Common Settings for COCO Models -* All COCO models were trained on `train2017` and evaluated on `val2017`. -* The default settings are __not directly comparable__ with Detectron's standard settings. - For example, our default training data augmentation uses scale jittering in addition to horizontal flipping. - - To make fair comparisons with Detectron's settings, see - [Detectron1-Comparisons](configs/Detectron1-Comparisons/) for accuracy comparison, - and [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html) - for speed comparison. -* For Faster/Mask R-CNN, we provide baselines based on __3 different backbone combinations__: - * __FPN__: Use a ResNet+FPN backbone with standard conv and FC heads for mask and box prediction, - respectively. It obtains the best - speed/accuracy tradeoff, but the other two are still useful for research. - * __C4__: Use a ResNet conv4 backbone with conv5 head. The original baseline in the Faster R-CNN paper. 
- * __DC5__ (Dilated-C5): Use a ResNet conv5 backbone with dilations in conv5, and standard conv and FC heads - for mask and box prediction, respectively. - This is used by the Deformable ConvNet paper. -* Most models are trained with the 3x schedule (~37 COCO epochs). - Although 1x models are heavily under-trained, we provide some ResNet-50 models with the 1x (~12 COCO epochs) - training schedule for comparison when doing quick research iteration. - -#### ImageNet Pretrained Models - -We provide backbone models pretrained on ImageNet-1k dataset. -These models have __different__ format from those provided in Detectron: we do not fuse BatchNorm into an affine layer. -* [R-50.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl): converted copy of [MSRA's original ResNet-50](https://github.com/KaimingHe/deep-residual-networks) model. -* [R-101.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl): converted copy of [MSRA's original ResNet-101](https://github.com/KaimingHe/deep-residual-networks) model. -* [X-101-32x8d.pkl](https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/FAIR/X-101-32x8d.pkl): ResNeXt-101-32x8d model trained with Caffe2 at FB. - -Pretrained models in Detectron's format can still be used. For example: -* [X-152-32x8d-IN5k.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl): - ResNeXt-152-32x8d model trained on ImageNet-5k with Caffe2 at FB (see ResNeXt paper for details on ImageNet-5k). -* [R-50-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47261647/R-50-GN.pkl): - ResNet-50 with Group Normalization. -* [R-101-GN.pkl](https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/47592356/R-101-GN.pkl): - ResNet-101 with Group Normalization. - -Torchvision's ResNet models can be used after converted by [this script](tools/convert-torchvision-to-d2.py). - -#### License - -All models available for download through this document are licensed under the -[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/). - -### COCO Object Detection Baselines - -#### Faster R-CNN: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| R50-C4 | 1x | 0.551 | 0.102 | 4.8 | 35.7 | 137257644 | model \| metrics |
| R50-DC5 | 1x | 0.380 | 0.068 | 5.0 | 37.3 | 137847829 | model \| metrics |
| R50-FPN | 1x | 0.210 | 0.038 | 3.0 | 37.9 | 137257794 | model \| metrics |
| R50-C4 | 3x | 0.543 | 0.104 | 4.8 | 38.4 | 137849393 | model \| metrics |
| R50-DC5 | 3x | 0.378 | 0.070 | 5.0 | 39.0 | 137849425 | model \| metrics |
| R50-FPN | 3x | 0.209 | 0.038 | 3.0 | 40.2 | 137849458 | model \| metrics |
| R101-C4 | 3x | 0.619 | 0.139 | 5.9 | 41.1 | 138204752 | model \| metrics |
| R101-DC5 | 3x | 0.452 | 0.086 | 6.1 | 40.6 | 138204841 | model \| metrics |
| R101-FPN | 3x | 0.286 | 0.051 | 4.1 | 42.0 | 137851257 | model \| metrics |
| X101-FPN | 3x | 0.638 | 0.098 | 6.7 | 43.0 | 139173657 | model \| metrics |
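The notes above say these baselines can be loaded programmatically through the `detectron2.model_zoo` API. As a hedged illustration (not part of the original file), the sketch below builds the R50-FPN 3x detector from its config name and runs it on one image; the score threshold and `input.jpg` path are placeholder choices.

```python
# Minimal sketch: build a trained Faster R-CNN baseline from the model zoo
# and run single-image inference. "input.jpg" and the 0.5 threshold are
# placeholders, not values taken from this table.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # keep only confident detections

predictor = DefaultPredictor(cfg)             # handles resizing/normalization internally
outputs = predictor(cv2.imread("input.jpg"))  # BGR image in, dict of predictions out
print(outputs["instances"].pred_boxes, outputs["instances"].scores)
```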
#### RetinaNet:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| R50 | 1x | 0.200 | 0.055 | 3.9 | 36.5 | 137593951 | model \| metrics |
| R50 | 3x | 0.201 | 0.055 | 3.9 | 37.9 | 137849486 | model \| metrics |
| R101 | 3x | 0.280 | 0.068 | 5.1 | 39.9 | 138363263 | model \| metrics |
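For quick experiments, the model zoo also exposes a one-call helper that builds a model and optionally loads its released weights. A minimal sketch, assuming the RetinaNet R50 3x config path used in this model zoo:

```python
# Sketch: one-call construction of a RetinaNet baseline; trained=True loads
# the released checkpoint corresponding to the row above.
from detectron2 import model_zoo

model = model_zoo.get("COCO-Detection/retinanet_R_50_FPN_3x.yaml", trained=True)
model.eval()  # the returned object is a plain torch.nn.Module
```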
#### RPN & Fast R-CNN:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | prop. AR | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| RPN R50-C4 | 1x | 0.130 | 0.034 | 1.5 | | 51.6 | 137258005 | model \| metrics |
| RPN R50-FPN | 1x | 0.186 | 0.032 | 2.7 | | 58.0 | 137258492 | model \| metrics |
| Fast R-CNN R50-FPN | 1x | 0.140 | 0.029 | 2.6 | 37.8 | | 137635226 | model \| metrics |
### COCO Instance Segmentation Baselines with Mask R-CNN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-C4 | 1x | 0.584 | 0.110 | 5.2 | 36.8 | 32.2 | 137259246 | model \| metrics |
| R50-DC5 | 1x | 0.471 | 0.076 | 6.5 | 38.3 | 34.2 | 137260150 | model \| metrics |
| R50-FPN | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | model \| metrics |
| R50-C4 | 3x | 0.575 | 0.111 | 5.2 | 39.8 | 34.4 | 137849525 | model \| metrics |
| R50-DC5 | 3x | 0.470 | 0.076 | 6.5 | 40.0 | 35.9 | 137849551 | model \| metrics |
| R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| R101-C4 | 3x | 0.652 | 0.145 | 6.3 | 42.6 | 36.7 | 138363239 | model \| metrics |
| R101-DC5 | 3x | 0.545 | 0.092 | 7.6 | 41.9 | 37.3 | 138363294 | model \| metrics |
| R101-FPN | 3x | 0.340 | 0.056 | 4.6 | 42.9 | 38.6 | 138205316 | model \| metrics |
| X101-FPN | 3x | 0.690 | 0.103 | 7.2 | 44.3 | 39.5 | 139653917 | model \| metrics |
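Per the notes at the top, the AP and inference-time columns come from `inference_on_dataset()` with batch size 1. The following is a hedged sketch of that evaluation loop for the R50-FPN 3x Mask R-CNN baseline; it assumes `coco_2017_val` is registered under detectron2's default name, the output directory is a placeholder, and the evaluator's exact constructor arguments have shifted slightly across detectron2 releases.

```python
# Sketch: evaluate a Mask R-CNN baseline on COCO val2017 via
# inference_on_dataset(), mirroring how the table numbers are described.
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultPredictor
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

predictor = DefaultPredictor(cfg)
evaluator = COCOEvaluator("coco_2017_val", output_dir="./eval_output")  # placeholder output dir
loader = build_detection_test_loader(cfg, "coco_2017_val")
print(inference_on_dataset(predictor.model, loader, evaluator))  # reports box AP and mask AP
```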
### COCO Person Keypoint Detection Baselines with Keypoint R-CNN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | kp. AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.315 | 0.072 | 5.0 | 53.6 | 64.0 | 137261548 | model \| metrics |
| R50-FPN | 3x | 0.316 | 0.066 | 5.0 | 55.4 | 65.5 | 137849621 | model \| metrics |
| R101-FPN | 3x | 0.390 | 0.076 | 6.1 | 56.4 | 66.1 | 138363331 | model \| metrics |
| X101-FPN | 3x | 0.738 | 0.121 | 8.7 | 57.3 | 66.0 | 139686956 | model \| metrics |
### COCO Panoptic Segmentation Baselines with Panoptic FPN

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.304 | 0.053 | 4.8 | 37.6 | 34.7 | 39.4 | 139514544 | model \| metrics |
| R50-FPN | 3x | 0.302 | 0.053 | 4.8 | 40.0 | 36.5 | 41.5 | 139514569 | model \| metrics |
| R101-FPN | 3x | 0.392 | 0.066 | 6.0 | 42.4 | 38.5 | 43.0 | 139514519 | model \| metrics |
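Panoptic FPN models return a panoptic prediction alongside the usual instances; when run through `DefaultPredictor`, it is assumed here to arrive under the `"panoptic_seg"` key as a per-pixel id map plus per-segment metadata. A short sketch (the image path is a placeholder):

```python
# Sketch: read the panoptic prediction of a Panoptic FPN baseline.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")

outputs = DefaultPredictor(cfg)(cv2.imread("input.jpg"))  # placeholder image path
panoptic_seg, segments_info = outputs["panoptic_seg"]     # per-pixel segment ids + metadata
print(panoptic_seg.shape, len(segments_info))
```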
### LVIS Instance Segmentation Baselines with Mask R-CNN

Mask R-CNN baselines on the [LVIS dataset](https://lvisdataset.org), v0.5.
These baselines are described in Table 3(c) of the [LVIS paper](https://arxiv.org/abs/1908.03195).

NOTE: the 1x schedule here has the same number of __iterations__ as the COCO 1x baselines,
which corresponds to roughly 24 epochs of LVISv0.5 data.
The final results of these configs have large variance across different runs.

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN | 1x | 0.292 | 0.107 | 7.1 | 23.6 | 24.4 | 144219072 | model \| metrics |
| R101-FPN | 1x | 0.371 | 0.114 | 7.8 | 25.6 | 25.9 | 144219035 | model \| metrics |
| X101-FPN | 1x | 0.712 | 0.151 | 10.2 | 26.7 | 27.1 | 144219108 | model \| metrics |
### Cityscapes & Pascal VOC Baselines

Simple baselines for
* Mask R-CNN on Cityscapes instance segmentation (initialized from COCO pre-training, then trained on Cityscapes fine annotations only)
* Faster R-CNN on PASCAL VOC object detection (trained on VOC 2007 train+val + VOC 2012 train+val, tested on VOC 2007 using 11-point interpolated AP)

| Name | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | box AP50 | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| R50-FPN, Cityscapes | 0.240 | 0.078 | 4.4 | | | 36.5 | 142423278 | model \| metrics |
| R50-C4, VOC | 0.537 | 0.081 | 4.8 | 51.9 | 80.3 | | 142202221 | model \| metrics |
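The Cityscapes baseline above is described as COCO pre-training followed by fine-tuning on the fine annotations only, and the Cityscapes config later in this diff does exactly that by swapping in COCO weights and setting 8 classes. A hedged sketch of the same recipe applied to an arbitrary registered dataset; `my_dataset_train` and the solver values below are placeholders, not the repo's settings.

```python
# Sketch: COCO-pretrained Mask R-CNN fine-tuned on another instance
# segmentation dataset, following the Cityscapes-style recipe.
# "my_dataset_train" must already be registered with detectron2.
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("my_dataset_train",)  # placeholder dataset name
cfg.DATASETS.TEST = ()
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 8         # e.g. the 8 Cityscapes "thing" classes
cfg.SOLVER.IMS_PER_BATCH = 8
cfg.SOLVER.BASE_LR = 0.01

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
```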
### Other Settings

Ablations for Deformable Conv and Cascade R-CNN:

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Baseline R50-FPN | 1x | 0.261 | 0.043 | 3.4 | 38.6 | 35.2 | 137260431 | model \| metrics |
| Deformable Conv | 1x | 0.342 | 0.048 | 3.5 | 41.5 | 37.5 | 138602867 | model \| metrics |
| Cascade R-CNN | 1x | 0.317 | 0.052 | 4.0 | 42.1 | 36.4 | 138602847 | model \| metrics |
| Baseline R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| Deformable Conv | 3x | 0.349 | 0.047 | 3.5 | 42.7 | 38.5 | 144998336 | model \| metrics |
| Cascade R-CNN | 3x | 0.328 | 0.053 | 4.0 | 44.3 | 38.5 | 144998488 | model \| metrics |
Ablations for normalization methods, and a few models trained from scratch following [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883).
(Note: the baseline uses a `2fc` head while the others use a [`4conv1fc` head](https://arxiv.org/abs/1803.08494).)

| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Baseline R50-FPN | 3x | 0.261 | 0.043 | 3.4 | 41.0 | 37.2 | 137849600 | model \| metrics |
| GN | 3x | 0.356 | 0.069 | 7.3 | 42.6 | 38.6 | 138602888 | model \| metrics |
| SyncBN | 3x | 0.371 | 0.053 | 5.5 | 41.9 | 37.8 | 169527823 | model \| metrics |
| GN (from scratch) | 3x | 0.400 | 0.069 | 9.8 | 39.9 | 36.6 | 138602908 | model \| metrics |
| GN (from scratch) | 9x | N/A | 0.070 | 9.8 | 43.7 | 39.6 | 183808979 | model \| metrics |
| SyncBN (from scratch) | 9x | N/A | 0.055 | 7.2 | 43.6 | 39.3 | 184226666 | model \| metrics |
A few very large models trained for a long time, for demo purposes. They are trained using multiple machines:

| Name | inference time (s/im) | train mem (GB) | box AP | mask AP | PQ | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- |
| Panoptic FPN R101 | 0.107 | 11.4 | 47.4 | 41.3 | 46.1 | 139797668 | model \| metrics |
| Mask R-CNN X152 | 0.242 | 15.1 | 50.2 | 44.0 | | 18131413 | model \| metrics |
| above + test-time aug. | | | 51.9 | 45.9 | | | |
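The last row adds test-time augmentation on top of the X152 model; in detectron2 this is exposed as a wrapper module driven by the `TEST.AUG` options (the demo config later in this diff enables it). A minimal sketch, assuming the model-zoo helper used earlier and default TTA settings:

```python
# Sketch: wrap a trained Mask R-CNN with test-time augmentation, which is
# what the "+ test-time aug." row refers to; flips/scales come from TEST.AUG.
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.modeling import GeneralizedRCNNWithTTA

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", trained=True)
tta_model = GeneralizedRCNNWithTTA(cfg, model)  # averages predictions over augmented views
```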
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/README.md b/preprocess/humanparsing/mhp_extension/detectron2/README.md deleted file mode 100644 index 1fbb95b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/README.md +++ /dev/null @@ -1,56 +0,0 @@ - - -Detectron2 is Facebook AI Research's next generation software system -that implements state-of-the-art object detection algorithms. -It is a ground-up rewrite of the previous version, -[Detectron](https://github.com/facebookresearch/Detectron/), -and it originates from [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark/). - -
- -### What's New -* It is powered by the [PyTorch](https://pytorch.org) deep learning framework. -* Includes more features such as panoptic segmentation, densepose, Cascade R-CNN, rotated bounding boxes, etc. -* Can be used as a library to support [different projects](projects/) on top of it. - We'll open source more research projects in this way. -* It [trains much faster](https://detectron2.readthedocs.io/notes/benchmarks.html). - -See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-/) -to see more demos and learn about detectron2. - -## Installation - -See [INSTALL.md](INSTALL.md). - -## Quick Start - -See [GETTING_STARTED.md](GETTING_STARTED.md), -or the [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5). - -Learn more at our [documentation](https://detectron2.readthedocs.org). -And see [projects/](projects/) for some projects that are built on top of detectron2. - -## Model Zoo and Baselines - -We provide a large set of baseline results and trained models available for download in the [Detectron2 Model Zoo](MODEL_ZOO.md). - - -## License - -Detectron2 is released under the [Apache 2.0 license](LICENSE). - -## Citing Detectron2 - -If you use Detectron2 in your research or wish to refer to the baseline results published in the [Model Zoo](MODEL_ZOO.md), please use the following BibTeX entry. - -```BibTeX -@misc{wu2019detectron2, - author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and - Wan-Yen Lo and Ross Girshick}, - title = {Detectron2}, - howpublished = {\url{https://github.com/facebookresearch/detectron2}}, - year = {2019} -} -``` diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-C4.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-C4.yaml deleted file mode 100644 index fbf34a0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-C4.yaml +++ /dev/null @@ -1,18 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - RPN: - PRE_NMS_TOPK_TEST: 6000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "Res5ROIHeads" -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml deleted file mode 100644 index c0d6d16..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-DilatedC5.yaml +++ /dev/null @@ -1,31 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - RESNETS: - OUT_FEATURES: ["res5"] - RES5_DILATION: 2 - RPN: - IN_FEATURES: ["res5"] - PRE_NMS_TOPK_TEST: 6000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["res5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml deleted file mode 100644 
index 3e020f2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RCNN-FPN.yaml +++ /dev/null @@ -1,42 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. - POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RetinaNet.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RetinaNet.yaml deleted file mode 100644 index 12ec9d2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Base-RetinaNet.yaml +++ /dev/null @@ -1,24 +0,0 @@ -MODEL: - META_ARCHITECTURE: "RetinaNet" - BACKBONE: - NAME: "build_retinanet_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] - FPN: - IN_FEATURES: ["res3", "res4", "res5"] - RETINANET: - IOU_THRESHOLDS: [0.4, 0.5] - IOU_LABELS: [0, -1, 1] -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 773ac10..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - LOAD_PROPOSALS: True - RESNETS: - DEPTH: 50 - PROPOSAL_GENERATOR: - NAME: "PrecomputedProposals" -DATASETS: - TRAIN: ("coco_2017_train",) - PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) - TEST: ("coco_2017_val",) - PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) -DATALOADER: - # proposals are part of the dataset_dicts, and take a lot of RAM - NUM_WORKERS: 2 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml deleted file mode 100644 index db142cd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml deleted file mode 100644 index bceb6b3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 57a098f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml deleted file mode 100644 index f961301..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml deleted file mode 100644 index bc51bce..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml deleted file mode 100644 index 0fe96f5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml deleted file mode 100644 index 33fadeb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 3262019..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index 4139518..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index 9c9b5ab..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: False - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml deleted file mode 100644 index 4abb1b9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml deleted file mode 100644 index 4a24ce3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - 
RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml deleted file mode 100644 index 3b5412d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RetinaNet.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml deleted file mode 100644 index e048211..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_C4_1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - META_ARCHITECTURE: "ProposalNetwork" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - RPN: - PRE_NMS_TOPK_TEST: 12000 - POST_NMS_TOPK_TEST: 2000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml deleted file mode 100644 index dc9c952..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "ProposalNetwork" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - RPN: - POST_NMS_TOPK_TEST: 2000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml deleted file mode 100644 index 1a94cc4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml deleted file mode 100644 index 67b70cf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 1935a30..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml deleted file mode 100644 index a9aeb4e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml deleted file mode 100644 index 38ed867..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml deleted file mode 100644 index b13eefa..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml deleted file mode 100644 index d401016..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-DilatedC5.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index d50fb86..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index be7d06b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: 
"../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index d14c63f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml deleted file mode 100644 index 4e03944..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - KEYPOINT_ON: True - ROI_HEADS: - NUM_CLASSES: 1 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss - RPN: - # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. - # 1000 proposals per-image is found to hurt box AP. - # Therefore we increase it to 1500 per-image. - POST_NMS_TOPK_TRAIN: 1500 -DATASETS: - TRAIN: ("keypoints_coco_2017_train",) - TEST: ("keypoints_coco_2017_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml deleted file mode 100644 index 9309535..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 7bf85cf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index a07f243..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - 
STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml deleted file mode 100644 index d4bfa20..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "Base-Keypoint-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml deleted file mode 100644 index 755c120..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - MASK_ON: True - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_train_panoptic_separated",) - TEST: ("coco_2017_val_panoptic_separated",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml deleted file mode 100644 index 0e01f6f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml deleted file mode 100644 index 6afa2c1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml deleted file mode 100644 index b956b3f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml deleted file mode 100644 index 1a7aaeb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml +++ 
/dev/null @@ -1,27 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - # For better, more stable performance initialize from COCO - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" - MASK_ON: True - ROI_HEADS: - NUM_CLASSES: 8 -# This is similar to the setting used in Mask R-CNN paper, Appendix A -# But there are some differences, e.g., we did not initialize the output -# layer using the corresponding classes from COCO -INPUT: - MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 1024 - MAX_SIZE_TRAIN: 2048 - MAX_SIZE_TEST: 2048 -DATASETS: - TRAIN: ("cityscapes_fine_instance_seg_train",) - TEST: ("cityscapes_fine_instance_seg_val",) -SOLVER: - BASE_LR: 0.01 - STEPS: (18000,) - MAX_ITER: 24000 - IMS_PER_BATCH: 8 -TEST: - EVAL_PERIOD: 8000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/README.md b/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/README.md deleted file mode 100644 index a90ed9e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/README.md +++ /dev/null @@ -1,83 +0,0 @@ - -Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron. - -The differences in implementation details are shared in -[Compatibility with Other Libraries](../../docs/notes/compatibility.md). - -The differences in model zoo's experimental settings include: -* Use scale augmentation during training. This improves AP with lower training cost. -* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may - affect other AP. -* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP. -* Use `ROIAlignV2`. This does not significantly affect AP. - -In this directory, we provide a few configs that __do not__ have the above changes. -They mimic Detectron's behavior as close as possible, -and provide a fair comparison of accuracy and speed against Detectron. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | mask AP | kp. AP | model id | download |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Faster R-CNN | 1x | 0.219 | 0.038 | 3.1 | 36.9 | | | 137781054 | model \| metrics |
| Keypoint R-CNN | 1x | 0.313 | 0.071 | 5.0 | 53.1 | | 64.2 | 137781195 | model \| metrics |
| Mask R-CNN | 1x | 0.273 | 0.043 | 3.4 | 37.8 | 34.9 | | 137781281 | model \| metrics |
- -## Comparisons: - -* Faster R-CNN: Detectron's AP is 36.7, similar to ours. -* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron's - [bug](https://github.com/facebookresearch/Detectron/issues/459) lead to a drop in box AP, and can be - compensated back by some parameter tuning. -* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to more correct implementation. - -For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html). diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml deleted file mode 100644 index 6ce77f1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. - RPN: - SMOOTH_L1_BETA: 0.1111 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index aacf868..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,27 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - RPN: - SMOOTH_L1_BETA: 0.1111 - # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 - # 1000 proposals per-image is found to hurt box AP. - # Therefore we increase it to 1500 per-image. - POST_NMS_TOPK_TRAIN: 1500 -DATASETS: - TRAIN: ("keypoints_coco_2017_train",) - TEST: ("keypoints_coco_2017_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml deleted file mode 100644 index 4ea86a8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - # Detectron1 uses smooth L1 loss with some magic beta values. - # The defaults are changed to L1 loss in Detectron2. 
- RPN: - SMOOTH_L1_BETA: 0.1111 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - ROI_MASK_HEAD: - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml deleted file mode 100644 index f0c3a1b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: True - RESNETS: - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index 64b4caa..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml deleted file mode 100644 index c8b822c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +++ /dev/null @@ -1,23 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1230 - SCORE_THRESH_TEST: 0.0001 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -DATASETS: - TRAIN: ("lvis_v0.5_train",) - TEST: ("lvis_v0.5_val",) -TEST: - DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 -DATALOADER: - SAMPLER_TRAIN: "RepeatFactorTrainingSampler" - REPEAT_THRESHOLD: 0.001 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml deleted file mode 100644 index abb33b6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml +++ /dev/null @@ -1,12 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NAME: CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - RPN: - POST_NMS_TOPK_TRAIN: 2000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index e2201ad..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - NAME: CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml deleted file mode 100644 index fc117f6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml +++ /dev/null @@ -1,36 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 152 - DEFORM_ON_PER_STAGE: [False, True, True, True] - ROI_HEADS: - NAME: "CascadeROIHeads" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "GN" - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - NUM_CONV: 8 - NORM: "GN" - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - IMS_PER_BATCH: 128 - STEPS: (35000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.16 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - EVAL_PERIOD: 2500 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv_parsing.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv_parsing.yaml deleted file mode 100644 index 544f58f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv_parsing.yaml +++ /dev/null @@ -1,42 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - MASK_ON: True -# WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" - WEIGHTS: "model_0039999_e76410.pkl" - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 152 - DEFORM_ON_PER_STAGE: [False, True, True, True] - ROI_HEADS: - NAME: "CascadeROIHeads" - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "GN" - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - NUM_CONV: 8 - NORM: "GN" - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: -# IMS_PER_BATCH: 128 - IMS_PER_BATCH: 1 - STEPS: (35000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.16 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - EVAL_PERIOD: 2500 -DATASETS: - TRAIN: ("CIHP_train","VIP_trainval") - TEST: ("CIHP_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/demo.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/demo.yaml deleted file mode 100644 index bbf9685..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/demo.yaml +++ /dev/null @@ -1,25 +0,0 @@ -_BASE_: "cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml" -MODEL: - MASK_ON: True - ROI_HEADS: - NMS_THRESH_TEST: 0.95 - SCORE_THRESH_TEST: 0.5 - NUM_CLASSES: 1 -SOLVER: - IMS_PER_BATCH: 1 - STEPS: (30000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.02 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - AUG: - ENABLED: True -DATASETS: - TRAIN: ("demo_train",) - TEST: ("demo_val",) -OUTPUT_DIR: "../../data/DemoDataset/detectron2_prediction" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml deleted file mode 100644 index 4c3b767..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - CLS_AGNOSTIC_MASK: True diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml deleted file mode 100644 index 04ff988..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 - DEFORM_MODULATED: False diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml deleted file mode 100644 index 68c0ca5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 - DEFORM_MODULATED: False -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml deleted file mode 100644 index 74d274e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml +++ /dev/null @@ -1,21 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" - MASK_ON: True - RESNETS: - DEPTH: 50 - NORM: "GN" - STRIDE_IN_1X1: False - FPN: - NORM: "GN" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "GN" - ROI_MASK_HEAD: - NORM: "GN" -SOLVER: - # 3x schedule - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml deleted file mode 100644 index 11ebb07..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml +++ /dev/null @@ -1,24 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - NORM: "SyncBN" - STRIDE_IN_1X1: True - FPN: - NORM: "SyncBN" - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_CONV: 4 - NUM_FC: 1 - NORM: "SyncBN" - ROI_MASK_HEAD: - NORM: "SyncBN" -SOLVER: - # 3x schedule - STEPS: (210000, 250000) - MAX_ITER: 270000 -TEST: - PRECISE_BN: - ENABLED: True diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml deleted file mode 100644 index 34016ce..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# A large PanopticFPN for demo purposes. -# Use GN on backbone to support semantic seg. -# Use Cascade + Deform Conv to improve localization. -_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" -MODEL: - WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" - RESNETS: - DEPTH: 101 - NORM: "GN" - DEFORM_ON_PER_STAGE: [False, True, True, True] - STRIDE_IN_1X1: False - FPN: - NORM: "GN" - ROI_HEADS: - NAME: CascadeROIHeads - ROI_BOX_HEAD: - CLS_AGNOSTIC_BBOX_REG: True - ROI_MASK_HEAD: - NORM: "GN" - RPN: - POST_NMS_TOPK_TRAIN: 2000 -SOLVER: - STEPS: (105000, 125000) - MAX_ITER: 135000 - IMS_PER_BATCH: 32 - BASE_LR: 0.04 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_finetune_cihp.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_finetune_cihp.yaml deleted file mode 100644 index 766f46a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_finetune_cihp.yaml +++ /dev/null @@ -1,24 +0,0 @@ -_BASE_: "cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "model_0039999_e76410.pkl" - ROI_HEADS: - NUM_CLASSES: 1 -SOLVER: - IMS_PER_BATCH: 16 - STEPS: (140000, 180000) - MAX_ITER: 200000 - BASE_LR: 0.02 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - EVAL_PERIOD: 0 -DATASETS: - TRAIN: ("CIHP_train") - TEST: ("CIHP_val",) -OUTPUT_DIR: "./finetune_output" - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_inference.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_inference.yaml deleted file mode 100644 index d6a529b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/parsing_inference.yaml +++ /dev/null @@ -1,26 +0,0 @@ -_BASE_: "cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml" -MODEL: - MASK_ON: True - WEIGHTS: "./finetune_ouput/model_final.pth" - ROI_HEADS: - NMS_THRESH_TEST: 0.95 - SCORE_THRESH_TEST: 0.5 - NUM_CLASSES: 1 -SOLVER: - IMS_PER_BATCH: 1 - STEPS: (30000, 45000) - MAX_ITER: 50000 - BASE_LR: 0.02 -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 - CROP: - ENABLED: True -TEST: - AUG: - ENABLED: True -DATASETS: - TRAIN: ("CIHP_trainval",) - 
TEST: ("CIHP_test",) -OUTPUT_DIR: "./inference_output" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml deleted file mode 100644 index f340028..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" -MODEL: - # Train from random initialization. - WEIGHTS: "" - # It makes sense to divide by STD when training from scratch - # But it seems to make no difference on the results and C2's models didn't do this. - # So we keep things consistent with C2. - # PIXEL_STD: [57.375, 57.12, 58.395] - MASK_ON: True - BACKBONE: - FREEZE_AT: 0 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml deleted file mode 100644 index d90c9ff..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" -MODEL: - PIXEL_STD: [57.375, 57.12, 58.395] - WEIGHTS: "" - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False - BACKBONE: - FREEZE_AT: 0 -SOLVER: - # 9x schedule - IMS_PER_BATCH: 64 # 4x the standard - STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k - MAX_ITER: 202500 # 90k * 9 / 4 - BASE_LR: 0.08 -TEST: - EVAL_PERIOD: 2500 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml deleted file mode 100644 index 60d4e42..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" -MODEL: - PIXEL_STD: [57.375, 57.12, 58.395] - WEIGHTS: "" - MASK_ON: True - RESNETS: - STRIDE_IN_1X1: False - BACKBONE: - FREEZE_AT: 0 -SOLVER: - # 9x schedule - IMS_PER_BATCH: 64 # 4x the standard - STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k - MAX_ITER: 202500 # 90k * 9 / 4 - BASE_LR: 0.08 -TEST: - EVAL_PERIOD: 2500 -# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 -# to learn what you need for training from scratch. 
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml deleted file mode 100644 index ac256e1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/Misc/semantic_R_50_FPN_1x.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_train_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml deleted file mode 100644 index ea2a6ba..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 20 -INPUT: - MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) - MIN_SIZE_TEST: 800 -DATASETS: - TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') - TEST: ('voc_2007_test',) -SOLVER: - STEPS: (12000, 16000) - MAX_ITER: 18000 # 17.4 epochs - WARMUP_ITERS: 100 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml deleted file mode 100644 index e554cab..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 20 -INPUT: - MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) - MIN_SIZE_TEST: 800 -DATASETS: - TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') - TEST: ('voc_2007_test',) -SOLVER: - STEPS: (12000, 16000) - MAX_ITER: 18000 # 17.4 epochs - WARMUP_ITERS: 100 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/my_Base-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/my_Base-RCNN-FPN.yaml deleted file mode 100644 index d649eed..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/my_Base-RCNN-FPN.yaml +++ /dev/null @@ -1,42 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
- POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 2 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/README.md b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/README.md deleted file mode 100644 index a278199..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/README.md +++ /dev/null @@ -1 +0,0 @@ -These are quick configs for performance or accuracy regression tracking purposes. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index fc5a411..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index e41a0fe..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index a2f37e5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 52fc0ec..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_BASE_: 
"../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("coco_2017_val_100",) - PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) - TEST: ("coco_2017_val_100",) - PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index 14cf2aa..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" -DATASETS: - TEST: ("keypoints_coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index dc09034..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True -DATASETS: - TRAIN: ("keypoints_coco_2017_val_100",) - TEST: ("keypoints_coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml deleted file mode 100644 index 4b92392..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml +++ /dev/null @@ -1,30 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False - LOSS_WEIGHT: 4.0 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss - RPN: - SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss -DATASETS: - TRAIN: ("keypoints_coco_2017_val",) - TEST: ("keypoints_coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - WARMUP_FACTOR: 0.33333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index 9bd9628..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,28 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - KEYPOINT_ON: True - RESNETS: - DEPTH: 50 - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - NUM_CLASSES: 1 - ROI_KEYPOINT_HEAD: - POOLER_RESOLUTION: 14 - POOLER_SAMPLING_RATIO: 2 - ROI_BOX_HEAD: - SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss - RPN: - SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss -DATASETS: - TRAIN: ("keypoints_coco_2017_val",) - TEST: ("keypoints_coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - WARMUP_FACTOR: 0.33333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml deleted file mode 100644 index ab6e698..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.001 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 - CLIP_GRADIENTS: - ENABLED: True - CLIP_TYPE: "value" - CLIP_VALUE: 1.0 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml deleted file mode 100644 index b2d5b7f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml deleted file mode 100644 index 6c4f121..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.001 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml deleted file mode 100644 index f68dd8f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml +++ /dev/null @@ -1,22 +0,0 @@ -_BASE_: "../Base-RCNN-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val",) - TEST: ("coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (600,) - MAX_SIZE_TRAIN: 1000 - MIN_SIZE_TEST: 800 - MAX_SIZE_TEST: 1000 -SOLVER: - IMS_PER_BATCH: 8 # base uses 16 - WARMUP_FACTOR: 0.33333 - WARMUP_ITERS: 100 - STEPS: (11000, 11600) - MAX_ITER: 12000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml deleted file mode 100644 index e3ce6cf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index e5454bf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] - AUG: - ENABLED: True - MIN_SIZES: (700, 800) # to save some time diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 6dbfcde..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index ffca550..0000000 --- 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,21 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - BATCH_SIZE_PER_IMAGE: 256 - MASK_ON: True -DATASETS: - TRAIN: ("coco_2017_val",) - TEST: ("coco_2017_val",) -INPUT: - MIN_SIZE_TRAIN: (600,) - MAX_SIZE_TRAIN: 1000 - MIN_SIZE_TEST: 800 - MAX_SIZE_TEST: 1000 -SOLVER: - WARMUP_FACTOR: 0.3333333 - WARMUP_ITERS: 100 - STEPS: (5500, 5800) - MAX_ITER: 6000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 42.0, 1.6], ["segm", "AP", 35.4, 1.25]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml deleted file mode 100644 index 70874e3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" -DATASETS: - TEST: ("coco_2017_val_100_panoptic_separated",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml deleted file mode 100644 index 7cdee7b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_val_100_panoptic_separated",) - TEST: ("coco_2017_val_100_panoptic_separated",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 1 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml deleted file mode 100644 index 0581631..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "PanopticFPN" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 - SEM_SEG_HEAD: - LOSS_WEIGHT: 0.5 -DATASETS: - TRAIN: ("coco_2017_val_panoptic_separated",) - TEST: ("coco_2017_val_panoptic_separated",) -SOLVER: - BASE_LR: 0.01 - WARMUP_FACTOR: 0.001 - WARMUP_ITERS: 500 - STEPS: (5500,) - MAX_ITER: 7000 -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 38.73, 0.7], ["sem_seg", "mIoU", 64.73, 1.2], ["panoptic_seg", "PQ", 48.13, 0.8]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index 36b9988..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/137849486/model_final_4cafe0.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 44.36, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml deleted file mode 100644 index 8d95c1f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index c7c3f90..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" -DATASETS: - TEST: ("coco_2017_val_100",) -TEST: - EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 402d432..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("coco_2017_val_100",) - TEST: ("coco_2017_val_100",) -SOLVER: - STEPS: (30,) - MAX_ITER: 40 - BASE_LR: 0.005 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index bca7498..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TEST: ("coco_2017_val_100_panoptic_stuffonly",) -TEST: - EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml deleted file mode 100644 index 14ab606..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,18 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) - TEST: ("coco_2017_val_100_panoptic_stuffonly",) -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -SOLVER: - BASE_LR: 0.005 - STEPS: (30,) - MAX_ITER: 40 - IMS_PER_BATCH: 4 -DATALOADER: - NUM_WORKERS: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index 1f78d77..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -DATASETS: - TRAIN: ("coco_2017_val_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) -SOLVER: - BASE_LR: 0.01 - WARMUP_FACTOR: 0.001 - WARMUP_ITERS: 300 - STEPS: (5500,) - MAX_ITER: 7000 -TEST: - EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] -INPUT: - # no scale augmentation - MIN_SIZE_TRAIN: (800, ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/demo/README.md b/preprocess/humanparsing/mhp_extension/detectron2/demo/README.md deleted file mode 100644 index caa755f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/demo/README.md +++ /dev/null @@ -1,8 +0,0 @@ - -## Detectron2 Demo - -We provide a command line tool to run a simple demo of builtin models. -The usage is explained in [GETTING_STARTED.md](../GETTING_STARTED.md). - -See our [blog post](https://ai.facebook.com/blog/-detectron2-a-pytorch-based-modular-object-detection-library-) -for a high-quality demo generated with this tool. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/demo/demo.py b/preprocess/humanparsing/mhp_extension/detectron2/demo/demo.py deleted file mode 100644 index 1fd8df8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/demo/demo.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import argparse -import glob -import multiprocessing as mp -import os -import time -import cv2 -import tqdm - -from detectron2.config import get_cfg -from detectron2.data.detection_utils import read_image -from detectron2.utils.logger import setup_logger - -from predictor import VisualizationDemo - -# constants -WINDOW_NAME = "COCO detections" - - -def setup_cfg(args): - # load config from file and command-line arguments - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - # Set score_threshold for builtin models - cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold - cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold - cfg.freeze() - return cfg - - -def get_parser(): - parser = argparse.ArgumentParser(description="Detectron2 demo for builtin models") - parser.add_argument( - "--config-file", - default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", - metavar="FILE", - help="path to config file", - ) - parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") - parser.add_argument("--video-input", help="Path to video file.") - parser.add_argument( - "--input", - nargs="+", - help="A list of space separated input images; " - "or a single glob pattern such as 'directory/*.jpg'", - ) - parser.add_argument( - "--output", - help="A file or directory to save output visualizations. " - "If not given, will show output in an OpenCV window.", - ) - - parser.add_argument( - "--confidence-threshold", - type=float, - default=0.5, - help="Minimum score for instance predictions to be shown", - ) - parser.add_argument( - "--opts", - help="Modify config options using the command-line 'KEY VALUE' pairs", - default=[], - nargs=argparse.REMAINDER, - ) - return parser - - -if __name__ == "__main__": - mp.set_start_method("spawn", force=True) - args = get_parser().parse_args() - setup_logger(name="fvcore") - logger = setup_logger() - logger.info("Arguments: " + str(args)) - - cfg = setup_cfg(args) - - demo = VisualizationDemo(cfg) - - if args.input: - if len(args.input) == 1: - args.input = glob.glob(os.path.expanduser(args.input[0])) - assert args.input, "The input path(s) was not found" - for path in tqdm.tqdm(args.input, disable=not args.output): - # use PIL, to be consistent with evaluation - img = read_image(path, format="BGR") - start_time = time.time() - predictions, visualized_output = demo.run_on_image(img) - logger.info( - "{}: {} in {:.2f}s".format( - path, - "detected {} instances".format(len(predictions["instances"])) - if "instances" in predictions - else "finished", - time.time() - start_time, - ) - ) - - if args.output: - if os.path.isdir(args.output): - assert os.path.isdir(args.output), args.output - out_filename = os.path.join(args.output, os.path.basename(path)) - else: - assert len(args.input) == 1, "Please specify a directory with args.output" - out_filename = args.output - visualized_output.save(out_filename) - else: - cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) - cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) - if cv2.waitKey(0) == 27: - break # esc to quit - elif args.webcam: - assert args.input is None, "Cannot have both --input and --webcam!" - assert args.output is None, "output not yet supported with --webcam!" 
- cam = cv2.VideoCapture(0) - for vis in tqdm.tqdm(demo.run_on_video(cam)): - cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) - cv2.imshow(WINDOW_NAME, vis) - if cv2.waitKey(1) == 27: - break # esc to quit - cam.release() - cv2.destroyAllWindows() - elif args.video_input: - video = cv2.VideoCapture(args.video_input) - width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) - frames_per_second = video.get(cv2.CAP_PROP_FPS) - num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) - basename = os.path.basename(args.video_input) - - if args.output: - if os.path.isdir(args.output): - output_fname = os.path.join(args.output, basename) - output_fname = os.path.splitext(output_fname)[0] + ".mkv" - else: - output_fname = args.output - assert not os.path.isfile(output_fname), output_fname - output_file = cv2.VideoWriter( - filename=output_fname, - # some installation of opencv may not support x264 (due to its license), - # you can try other format (e.g. MPEG) - fourcc=cv2.VideoWriter_fourcc(*"x264"), - fps=float(frames_per_second), - frameSize=(width, height), - isColor=True, - ) - assert os.path.isfile(args.video_input) - for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): - if args.output: - output_file.write(vis_frame) - else: - cv2.namedWindow(basename, cv2.WINDOW_NORMAL) - cv2.imshow(basename, vis_frame) - if cv2.waitKey(1) == 27: - break # esc to quit - video.release() - if args.output: - output_file.release() - else: - cv2.destroyAllWindows() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/demo/predictor.py b/preprocess/humanparsing/mhp_extension/detectron2/demo/predictor.py deleted file mode 100644 index 689fa85..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/demo/predictor.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import atexit -import bisect -import multiprocessing as mp -from collections import deque -import cv2 -import torch - -from detectron2.data import MetadataCatalog -from detectron2.engine.defaults import DefaultPredictor -from detectron2.utils.video_visualizer import VideoVisualizer -from detectron2.utils.visualizer import ColorMode, Visualizer - - -class VisualizationDemo(object): - def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): - """ - Args: - cfg (CfgNode): - instance_mode (ColorMode): - parallel (bool): whether to run the model in different processes from visualization. - Useful since the visualization logic can be slow. - """ - self.metadata = MetadataCatalog.get( - cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" - ) - self.cpu_device = torch.device("cpu") - self.instance_mode = instance_mode - - self.parallel = parallel - if parallel: - num_gpu = torch.cuda.device_count() - self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) - else: - self.predictor = DefaultPredictor(cfg) - - def run_on_image(self, image): - """ - Args: - image (np.ndarray): an image of shape (H, W, C) (in BGR order). - This is the format used by OpenCV. - - Returns: - predictions (dict): the output of the model. - vis_output (VisImage): the visualized image output. - """ - vis_output = None - predictions = self.predictor(image) - # Convert image from OpenCV BGR format to Matplotlib RGB format. 
- image = image[:, :, ::-1] - visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) - if "panoptic_seg" in predictions: - panoptic_seg, segments_info = predictions["panoptic_seg"] - vis_output = visualizer.draw_panoptic_seg_predictions( - panoptic_seg.to(self.cpu_device), segments_info - ) - else: - if "sem_seg" in predictions: - vis_output = visualizer.draw_sem_seg( - predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) - ) - if "instances" in predictions: - instances = predictions["instances"].to(self.cpu_device) - vis_output = visualizer.draw_instance_predictions(predictions=instances) - - return predictions, vis_output - - def _frame_from_video(self, video): - while video.isOpened(): - success, frame = video.read() - if success: - yield frame - else: - break - - def run_on_video(self, video): - """ - Visualizes predictions on frames of the input video. - - Args: - video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be - either a webcam or a video file. - - Yields: - ndarray: BGR visualizations of each video frame. - """ - video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) - - def process_predictions(frame, predictions): - frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) - if "panoptic_seg" in predictions: - panoptic_seg, segments_info = predictions["panoptic_seg"] - vis_frame = video_visualizer.draw_panoptic_seg_predictions( - frame, panoptic_seg.to(self.cpu_device), segments_info - ) - elif "instances" in predictions: - predictions = predictions["instances"].to(self.cpu_device) - vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) - elif "sem_seg" in predictions: - vis_frame = video_visualizer.draw_sem_seg( - frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) - ) - - # Converts Matplotlib RGB format to OpenCV BGR format - vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) - return vis_frame - - frame_gen = self._frame_from_video(video) - if self.parallel: - buffer_size = self.predictor.default_buffer_size - - frame_data = deque() - - for cnt, frame in enumerate(frame_gen): - frame_data.append(frame) - self.predictor.put(frame) - - if cnt >= buffer_size: - frame = frame_data.popleft() - predictions = self.predictor.get() - yield process_predictions(frame, predictions) - - while len(frame_data): - frame = frame_data.popleft() - predictions = self.predictor.get() - yield process_predictions(frame, predictions) - else: - for frame in frame_gen: - yield process_predictions(frame, self.predictor(frame)) - - -class AsyncPredictor: - """ - A predictor that runs the model asynchronously, possibly on >1 GPUs. - Because rendering the visualization takes considerably amount of time, - this helps improve throughput when rendering videos. 
- """ - - class _StopToken: - pass - - class _PredictWorker(mp.Process): - def __init__(self, cfg, task_queue, result_queue): - self.cfg = cfg - self.task_queue = task_queue - self.result_queue = result_queue - super().__init__() - - def run(self): - predictor = DefaultPredictor(self.cfg) - - while True: - task = self.task_queue.get() - if isinstance(task, AsyncPredictor._StopToken): - break - idx, data = task - result = predictor(data) - self.result_queue.put((idx, result)) - - def __init__(self, cfg, num_gpus: int = 1): - """ - Args: - cfg (CfgNode): - num_gpus (int): if 0, will run on CPU - """ - num_workers = max(num_gpus, 1) - self.task_queue = mp.Queue(maxsize=num_workers * 3) - self.result_queue = mp.Queue(maxsize=num_workers * 3) - self.procs = [] - for gpuid in range(max(num_gpus, 1)): - cfg = cfg.clone() - cfg.defrost() - cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" - self.procs.append( - AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) - ) - - self.put_idx = 0 - self.get_idx = 0 - self.result_rank = [] - self.result_data = [] - - for p in self.procs: - p.start() - atexit.register(self.shutdown) - - def put(self, image): - self.put_idx += 1 - self.task_queue.put((self.put_idx, image)) - - def get(self): - self.get_idx += 1 # the index needed for this request - if len(self.result_rank) and self.result_rank[0] == self.get_idx: - res = self.result_data[0] - del self.result_data[0], self.result_rank[0] - return res - - while True: - # make sure the results are returned in the correct order - idx, res = self.result_queue.get() - if idx == self.get_idx: - return res - insert = bisect.bisect(self.result_rank, idx) - self.result_rank.insert(insert, idx) - self.result_data.insert(insert, res) - - def __len__(self): - return self.put_idx - self.get_idx - - def __call__(self, image): - self.put(image) - return self.get() - - def shutdown(self): - for _ in self.procs: - self.task_queue.put(AsyncPredictor._StopToken()) - - @property - def default_buffer_size(self): - return len(self.procs) * 5 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/__init__.py deleted file mode 100644 index 41816af..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -from .utils.env import setup_environment - -setup_environment() - - -# This line will be programatically read/write by setup.py. -# Leave them at the bottom of this file and don't touch them. -__version__ = "0.1.3" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/__init__.py deleted file mode 100644 index e17a9df..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# File: - - -from . 
import catalog as _UNUSED # register the handler -from .detection_checkpoint import DetectionCheckpointer -from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer - -__all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/c2_model_loading.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/c2_model_loading.py deleted file mode 100644 index e27ba84..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/c2_model_loading.py +++ /dev/null @@ -1,313 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import re -import torch -from fvcore.common.checkpoint import ( - get_missing_parameters_message, - get_unexpected_parameters_message, -) - - -def convert_basic_c2_names(original_keys): - """ - Apply some basic name conversion to names in C2 weights. - It only deals with typical backbone models. - - Args: - original_keys (list[str]): - Returns: - list[str]: The same number of strings matching those in original_keys. - """ - layer_keys = copy.deepcopy(original_keys) - layer_keys = [ - {"pred_b": "linear_b", "pred_w": "linear_w"}.get(k, k) for k in layer_keys - ] # some hard-coded mappings - - layer_keys = [k.replace("_", ".") for k in layer_keys] - layer_keys = [re.sub("\\.b$", ".bias", k) for k in layer_keys] - layer_keys = [re.sub("\\.w$", ".weight", k) for k in layer_keys] - # Uniform both bn and gn names to "norm" - layer_keys = [re.sub("bn\\.s$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.bias$", "norm.bias", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.rm", "norm.running_mean", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.running.mean$", "norm.running_mean", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.riv$", "norm.running_var", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.running.var$", "norm.running_var", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.gamma$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("bn\\.beta$", "norm.bias", k) for k in layer_keys] - layer_keys = [re.sub("gn\\.s$", "norm.weight", k) for k in layer_keys] - layer_keys = [re.sub("gn\\.bias$", "norm.bias", k) for k in layer_keys] - - # stem - layer_keys = [re.sub("^res\\.conv1\\.norm\\.", "conv1.norm.", k) for k in layer_keys] - # to avoid mis-matching with "conv1" in other components (e.g. 
detection head) - layer_keys = [re.sub("^conv1\\.", "stem.conv1.", k) for k in layer_keys] - - # layer1-4 is used by torchvision, however we follow the C2 naming strategy (res2-5) - # layer_keys = [re.sub("^res2.", "layer1.", k) for k in layer_keys] - # layer_keys = [re.sub("^res3.", "layer2.", k) for k in layer_keys] - # layer_keys = [re.sub("^res4.", "layer3.", k) for k in layer_keys] - # layer_keys = [re.sub("^res5.", "layer4.", k) for k in layer_keys] - - # blocks - layer_keys = [k.replace(".branch1.", ".shortcut.") for k in layer_keys] - layer_keys = [k.replace(".branch2a.", ".conv1.") for k in layer_keys] - layer_keys = [k.replace(".branch2b.", ".conv2.") for k in layer_keys] - layer_keys = [k.replace(".branch2c.", ".conv3.") for k in layer_keys] - - # DensePose substitutions - layer_keys = [re.sub("^body.conv.fcn", "body_conv_fcn", k) for k in layer_keys] - layer_keys = [k.replace("AnnIndex.lowres", "ann_index_lowres") for k in layer_keys] - layer_keys = [k.replace("Index.UV.lowres", "index_uv_lowres") for k in layer_keys] - layer_keys = [k.replace("U.lowres", "u_lowres") for k in layer_keys] - layer_keys = [k.replace("V.lowres", "v_lowres") for k in layer_keys] - return layer_keys - - -def convert_c2_detectron_names(weights): - """ - Map Caffe2 Detectron weight names to Detectron2 names. - - Args: - weights (dict): name -> tensor - - Returns: - dict: detectron2 names -> tensor - dict: detectron2 names -> C2 names - """ - logger = logging.getLogger(__name__) - logger.info("Remapping C2 weights ......") - original_keys = sorted(weights.keys()) - layer_keys = copy.deepcopy(original_keys) - - layer_keys = convert_basic_c2_names(layer_keys) - - # -------------------------------------------------------------------------- - # RPN hidden representation conv - # -------------------------------------------------------------------------- - # FPN case - # In the C2 model, the RPN hidden layer conv is defined for FPN level 2 and then - # shared for all other levels, hence the appearance of "fpn2" - layer_keys = [ - k.replace("conv.rpn.fpn2", "proposal_generator.rpn_head.conv") for k in layer_keys - ] - # Non-FPN case - layer_keys = [k.replace("conv.rpn", "proposal_generator.rpn_head.conv") for k in layer_keys] - - # -------------------------------------------------------------------------- - # RPN box transformation conv - # -------------------------------------------------------------------------- - # FPN case (see note above about "fpn2") - layer_keys = [ - k.replace("rpn.bbox.pred.fpn2", "proposal_generator.rpn_head.anchor_deltas") - for k in layer_keys - ] - layer_keys = [ - k.replace("rpn.cls.logits.fpn2", "proposal_generator.rpn_head.objectness_logits") - for k in layer_keys - ] - # Non-FPN case - layer_keys = [ - k.replace("rpn.bbox.pred", "proposal_generator.rpn_head.anchor_deltas") for k in layer_keys - ] - layer_keys = [ - k.replace("rpn.cls.logits", "proposal_generator.rpn_head.objectness_logits") - for k in layer_keys - ] - - # -------------------------------------------------------------------------- - # Fast R-CNN box head - # -------------------------------------------------------------------------- - layer_keys = [re.sub("^bbox\\.pred", "bbox_pred", k) for k in layer_keys] - layer_keys = [re.sub("^cls\\.score", "cls_score", k) for k in layer_keys] - layer_keys = [re.sub("^fc6\\.", "box_head.fc1.", k) for k in layer_keys] - layer_keys = [re.sub("^fc7\\.", "box_head.fc2.", k) for k in layer_keys] - # 4conv1fc head tensor names: head_conv1_w, head_conv1_gn_s - layer_keys = 
[re.sub("^head\\.conv", "box_head.conv", k) for k in layer_keys] - - # -------------------------------------------------------------------------- - # FPN lateral and output convolutions - # -------------------------------------------------------------------------- - def fpn_map(name): - """ - Look for keys with the following patterns: - 1) Starts with "fpn.inner." - Example: "fpn.inner.res2.2.sum.lateral.weight" - Meaning: These are lateral pathway convolutions - 2) Starts with "fpn.res" - Example: "fpn.res2.2.sum.weight" - Meaning: These are FPN output convolutions - """ - splits = name.split(".") - norm = ".norm" if "norm" in splits else "" - if name.startswith("fpn.inner."): - # splits example: ['fpn', 'inner', 'res2', '2', 'sum', 'lateral', 'weight'] - stage = int(splits[2][len("res") :]) - return "fpn_lateral{}{}.{}".format(stage, norm, splits[-1]) - elif name.startswith("fpn.res"): - # splits example: ['fpn', 'res2', '2', 'sum', 'weight'] - stage = int(splits[1][len("res") :]) - return "fpn_output{}{}.{}".format(stage, norm, splits[-1]) - return name - - layer_keys = [fpn_map(k) for k in layer_keys] - - # -------------------------------------------------------------------------- - # Mask R-CNN mask head - # -------------------------------------------------------------------------- - # roi_heads.StandardROIHeads case - layer_keys = [k.replace(".[mask].fcn", "mask_head.mask_fcn") for k in layer_keys] - layer_keys = [re.sub("^\\.mask\\.fcn", "mask_head.mask_fcn", k) for k in layer_keys] - layer_keys = [k.replace("mask.fcn.logits", "mask_head.predictor") for k in layer_keys] - # roi_heads.Res5ROIHeads case - layer_keys = [k.replace("conv5.mask", "mask_head.deconv") for k in layer_keys] - - # -------------------------------------------------------------------------- - # Keypoint R-CNN head - # -------------------------------------------------------------------------- - # interestingly, the keypoint head convs have blob names that are simply "conv_fcnX" - layer_keys = [k.replace("conv.fcn", "roi_heads.keypoint_head.conv_fcn") for k in layer_keys] - layer_keys = [ - k.replace("kps.score.lowres", "roi_heads.keypoint_head.score_lowres") for k in layer_keys - ] - layer_keys = [k.replace("kps.score.", "roi_heads.keypoint_head.score.") for k in layer_keys] - - # -------------------------------------------------------------------------- - # Done with replacements - # -------------------------------------------------------------------------- - assert len(set(layer_keys)) == len(layer_keys) - assert len(original_keys) == len(layer_keys) - - new_weights = {} - new_keys_to_original_keys = {} - for orig, renamed in zip(original_keys, layer_keys): - new_keys_to_original_keys[renamed] = orig - if renamed.startswith("bbox_pred.") or renamed.startswith("mask_head.predictor."): - # remove the meaningless prediction weight for background class - new_start_idx = 4 if renamed.startswith("bbox_pred.") else 1 - new_weights[renamed] = weights[orig][new_start_idx:] - logger.info( - "Remove prediction weight for background class in {}. 
The shape changes from " - "{} to {}.".format( - renamed, tuple(weights[orig].shape), tuple(new_weights[renamed].shape) - ) - ) - elif renamed.startswith("cls_score."): - # move weights of bg class from original index 0 to last index - logger.info( - "Move classification weights for background class in {} from index 0 to " - "index {}.".format(renamed, weights[orig].shape[0] - 1) - ) - new_weights[renamed] = torch.cat([weights[orig][1:], weights[orig][:1]]) - else: - new_weights[renamed] = weights[orig] - - return new_weights, new_keys_to_original_keys - - -# Note the current matching is not symmetric. -# it assumes model_state_dict will have longer names. -def align_and_update_state_dicts(model_state_dict, ckpt_state_dict, c2_conversion=True): - """ - Match names between the two state-dict, and update the values of model_state_dict in-place with - copies of the matched tensor in ckpt_state_dict. - If `c2_conversion==True`, `ckpt_state_dict` is assumed to be a Caffe2 - model and will be renamed at first. - - Strategy: suppose that the models that we will create will have prefixes appended - to each of its keys, for example due to an extra level of nesting that the original - pre-trained weights from ImageNet won't contain. For example, model.state_dict() - might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains - res2.conv1.weight. We thus want to match both parameters together. - For that, we look for each model weight, look among all loaded keys if there is one - that is a suffix of the current weight name, and use it if that's the case. - If multiple matches exist, take the one with longest size - of the corresponding name. For example, for the same model as before, the pretrained - weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, - we want to match backbone[0].body.conv1.weight to conv1.weight, and - backbone[0].body.res2.conv1.weight to res2.conv1.weight. - """ - model_keys = sorted(model_state_dict.keys()) - if c2_conversion: - ckpt_state_dict, original_keys = convert_c2_detectron_names(ckpt_state_dict) - # original_keys: the name in the original dict (before renaming) - else: - original_keys = {x: x for x in ckpt_state_dict.keys()} - ckpt_keys = sorted(ckpt_state_dict.keys()) - - def match(a, b): - # Matched ckpt_key should be a complete (starts with '.') suffix. - # For example, roi_heads.mesh_head.whatever_conv1 does not match conv1, - # but matches whatever_conv1 or mesh_head.whatever_conv1. - return a == b or a.endswith("." 
+ b) - - # get a matrix of string matches, where each (i, j) entry correspond to the size of the - # ckpt_key string, if it matches - match_matrix = [len(j) if match(i, j) else 0 for i in model_keys for j in ckpt_keys] - match_matrix = torch.as_tensor(match_matrix).view(len(model_keys), len(ckpt_keys)) - # use the matched one with longest size in case of multiple matches - max_match_size, idxs = match_matrix.max(1) - # remove indices that correspond to no-match - idxs[max_match_size == 0] = -1 - - # used for logging - max_len_model = max(len(key) for key in model_keys) if model_keys else 1 - max_len_ckpt = max(len(key) for key in ckpt_keys) if ckpt_keys else 1 - log_str_template = "{: <{}} loaded from {: <{}} of shape {}" - logger = logging.getLogger(__name__) - # matched_pairs (matched checkpoint key --> matched model key) - matched_keys = {} - for idx_model, idx_ckpt in enumerate(idxs.tolist()): - if idx_ckpt == -1: - continue - key_model = model_keys[idx_model] - key_ckpt = ckpt_keys[idx_ckpt] - value_ckpt = ckpt_state_dict[key_ckpt] - shape_in_model = model_state_dict[key_model].shape - - if shape_in_model != value_ckpt.shape: - logger.warning( - "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( - key_ckpt, value_ckpt.shape, key_model, shape_in_model - ) - ) - logger.warning( - "{} will not be loaded. Please double check and see if this is desired.".format( - key_ckpt - ) - ) - continue - - model_state_dict[key_model] = value_ckpt.clone() - if key_ckpt in matched_keys: # already added to matched_keys - logger.error( - "Ambiguity found for {} in checkpoint!" - "It matches at least two keys in the model ({} and {}).".format( - key_ckpt, key_model, matched_keys[key_ckpt] - ) - ) - raise ValueError("Cannot match one checkpoint key to multiple keys in the model.") - - matched_keys[key_ckpt] = key_model - logger.info( - log_str_template.format( - key_model, - max_len_model, - original_keys[key_ckpt], - max_len_ckpt, - tuple(shape_in_model), - ) - ) - matched_model_keys = matched_keys.values() - matched_ckpt_keys = matched_keys.keys() - # print warnings about unmatched keys on both side - unmatched_model_keys = [k for k in model_keys if k not in matched_model_keys] - if len(unmatched_model_keys): - logger.info(get_missing_parameters_message(unmatched_model_keys)) - - unmatched_ckpt_keys = [k for k in ckpt_keys if k not in matched_ckpt_keys] - if len(unmatched_ckpt_keys): - logger.info( - get_unexpected_parameters_message(original_keys[x] for x in unmatched_ckpt_keys) - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/catalog.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/catalog.py deleted file mode 100644 index 62f81f3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/catalog.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -from fvcore.common.file_io import PathHandler, PathManager - - -class ModelCatalog(object): - """ - Store mappings from names to third-party models. - """ - - S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" - - # MSRA models have STRIDE_IN_1X1=True. False otherwise. - # NOTE: all BN models here have fused BN into an affine layer. - # As a result, you should only load them to a model with "FrozenBN". - # Loading them to a model with regular BN or SyncBN is wrong. 
- # Even when loaded to FrozenBN, it is still different from affine by an epsilon, - # which should be negligible for training. - # NOTE: all models here uses PIXEL_STD=[1,1,1] - # NOTE: Most of the BN models here are no longer used. We use the - # re-converted pre-trained models under detectron2 model zoo instead. - C2_IMAGENET_MODELS = { - "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", - "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", - "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", - "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", - "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", - "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", - "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", - } - - C2_DETECTRON_PATH_FORMAT = ( - "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" # noqa B950 - ) - - C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" - C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" - - # format: {model_name} -> part of the url - C2_DETECTRON_MODELS = { - "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 - "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 - "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 - "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 - "35858791/e2e_mask_rcnn_R-50-C4_1x": "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 - "35858933/e2e_mask_rcnn_R-50-FPN_1x": "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 - "35861795/e2e_mask_rcnn_R-101-FPN_1x": "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 - "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 - "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 - "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 - "35998355/rpn_R-50-C4_1x": "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 - "35998814/rpn_R-50-FPN_1x": "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 - "36225147/fast_R-50-FPN_1x": "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 - } - - @staticmethod - def get(name): - if name.startswith("Caffe2Detectron/COCO"): - return ModelCatalog._get_c2_detectron_baseline(name) - if name.startswith("ImageNetPretrained/"): - return ModelCatalog._get_c2_imagenet_pretrained(name) - raise RuntimeError("model not present in the catalog: {}".format(name)) - - @staticmethod - def _get_c2_imagenet_pretrained(name): - prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX - name = name[len("ImageNetPretrained/") :] - name = ModelCatalog.C2_IMAGENET_MODELS[name] - url = "/".join([prefix, name]) - return url - - @staticmethod - def _get_c2_detectron_baseline(name): - name = name[len("Caffe2Detectron/COCO/") :] - url = 
ModelCatalog.C2_DETECTRON_MODELS[name] - if "keypoint_rcnn" in name: - dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS - else: - dataset = ModelCatalog.C2_DATASET_COCO - - if "35998355/rpn_R-50-C4_1x" in name: - # this one model is somehow different from others .. - type = "rpn" - else: - type = "generalized_rcnn" - - # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. - url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( - prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset - ) - return url - - -class ModelCatalogHandler(PathHandler): - """ - Resolve URL like catalog://. - """ - - PREFIX = "catalog://" - - def _get_supported_prefixes(self): - return [self.PREFIX] - - def _get_local_path(self, path): - logger = logging.getLogger(__name__) - catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) - logger.info("Catalog entry {} points to {}".format(path, catalog_path)) - return PathManager.get_local_path(catalog_path) - - def _open(self, path, mode="r", **kwargs): - return PathManager.open(self._get_local_path(path), mode, **kwargs) - - -class Detectron2Handler(PathHandler): - """ - Resolve anything that's in Detectron2 model zoo. - """ - - PREFIX = "detectron2://" - S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" - - def _get_supported_prefixes(self): - return [self.PREFIX] - - def _get_local_path(self, path): - name = path[len(self.PREFIX) :] - return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) - - def _open(self, path, mode="r", **kwargs): - return PathManager.open(self._get_local_path(path), mode, **kwargs) - - -PathManager.register_handler(ModelCatalogHandler()) -PathManager.register_handler(Detectron2Handler()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/detection_checkpoint.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/detection_checkpoint.py deleted file mode 100644 index 06e6739..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/checkpoint/detection_checkpoint.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import pickle -from fvcore.common.checkpoint import Checkpointer -from fvcore.common.file_io import PathManager - -import detectron2.utils.comm as comm - -from .c2_model_loading import align_and_update_state_dicts - - -class DetectionCheckpointer(Checkpointer): - """ - Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 - model zoo, and apply conversions for legacy models. 
- """ - - def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): - is_main_process = comm.is_main_process() - super().__init__( - model, - save_dir, - save_to_disk=is_main_process if save_to_disk is None else save_to_disk, - **checkpointables, - ) - - def _load_file(self, filename): - if filename.endswith(".pkl"): - with PathManager.open(filename, "rb") as f: - data = pickle.load(f, encoding="latin1") - if "model" in data and "__author__" in data: - # file is in Detectron2 model zoo format - self.logger.info("Reading a file from '{}'".format(data["__author__"])) - return data - else: - # assume file is from Caffe2 / Detectron1 model zoo - if "blobs" in data: - # Detection models have "blobs", but ImageNet models don't - data = data["blobs"] - data = {k: v for k, v in data.items() if not k.endswith("_momentum")} - return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} - - loaded = super()._load_file(filename) # load native pth checkpoint - if "model" not in loaded: - loaded = {"model": loaded} - return loaded - - def _load_model(self, checkpoint): - if checkpoint.get("matching_heuristics", False): - self._convert_ndarray_to_tensor(checkpoint["model"]) - # convert weights by name-matching heuristics - model_state_dict = self.model.state_dict() - align_and_update_state_dicts( - model_state_dict, - checkpoint["model"], - c2_conversion=checkpoint.get("__author__", None) == "Caffe2", - ) - checkpoint["model"] = model_state_dict - # for non-caffe2 models, use standard ways to load it - incompatible = super()._load_model(checkpoint) - if incompatible is None: # support older versions of fvcore - return None - - model_buffers = dict(self.model.named_buffers(recurse=False)) - for k in ["pixel_mean", "pixel_std"]: - # Ignore missing key message about pixel_mean/std. - # Though they may be missing in old checkpoints, they will be correctly - # initialized from config anyway. - if k in model_buffers: - try: - incompatible.missing_keys.remove(k) - except ValueError: - pass - return incompatible diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/__init__.py deleted file mode 100644 index f996ecd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .compat import downgrade_config, upgrade_config -from .config import CfgNode, get_cfg, global_cfg, set_global_cfg, configurable - -__all__ = [ - "CfgNode", - "get_cfg", - "global_cfg", - "set_global_cfg", - "downgrade_config", - "upgrade_config", - "configurable", -] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/compat.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/compat.py deleted file mode 100644 index 41fe3a0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/compat.py +++ /dev/null @@ -1,229 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Backward compatibility of configs. - -Instructions to bump version: -+ It's not needed to bump version if new keys are added. - It's only needed when backward-incompatible changes happen - (i.e., some existing keys disappear, or the meaning of a key changes) -+ To bump version, do the following: - 1. Increment _C.VERSION in defaults.py - 2. Add a converter in this file. 
- - Each ConverterVX has a function "upgrade" which in-place upgrades config from X-1 to X, - and a function "downgrade" which in-place downgrades config from X to X-1 - - In each function, VERSION is left unchanged. - - Each converter assumes that its input has the relevant keys - (i.e., the input is not a partial config). - 3. Run the tests (test_config.py) to make sure the upgrade & downgrade - functions are consistent. -""" - -import logging -from typing import List, Optional, Tuple - -from .config import CfgNode as CN -from .defaults import _C - -__all__ = ["upgrade_config", "downgrade_config"] - - -def upgrade_config(cfg: CN, to_version: Optional[int] = None) -> CN: - """ - Upgrade a config from its current version to a newer version. - - Args: - cfg (CfgNode): - to_version (int): defaults to the latest version. - """ - cfg = cfg.clone() - if to_version is None: - to_version = _C.VERSION - - assert cfg.VERSION <= to_version, "Cannot upgrade from v{} to v{}!".format( - cfg.VERSION, to_version - ) - for k in range(cfg.VERSION, to_version): - converter = globals()["ConverterV" + str(k + 1)] - converter.upgrade(cfg) - cfg.VERSION = k + 1 - return cfg - - -def downgrade_config(cfg: CN, to_version: int) -> CN: - """ - Downgrade a config from its current version to an older version. - - Args: - cfg (CfgNode): - to_version (int): - - Note: - A general downgrade of arbitrary configs is not always possible due to the - different functionalities in different versions. - The purpose of downgrade is only to recover the defaults in old versions, - allowing it to load an old partial yaml config. - Therefore, the implementation only needs to fill in the default values - in the old version when a general downgrade is not possible. - """ - cfg = cfg.clone() - assert cfg.VERSION >= to_version, "Cannot downgrade from v{} to v{}!".format( - cfg.VERSION, to_version - ) - for k in range(cfg.VERSION, to_version, -1): - converter = globals()["ConverterV" + str(k)] - converter.downgrade(cfg) - cfg.VERSION = k - 1 - return cfg - - -def guess_version(cfg: CN, filename: str) -> int: - """ - Guess the version of a partial config where the VERSION field is not specified. - Returns the version, or the latest if cannot make a guess. - - This makes it easier for users to migrate. - """ - logger = logging.getLogger(__name__) - - def _has(name: str) -> bool: - cur = cfg - for n in name.split("."): - if n not in cur: - return False - cur = cur[n] - return True - - # Most users' partial configs have "MODEL.WEIGHT", so guess on it - ret = None - if _has("MODEL.WEIGHT") or _has("TEST.AUG_ON"): - ret = 1 - - if ret is not None: - logger.warning("Config '{}' has no VERSION. Assuming it to be v{}.".format(filename, ret)) - else: - ret = _C.VERSION - logger.warning( - "Config '{}' has no VERSION. 
Assuming it to be compatible with latest v{}.".format( - filename, ret - ) - ) - return ret - - -def _rename(cfg: CN, old: str, new: str) -> None: - old_keys = old.split(".") - new_keys = new.split(".") - - def _set(key_seq: List[str], val: str) -> None: - cur = cfg - for k in key_seq[:-1]: - if k not in cur: - cur[k] = CN() - cur = cur[k] - cur[key_seq[-1]] = val - - def _get(key_seq: List[str]) -> CN: - cur = cfg - for k in key_seq: - cur = cur[k] - return cur - - def _del(key_seq: List[str]) -> None: - cur = cfg - for k in key_seq[:-1]: - cur = cur[k] - del cur[key_seq[-1]] - if len(cur) == 0 and len(key_seq) > 1: - _del(key_seq[:-1]) - - _set(new_keys, _get(old_keys)) - _del(old_keys) - - -class _RenameConverter: - """ - A converter that handles simple rename. - """ - - RENAME: List[Tuple[str, str]] = [] # list of tuples of (old name, new name) - - @classmethod - def upgrade(cls, cfg: CN) -> None: - for old, new in cls.RENAME: - _rename(cfg, old, new) - - @classmethod - def downgrade(cls, cfg: CN) -> None: - for old, new in cls.RENAME[::-1]: - _rename(cfg, new, old) - - -class ConverterV1(_RenameConverter): - RENAME = [("MODEL.RPN_HEAD.NAME", "MODEL.RPN.HEAD_NAME")] - - -class ConverterV2(_RenameConverter): - """ - A large bulk of rename, before public release. - """ - - RENAME = [ - ("MODEL.WEIGHT", "MODEL.WEIGHTS"), - ("MODEL.PANOPTIC_FPN.SEMANTIC_LOSS_SCALE", "MODEL.SEM_SEG_HEAD.LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.RPN_LOSS_SCALE", "MODEL.RPN.LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.INSTANCE_LOSS_SCALE", "MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT"), - ("MODEL.PANOPTIC_FPN.COMBINE_ON", "MODEL.PANOPTIC_FPN.COMBINE.ENABLED"), - ( - "MODEL.PANOPTIC_FPN.COMBINE_OVERLAP_THRESHOLD", - "MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH", - ), - ( - "MODEL.PANOPTIC_FPN.COMBINE_STUFF_AREA_LIMIT", - "MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT", - ), - ( - "MODEL.PANOPTIC_FPN.COMBINE_INSTANCES_CONFIDENCE_THRESHOLD", - "MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH", - ), - ("MODEL.ROI_HEADS.SCORE_THRESH", "MODEL.ROI_HEADS.SCORE_THRESH_TEST"), - ("MODEL.ROI_HEADS.NMS", "MODEL.ROI_HEADS.NMS_THRESH_TEST"), - ("MODEL.RETINANET.INFERENCE_SCORE_THRESHOLD", "MODEL.RETINANET.SCORE_THRESH_TEST"), - ("MODEL.RETINANET.INFERENCE_TOPK_CANDIDATES", "MODEL.RETINANET.TOPK_CANDIDATES_TEST"), - ("MODEL.RETINANET.INFERENCE_NMS_THRESHOLD", "MODEL.RETINANET.NMS_THRESH_TEST"), - ("TEST.DETECTIONS_PER_IMG", "TEST.DETECTIONS_PER_IMAGE"), - ("TEST.AUG_ON", "TEST.AUG.ENABLED"), - ("TEST.AUG_MIN_SIZES", "TEST.AUG.MIN_SIZES"), - ("TEST.AUG_MAX_SIZE", "TEST.AUG.MAX_SIZE"), - ("TEST.AUG_FLIP", "TEST.AUG.FLIP"), - ] - - @classmethod - def upgrade(cls, cfg: CN) -> None: - super().upgrade(cfg) - - if cfg.MODEL.META_ARCHITECTURE == "RetinaNet": - _rename( - cfg, "MODEL.RETINANET.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS" - ) - _rename(cfg, "MODEL.RETINANET.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") - del cfg["MODEL"]["RPN"]["ANCHOR_SIZES"] - del cfg["MODEL"]["RPN"]["ANCHOR_ASPECT_RATIOS"] - else: - _rename(cfg, "MODEL.RPN.ANCHOR_ASPECT_RATIOS", "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS") - _rename(cfg, "MODEL.RPN.ANCHOR_SIZES", "MODEL.ANCHOR_GENERATOR.SIZES") - del cfg["MODEL"]["RETINANET"]["ANCHOR_SIZES"] - del cfg["MODEL"]["RETINANET"]["ANCHOR_ASPECT_RATIOS"] - del cfg["MODEL"]["RETINANET"]["ANCHOR_STRIDES"] - - @classmethod - def downgrade(cls, cfg: CN) -> None: - super().downgrade(cfg) - - _rename(cfg, "MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS", "MODEL.RPN.ANCHOR_ASPECT_RATIOS") - _rename(cfg, 
"MODEL.ANCHOR_GENERATOR.SIZES", "MODEL.RPN.ANCHOR_SIZES") - cfg.MODEL.RETINANET.ANCHOR_ASPECT_RATIOS = cfg.MODEL.RPN.ANCHOR_ASPECT_RATIOS - cfg.MODEL.RETINANET.ANCHOR_SIZES = cfg.MODEL.RPN.ANCHOR_SIZES - cfg.MODEL.RETINANET.ANCHOR_STRIDES = [] # this is not used anywhere in any version diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/config.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/config.py deleted file mode 100644 index 14ad524..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/config.py +++ /dev/null @@ -1,202 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import functools -import inspect -import logging -from fvcore.common.config import CfgNode as _CfgNode -from fvcore.common.file_io import PathManager - - -class CfgNode(_CfgNode): - """ - The same as `fvcore.common.config.CfgNode`, but different in: - - 1. Use unsafe yaml loading by default. - Note that this may lead to arbitrary code execution: you must not - load a config file from untrusted sources before manually inspecting - the content of the file. - 2. Support config versioning. - When attempting to merge an old config, it will convert the old config automatically. - """ - - # Note that the default value of allow_unsafe is changed to True - def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None: - assert PathManager.isfile(cfg_filename), f"Config file '{cfg_filename}' does not exist!" - loaded_cfg = _CfgNode.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) - loaded_cfg = type(self)(loaded_cfg) - - # defaults.py needs to import CfgNode - from .defaults import _C - - latest_ver = _C.VERSION - assert ( - latest_ver == self.VERSION - ), "CfgNode.merge_from_file is only allowed on a config object of latest version!" - - logger = logging.getLogger(__name__) - - loaded_ver = loaded_cfg.get("VERSION", None) - if loaded_ver is None: - from .compat import guess_version - - loaded_ver = guess_version(loaded_cfg, cfg_filename) - assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format( - loaded_ver, self.VERSION - ) - - if loaded_ver == self.VERSION: - self.merge_from_other_cfg(loaded_cfg) - else: - # compat.py needs to import CfgNode - from .compat import upgrade_config, downgrade_config - - logger.warning( - "Loading an old v{} config file '{}' by automatically upgrading to v{}. " - "See docs/CHANGELOG.md for instructions to update your files.".format( - loaded_ver, cfg_filename, self.VERSION - ) - ) - # To convert, first obtain a full config at an old version - old_self = downgrade_config(self, to_version=loaded_ver) - old_self.merge_from_other_cfg(loaded_cfg) - new_config = upgrade_config(old_self) - self.clear() - self.update(new_config) - - def dump(self, *args, **kwargs): - """ - Returns: - str: a yaml string representation of the config - """ - # to make it show up in docs - return super().dump(*args, **kwargs) - - -global_cfg = CfgNode() - - -def get_cfg() -> CfgNode: - """ - Get a copy of the default config. - - Returns: - a detectron2 CfgNode instance. - """ - from .defaults import _C - - return _C.clone() - - -def set_global_cfg(cfg: CfgNode) -> None: - """ - Let the global config point to the given cfg. - - Assume that the given "cfg" has the key "KEY", after calling - `set_global_cfg(cfg)`, the key can be accessed by: - - .. 
code-block:: python - - from detectron2.config import global_cfg - print(global_cfg.KEY) - - By using a hacky global config, you can access these configs anywhere, - without having to pass the config object or the values deep into the code. - This is a hacky feature introduced for quick prototyping / research exploration. - """ - global global_cfg - global_cfg.clear() - global_cfg.update(cfg) - - -def configurable(init_func): - """ - Decorate a class's __init__ method so that it can be called with a CfgNode - object using the class's from_config classmethod. - - Examples: - - .. code-block:: python - - class A: - @configurable - def __init__(self, a, b=2, c=3): - pass - - @classmethod - def from_config(cls, cfg): - # Returns kwargs to be passed to __init__ - return {"a": cfg.A, "b": cfg.B} - - a1 = A(a=1, b=2) # regular construction - a2 = A(cfg) # construct with a cfg - a3 = A(cfg, b=3, c=4) # construct with extra overwrite - """ - assert init_func.__name__ == "__init__", "@configurable should only be used for __init__!" - if init_func.__module__.startswith("detectron2."): - assert ( - init_func.__doc__ is not None and "experimental" in init_func.__doc__ - ), f"configurable {init_func} should be marked experimental" - - @functools.wraps(init_func) - def wrapped(self, *args, **kwargs): - try: - from_config_func = type(self).from_config - except AttributeError: - raise AttributeError("Class with @configurable must have a 'from_config' classmethod.") - if not inspect.ismethod(from_config_func): - raise TypeError("Class with @configurable must have a 'from_config' classmethod.") - - if _called_with_cfg(*args, **kwargs): - explicit_args = _get_args_from_config(from_config_func, *args, **kwargs) - init_func(self, **explicit_args) - else: - init_func(self, *args, **kwargs) - - return wrapped - - -def _get_args_from_config(from_config_func, *args, **kwargs): - """ - Use `from_config` to obtain explicit arguments. - - Returns: - dict: arguments to be used for cls.__init__ - """ - signature = inspect.signature(from_config_func) - if list(signature.parameters.keys())[0] != "cfg": - raise TypeError( - f"{from_config_func.__self__}.from_config must take 'cfg' as the first argument!" - ) - support_var_arg = any( - param.kind in [param.VAR_POSITIONAL, param.VAR_KEYWORD] - for param in signature.parameters.values() - ) - if support_var_arg: # forward all arguments to from_config, if from_config accepts them - ret = from_config_func(*args, **kwargs) - else: - # forward supported arguments to from_config - supported_arg_names = set(signature.parameters.keys()) - extra_kwargs = {} - for name in list(kwargs.keys()): - if name not in supported_arg_names: - extra_kwargs[name] = kwargs.pop(name) - ret = from_config_func(*args, **kwargs) - # forward the other arguments to __init__ - ret.update(extra_kwargs) - return ret - - -def _called_with_cfg(*args, **kwargs): - """ - Returns: - bool: whether the arguments contain CfgNode and should be considered - forwarded to from_config. - """ - if len(args) and isinstance(args[0], _CfgNode): - return True - if isinstance(kwargs.pop("cfg", None), _CfgNode): - return True - # `from_config`'s first argument is forced to be "cfg". - # So the above check covers all cases. 
- return False diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/defaults.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/defaults.py deleted file mode 100644 index b9ad62f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/config/defaults.py +++ /dev/null @@ -1,598 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .config import CfgNode as CN - -# ----------------------------------------------------------------------------- -# Convention about Training / Test specific parameters -# ----------------------------------------------------------------------------- -# Whenever an argument can be either used for training or for testing, the -# corresponding name will be post-fixed by a _TRAIN for a training parameter, -# or _TEST for a test-specific parameter. -# For example, the number of images during training will be -# IMAGES_PER_BATCH_TRAIN, while the number of images for testing will be -# IMAGES_PER_BATCH_TEST - -# ----------------------------------------------------------------------------- -# Config definition -# ----------------------------------------------------------------------------- - -_C = CN() - -# The version number, to upgrade from old configs to new ones if any -# changes happen. It's recommended to keep a VERSION in your config file. -_C.VERSION = 2 - -_C.MODEL = CN() -_C.MODEL.LOAD_PROPOSALS = False -_C.MODEL.MASK_ON = False -_C.MODEL.KEYPOINT_ON = False -_C.MODEL.DEVICE = "cuda" -_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN" - -# Path (possibly with schema like catalog:// or detectron2://) to a checkpoint file -# to be loaded to the model. You can find available models in the model zoo. -_C.MODEL.WEIGHTS = "" - -# Values to be used for image normalization (BGR order, since INPUT.FORMAT defaults to BGR). -# To train on images of different number of channels, just set different mean & std. -# Default values are the mean pixel value from ImageNet: [103.53, 116.28, 123.675] -_C.MODEL.PIXEL_MEAN = [103.530, 116.280, 123.675] -# When using pre-trained models in Detectron1 or any MSRA models, -# std has been absorbed into its conv1 weights, so the std needs to be set 1. -# Otherwise, you can use [57.375, 57.120, 58.395] (ImageNet std) -_C.MODEL.PIXEL_STD = [1.0, 1.0, 1.0] - - -# ----------------------------------------------------------------------------- -# INPUT -# ----------------------------------------------------------------------------- -_C.INPUT = CN() -# Size of the smallest side of the image during training -_C.INPUT.MIN_SIZE_TRAIN = (800,) -# Sample size of smallest side by choice or random selection from range give by -# INPUT.MIN_SIZE_TRAIN -_C.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice" -# Maximum size of the side of the image during training -_C.INPUT.MAX_SIZE_TRAIN = 1333 -# Size of the smallest side of the image during testing. Set to zero to disable resize in testing. -_C.INPUT.MIN_SIZE_TEST = 800 -# Maximum size of the side of the image during testing -_C.INPUT.MAX_SIZE_TEST = 1333 - -# `True` if cropping is used for data augmentation during training -_C.INPUT.CROP = CN({"ENABLED": False}) -# Cropping type: -# - "relative" crop (H * CROP.SIZE[0], W * CROP.SIZE[1]) part of an input of size (H, W) -# - "relative_range" uniformly sample relative crop size from between [CROP.SIZE[0], [CROP.SIZE[1]]. -# and [1, 1] and use it as in "relative" scenario. -# - "absolute" crop part of an input with absolute size: (CROP.SIZE[0], CROP.SIZE[1]). 
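# Illustrative example of the three crop types above (a sketch, not taken from the
# config itself): with CROP.SIZE = [0.9, 0.9] on an 800x1333 input,
#   "relative"       -> a fixed crop of roughly 720x1200 (0.9*H x 0.9*W)
#   "relative_range" -> the per-dimension scale is first sampled uniformly from [0.9, 1.0]
#   "absolute"       -> CROP.SIZE is read as absolute pixel sizes, e.g. [640, 640]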
-_C.INPUT.CROP.TYPE = "relative_range" -# Size of crop in range (0, 1] if CROP.TYPE is "relative" or "relative_range" and in number of -# pixels if CROP.TYPE is "absolute" -_C.INPUT.CROP.SIZE = [0.9, 0.9] - - -# Whether the model needs RGB, YUV, HSV etc. -# Should be one of the modes defined here, as we use PIL to read the image: -# https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-modes -# with BGR being the one exception. One can set image format to BGR, we will -# internally use RGB for conversion and flip the channels over -_C.INPUT.FORMAT = "BGR" -# The ground truth mask format that the model will use. -# Mask R-CNN supports either "polygon" or "bitmask" as ground truth. -_C.INPUT.MASK_FORMAT = "polygon" # alternative: "bitmask" - - -# ----------------------------------------------------------------------------- -# Dataset -# ----------------------------------------------------------------------------- -_C.DATASETS = CN() -# List of the dataset names for training. Must be registered in DatasetCatalog -_C.DATASETS.TRAIN = () -# List of the pre-computed proposal files for training, which must be consistent -# with data listed in DATASETS.TRAIN. -_C.DATASETS.PROPOSAL_FILES_TRAIN = () -# Number of top scoring precomputed proposals to keep for training -_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN = 2000 -# List of the dataset names for testing. Must be registered in DatasetCatalog -_C.DATASETS.TEST = () -# List of the pre-computed proposal files for test, which must be consistent -# with data listed in DATASETS.TEST. -_C.DATASETS.PROPOSAL_FILES_TEST = () -# Number of top scoring precomputed proposals to keep for test -_C.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST = 1000 - -# ----------------------------------------------------------------------------- -# DataLoader -# ----------------------------------------------------------------------------- -_C.DATALOADER = CN() -# Number of data loading threads -_C.DATALOADER.NUM_WORKERS = 4 -# If True, each batch should contain only images for which the aspect ratio -# is compatible. This groups portrait images together, and landscape images -# are not batched with portrait images. -_C.DATALOADER.ASPECT_RATIO_GROUPING = True -# Options: TrainingSampler, RepeatFactorTrainingSampler -_C.DATALOADER.SAMPLER_TRAIN = "TrainingSampler" -# Repeat threshold for RepeatFactorTrainingSampler -_C.DATALOADER.REPEAT_THRESHOLD = 0.0 -# if True, the dataloader will filter out images that have no associated -# annotations at train time. -_C.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True - -# ---------------------------------------------------------------------------- # -# Backbone options -# ---------------------------------------------------------------------------- # -_C.MODEL.BACKBONE = CN() - -_C.MODEL.BACKBONE.NAME = "build_resnet_backbone" -# Freeze the first several stages so they are not trained. -# There are 5 stages in ResNet. The first is a convolution, and the following -# stages are each group of residual blocks. 
-_C.MODEL.BACKBONE.FREEZE_AT = 2 - - -# ---------------------------------------------------------------------------- # -# FPN options -# ---------------------------------------------------------------------------- # -_C.MODEL.FPN = CN() -# Names of the input feature maps to be used by FPN -# They must have contiguous power of 2 strides -# e.g., ["res2", "res3", "res4", "res5"] -_C.MODEL.FPN.IN_FEATURES = [] -_C.MODEL.FPN.OUT_CHANNELS = 256 - -# Options: "" (no norm), "GN" -_C.MODEL.FPN.NORM = "" - -# Types for fusing the FPN top-down and lateral features. Can be either "sum" or "avg" -_C.MODEL.FPN.FUSE_TYPE = "sum" - - -# ---------------------------------------------------------------------------- # -# Proposal generator options -# ---------------------------------------------------------------------------- # -_C.MODEL.PROPOSAL_GENERATOR = CN() -# Current proposal generators include "RPN", "RRPN" and "PrecomputedProposals" -_C.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" -# Proposal height and width both need to be greater than MIN_SIZE -# (a the scale used during training or inference) -_C.MODEL.PROPOSAL_GENERATOR.MIN_SIZE = 0 - - -# ---------------------------------------------------------------------------- # -# Anchor generator options -# ---------------------------------------------------------------------------- # -_C.MODEL.ANCHOR_GENERATOR = CN() -# The generator can be any name in the ANCHOR_GENERATOR registry -_C.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" -# Anchor sizes (i.e. sqrt of area) in absolute pixels w.r.t. the network input. -# Format: list[list[float]]. SIZES[i] specifies the list of sizes -# to use for IN_FEATURES[i]; len(SIZES) == len(IN_FEATURES) must be true, -# or len(SIZES) == 1 is true and size list SIZES[0] is used for all -# IN_FEATURES. -_C.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]] -# Anchor aspect ratios. For each area given in `SIZES`, anchors with different aspect -# ratios are generated by an anchor generator. -# Format: list[list[float]]. ASPECT_RATIOS[i] specifies the list of aspect ratios (H/W) -# to use for IN_FEATURES[i]; len(ASPECT_RATIOS) == len(IN_FEATURES) must be true, -# or len(ASPECT_RATIOS) == 1 is true and aspect ratio list ASPECT_RATIOS[0] is used -# for all IN_FEATURES. -_C.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]] -# Anchor angles. -# list[list[float]], the angle in degrees, for each input feature map. -# ANGLES[i] specifies the list of angles for IN_FEATURES[i]. -_C.MODEL.ANCHOR_GENERATOR.ANGLES = [[-90, 0, 90]] -# Relative offset between the center of the first anchor and the top-left corner of the image -# Value has to be in [0, 1). Recommend to use 0.5, which means half stride. -# The value is not expected to affect model accuracy. -_C.MODEL.ANCHOR_GENERATOR.OFFSET = 0.0 - -# ---------------------------------------------------------------------------- # -# RPN options -# ---------------------------------------------------------------------------- # -_C.MODEL.RPN = CN() -_C.MODEL.RPN.HEAD_NAME = "StandardRPNHead" # used by RPN_HEAD_REGISTRY - -# Names of the input feature maps to be used by RPN -# e.g., ["p2", "p3", "p4", "p5", "p6"] for FPN -_C.MODEL.RPN.IN_FEATURES = ["res4"] -# Remove RPN anchors that go outside the image by BOUNDARY_THRESH pixels -# Set to -1 or a large value, e.g. 
100000, to disable pruning anchors -_C.MODEL.RPN.BOUNDARY_THRESH = -1 -# IOU overlap ratios [BG_IOU_THRESHOLD, FG_IOU_THRESHOLD] -# Minimum overlap required between an anchor and ground-truth box for the -# (anchor, gt box) pair to be a positive example (IoU >= FG_IOU_THRESHOLD -# ==> positive RPN example: 1) -# Maximum overlap allowed between an anchor and ground-truth box for the -# (anchor, gt box) pair to be a negative examples (IoU < BG_IOU_THRESHOLD -# ==> negative RPN example: 0) -# Anchors with overlap in between (BG_IOU_THRESHOLD <= IoU < FG_IOU_THRESHOLD) -# are ignored (-1) -_C.MODEL.RPN.IOU_THRESHOLDS = [0.3, 0.7] -_C.MODEL.RPN.IOU_LABELS = [0, -1, 1] -# Total number of RPN examples per image -_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256 -# Target fraction of foreground (positive) examples per RPN minibatch -_C.MODEL.RPN.POSITIVE_FRACTION = 0.5 -# Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets -_C.MODEL.RPN.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) -# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. -_C.MODEL.RPN.SMOOTH_L1_BETA = 0.0 -_C.MODEL.RPN.LOSS_WEIGHT = 1.0 -# Number of top scoring RPN proposals to keep before applying NMS -# When FPN is used, this is *per FPN level* (not total) -_C.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 12000 -_C.MODEL.RPN.PRE_NMS_TOPK_TEST = 6000 -# Number of top scoring RPN proposals to keep after applying NMS -# When FPN is used, this limit is applied per level and then again to the union -# of proposals from all levels -# NOTE: When FPN is used, the meaning of this config is different from Detectron1. -# It means per-batch topk in Detectron1, but per-image topk here. -# See "modeling/rpn/rpn_outputs.py" for details. -_C.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000 -_C.MODEL.RPN.POST_NMS_TOPK_TEST = 1000 -# NMS threshold used on RPN proposals -_C.MODEL.RPN.NMS_THRESH = 0.7 - -# ---------------------------------------------------------------------------- # -# ROI HEADS options -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_HEADS = CN() -_C.MODEL.ROI_HEADS.NAME = "Res5ROIHeads" -# Number of foreground classes -_C.MODEL.ROI_HEADS.NUM_CLASSES = 80 -# Names of the input feature maps to be used by ROI heads -# Currently all heads (box, mask, ...) use the same input feature map list -# e.g., ["p2", "p3", "p4", "p5"] is commonly used for FPN -_C.MODEL.ROI_HEADS.IN_FEATURES = ["res4"] -# IOU overlap ratios [IOU_THRESHOLD] -# Overlap threshold for an RoI to be considered background (if < IOU_THRESHOLD) -# Overlap threshold for an RoI to be considered foreground (if >= IOU_THRESHOLD) -_C.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5] -_C.MODEL.ROI_HEADS.IOU_LABELS = [0, 1] -# RoI minibatch size *per image* (number of regions of interest [ROIs]) -# Total number of RoIs per training minibatch = -# ROI_HEADS.BATCH_SIZE_PER_IMAGE * SOLVER.IMS_PER_BATCH -# E.g., a common configuration is: 512 * 16 = 8192 -_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 -# Target fraction of RoI minibatch that is labeled foreground (i.e. class > 0) -_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25 - -# Only used on test mode - -# Minimum score threshold (assuming scores in a [0, 1] range); a value chosen to -# balance obtaining high recall with not having too many low precision -# detections that will slow down inference post processing steps (like NMS) -# A default threshold of 0.0 increases AP by ~0.2-0.3 but significantly slows down -# inference. 
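# Minimal sketch of overriding these test-time thresholds (assumes only the yacs-style
# merge_from_list helper inherited by the CfgNode defined earlier in this diff):
#
#   from detectron2.config import get_cfg
#   cfg = get_cfg()
#   cfg.merge_from_list(["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.5,
#                        "MODEL.ROI_HEADS.NMS_THRESH_TEST", 0.6])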
-_C.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.05 -# Overlap threshold used for non-maximum suppression (suppress boxes with -# IoU >= this threshold) -_C.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5 -# If True, augment proposals with ground-truth boxes before sampling proposals to -# train ROI heads. -_C.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT = True - -# ---------------------------------------------------------------------------- # -# Box Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_BOX_HEAD = CN() -# C4 don't use head name option -# Options for non-C4 models: FastRCNNConvFCHead, -_C.MODEL.ROI_BOX_HEAD.NAME = "" -# Default weights on (dx, dy, dw, dh) for normalizing bbox regression targets -# These are empirically chosen to approximately lead to unit variance targets -_C.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0) -# The transition point from L1 to L2 loss. Set to 0.0 to make the loss simply L1. -_C.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA = 0.0 -_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0 -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" - -_C.MODEL.ROI_BOX_HEAD.NUM_FC = 0 -# Hidden layer dimension for FC layers in the RoI box head -_C.MODEL.ROI_BOX_HEAD.FC_DIM = 1024 -_C.MODEL.ROI_BOX_HEAD.NUM_CONV = 0 -# Channel dimension for Conv layers in the RoI box head -_C.MODEL.ROI_BOX_HEAD.CONV_DIM = 256 -# Normalization method for the convolution layers. -# Options: "" (no norm), "GN", "SyncBN". -_C.MODEL.ROI_BOX_HEAD.NORM = "" -# Whether to use class agnostic for bbox regression -_C.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG = False -# If true, RoI heads use bounding boxes predicted by the box head rather than proposal boxes. -_C.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES = False - -# ---------------------------------------------------------------------------- # -# Cascaded Box Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_BOX_CASCADE_HEAD = CN() -# The number of cascade stages is implicitly defined by the length of the following two configs. -_C.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS = ( - (10.0, 10.0, 5.0, 5.0), - (20.0, 20.0, 10.0, 10.0), - (30.0, 30.0, 15.0, 15.0), -) -_C.MODEL.ROI_BOX_CASCADE_HEAD.IOUS = (0.5, 0.6, 0.7) - - -# ---------------------------------------------------------------------------- # -# Mask Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_MASK_HEAD = CN() -_C.MODEL.ROI_MASK_HEAD.NAME = "MaskRCNNConvUpsampleHead" -_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0 -_C.MODEL.ROI_MASK_HEAD.NUM_CONV = 0 # The number of convs in the mask head -_C.MODEL.ROI_MASK_HEAD.CONV_DIM = 256 -# Normalization method for the convolution layers. -# Options: "" (no norm), "GN", "SyncBN". 
-_C.MODEL.ROI_MASK_HEAD.NORM = "" -# Whether to use class agnostic for mask prediction -_C.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK = False -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_MASK_HEAD.POOLER_TYPE = "ROIAlignV2" - - -# ---------------------------------------------------------------------------- # -# Keypoint Head -# ---------------------------------------------------------------------------- # -_C.MODEL.ROI_KEYPOINT_HEAD = CN() -_C.MODEL.ROI_KEYPOINT_HEAD.NAME = "KRCNNConvDeconvUpsampleHead" -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION = 14 -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO = 0 -_C.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS = tuple(512 for _ in range(8)) -_C.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 17 # 17 is the number of keypoints in COCO. - -# Images with too few (or no) keypoints are excluded from training. -_C.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE = 1 -# Normalize by the total number of visible keypoints in the minibatch if True. -# Otherwise, normalize by the total number of keypoints that could ever exist -# in the minibatch. -# The keypoint softmax loss is only calculated on visible keypoints. -# Since the number of visible keypoints can vary significantly between -# minibatches, this has the effect of up-weighting the importance of -# minibatches with few visible keypoints. (Imagine the extreme case of -# only one visible keypoint versus N: in the case of N, each one -# contributes 1/N to the gradient compared to the single keypoint -# determining the gradient direction). Instead, we can normalize the -# loss by the total number of keypoints, if it were the case that all -# keypoints were visible in a full minibatch. (Returning to the example, -# this means that the one visible keypoint contributes as much as each -# of the N keypoints.) -_C.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = True -# Multi-task loss weight to use for keypoints -# Recommended values: -# - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True -# - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False -_C.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 1.0 -# Type of pooling operation applied to the incoming feature map for each RoI -_C.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE = "ROIAlignV2" - -# ---------------------------------------------------------------------------- # -# Semantic Segmentation Head -# ---------------------------------------------------------------------------- # -_C.MODEL.SEM_SEG_HEAD = CN() -_C.MODEL.SEM_SEG_HEAD.NAME = "SemSegFPNHead" -_C.MODEL.SEM_SEG_HEAD.IN_FEATURES = ["p2", "p3", "p4", "p5"] -# Label in the semantic segmentation ground truth that is ignored, i.e., no loss is calculated for -# the correposnding pixel. -_C.MODEL.SEM_SEG_HEAD.IGNORE_VALUE = 255 -# Number of classes in the semantic segmentation head -_C.MODEL.SEM_SEG_HEAD.NUM_CLASSES = 54 -# Number of channels in the 3x3 convs inside semantic-FPN heads. -_C.MODEL.SEM_SEG_HEAD.CONVS_DIM = 128 -# Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. -_C.MODEL.SEM_SEG_HEAD.COMMON_STRIDE = 4 -# Normalization method for the convolution layers. Options: "" (no norm), "GN". -_C.MODEL.SEM_SEG_HEAD.NORM = "GN" -_C.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT = 1.0 - -_C.MODEL.PANOPTIC_FPN = CN() -# Scaling of all losses from instance detection / segmentation head. 
-_C.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT = 1.0 - -# options when combining instance & semantic segmentation outputs -_C.MODEL.PANOPTIC_FPN.COMBINE = CN({"ENABLED": True}) -_C.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH = 0.5 -_C.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT = 4096 -_C.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5 - - -# ---------------------------------------------------------------------------- # -# RetinaNet Head -# ---------------------------------------------------------------------------- # -_C.MODEL.RETINANET = CN() - -# This is the number of foreground classes. -_C.MODEL.RETINANET.NUM_CLASSES = 80 - -_C.MODEL.RETINANET.IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"] - -# Convolutions to use in the cls and bbox tower -# NOTE: this doesn't include the last conv for logits -_C.MODEL.RETINANET.NUM_CONVS = 4 - -# IoU overlap ratio [bg, fg] for labeling anchors. -# Anchors with < bg are labeled negative (0) -# Anchors with >= bg and < fg are ignored (-1) -# Anchors with >= fg are labeled positive (1) -_C.MODEL.RETINANET.IOU_THRESHOLDS = [0.4, 0.5] -_C.MODEL.RETINANET.IOU_LABELS = [0, -1, 1] - -# Prior prob for rare case (i.e. foreground) at the beginning of training. -# This is used to set the bias for the logits layer of the classifier subnet. -# This improves training stability in the case of heavy class imbalance. -_C.MODEL.RETINANET.PRIOR_PROB = 0.01 - -# Inference cls score threshold, only anchors with score > INFERENCE_TH are -# considered for inference (to improve speed) -_C.MODEL.RETINANET.SCORE_THRESH_TEST = 0.05 -_C.MODEL.RETINANET.TOPK_CANDIDATES_TEST = 1000 -_C.MODEL.RETINANET.NMS_THRESH_TEST = 0.5 - -# Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets -_C.MODEL.RETINANET.BBOX_REG_WEIGHTS = (1.0, 1.0, 1.0, 1.0) - -# Loss parameters -_C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0 -_C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25 -_C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1 - - -# ---------------------------------------------------------------------------- # -# ResNe[X]t options (ResNets = {ResNet, ResNeXt} -# Note that parts of a resnet may be used for both the backbone and the head -# These options apply to both -# ---------------------------------------------------------------------------- # -_C.MODEL.RESNETS = CN() - -_C.MODEL.RESNETS.DEPTH = 50 -_C.MODEL.RESNETS.OUT_FEATURES = ["res4"] # res4 for C4 backbone, res2..5 for FPN backbone - -# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt -_C.MODEL.RESNETS.NUM_GROUPS = 1 - -# Options: FrozenBN, GN, "SyncBN", "BN" -_C.MODEL.RESNETS.NORM = "FrozenBN" - -# Baseline width of each group. -# Scaling this parameters will scale the width of all bottleneck layers. -_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64 - -# Place the stride 2 conv on the 1x1 filter -# Use True only for the original MSRA ResNet; use False for C2 and Torch models -_C.MODEL.RESNETS.STRIDE_IN_1X1 = True - -# Apply dilation in stage "res5" -_C.MODEL.RESNETS.RES5_DILATION = 1 - -# Output width of res2. Scaling this parameters will scale the width of all 1x1 convs in ResNet -# For R18 and R34, this needs to be set to 64 -_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256 -_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64 - -# Apply Deformable Convolution in stages -# Specify if apply deform_conv on Res2, Res3, Res4, Res5 -_C.MODEL.RESNETS.DEFORM_ON_PER_STAGE = [False, False, False, False] -# Use True to use modulated deform_conv (DeformableV2, https://arxiv.org/abs/1811.11168); -# Use False for DeformableV1. 
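# Illustrative override enabling modulated deformable convolutions in res3-res5 only
# (a sketch mirroring the pattern of detectron2's deformable-conv baseline configs, not
# a line from this file):
#
#   MODEL.RESNETS.DEFORM_ON_PER_STAGE: [False, True, True, True]
#   MODEL.RESNETS.DEFORM_MODULATED: True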
-_C.MODEL.RESNETS.DEFORM_MODULATED = False -# Number of groups in deformable conv. -_C.MODEL.RESNETS.DEFORM_NUM_GROUPS = 1 - - -# ---------------------------------------------------------------------------- # -# Solver -# ---------------------------------------------------------------------------- # -_C.SOLVER = CN() - -# See detectron2/solver/build.py for LR scheduler options -_C.SOLVER.LR_SCHEDULER_NAME = "WarmupMultiStepLR" - -_C.SOLVER.MAX_ITER = 40000 - -_C.SOLVER.BASE_LR = 0.001 - -_C.SOLVER.MOMENTUM = 0.9 - -_C.SOLVER.NESTEROV = False - -_C.SOLVER.WEIGHT_DECAY = 0.0001 -# The weight decay that's applied to parameters of normalization layers -# (typically the affine transformation) -_C.SOLVER.WEIGHT_DECAY_NORM = 0.0 - -_C.SOLVER.GAMMA = 0.1 -# The iteration number to decrease learning rate by GAMMA. -_C.SOLVER.STEPS = (30000,) - -_C.SOLVER.WARMUP_FACTOR = 1.0 / 1000 -_C.SOLVER.WARMUP_ITERS = 1000 -_C.SOLVER.WARMUP_METHOD = "linear" - -# Save a checkpoint after every this number of iterations -_C.SOLVER.CHECKPOINT_PERIOD = 5000 - -# Number of images per batch across all machines. -# If we have 16 GPUs and IMS_PER_BATCH = 32, -# each GPU will see 2 images per batch. -_C.SOLVER.IMS_PER_BATCH = 16 - -# Detectron v1 (and previous detection code) used a 2x higher LR and 0 WD for -# biases. This is not useful (at least for recent models). You should avoid -# changing these and they exist only to reproduce Detectron v1 training if -# desired. -_C.SOLVER.BIAS_LR_FACTOR = 1.0 -_C.SOLVER.WEIGHT_DECAY_BIAS = _C.SOLVER.WEIGHT_DECAY - -# Gradient clipping -_C.SOLVER.CLIP_GRADIENTS = CN({"ENABLED": False}) -# Type of gradient clipping, currently 2 values are supported: -# - "value": the absolute values of elements of each gradients are clipped -# - "norm": the norm of the gradient for each parameter is clipped thus -# affecting all elements in the parameter -_C.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value" -# Maximum absolute value used for clipping gradients -_C.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0 -# Floating point number p for L-p norm to be used with the "norm" -# gradient clipping type; for L-inf, please specify .inf -_C.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0 - -# ---------------------------------------------------------------------------- # -# Specific test options -# ---------------------------------------------------------------------------- # -_C.TEST = CN() -# For end-to-end tests to verify the expected accuracy. -# Each item is [task, metric, value, tolerance] -# e.g.: [['bbox', 'AP', 38.5, 0.2]] -_C.TEST.EXPECTED_RESULTS = [] -# The period (in terms of steps) to evaluate the model during training. -# Set to 0 to disable. -_C.TEST.EVAL_PERIOD = 0 -# The sigmas used to calculate keypoint OKS. See http://cocodataset.org/#keypoints-eval -# When empty it will use the defaults in COCO. -# Otherwise it should have the same length as ROI_KEYPOINT_HEAD.NUM_KEYPOINTS. -_C.TEST.KEYPOINT_OKS_SIGMAS = [] -# Maximum number of detections to return per image during inference (100 is -# based on the limit established for the COCO dataset). 
-_C.TEST.DETECTIONS_PER_IMAGE = 100 - -_C.TEST.AUG = CN({"ENABLED": False}) -_C.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) -_C.TEST.AUG.MAX_SIZE = 4000 -_C.TEST.AUG.FLIP = True - -_C.TEST.PRECISE_BN = CN({"ENABLED": False}) -_C.TEST.PRECISE_BN.NUM_ITER = 200 - -# ---------------------------------------------------------------------------- # -# Misc options -# ---------------------------------------------------------------------------- # -# Directory where output files are written -_C.OUTPUT_DIR = "./output" -# Set seed to negative to fully randomize everything. -# Set seed to positive to use a fixed seed. Note that a fixed seed increases -# reproducibility but does not guarantee fully deterministic behavior. -# Disabling all parallelism further increases reproducibility. -_C.SEED = -1 -# Benchmark different cudnn algorithms. -# If input images have very different sizes, this option will have large overhead -# for about 10k iterations. It usually hurts total time, but can benefit for certain models. -# If input images have the same or similar sizes, benchmark is often helpful. -_C.CUDNN_BENCHMARK = False -# The period (in terms of steps) for minibatch visualization at train time. -# Set to 0 to disable. -_C.VIS_PERIOD = 0 - -# global config is for quick hack purposes. -# You can set them in command line or config files, -# and access it with: -# -# from detectron2.config import global_cfg -# print(global_cfg.HACK) -# -# Do not commit any configs into it. -_C.GLOBAL = CN() -_C.GLOBAL.HACK = 1.0 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/__init__.py deleted file mode 100644 index e8f72e0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from . import transforms # isort:skip - -from .build import ( - build_detection_test_loader, - build_detection_train_loader, - get_detection_dataset_dicts, - load_proposals_into_dataset, - print_instances_class_histogram, -) -from .catalog import DatasetCatalog, MetadataCatalog -from .common import DatasetFromList, MapDataset -from .dataset_mapper import DatasetMapper - -# ensure the builtin data are registered -from . import datasets, samplers # isort:skip - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/build.py deleted file mode 100644 index cb7e857..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/build.py +++ /dev/null @@ -1,397 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import bisect -import copy -import itertools -import logging -import numpy as np -import operator -import pickle -import torch.utils.data -from fvcore.common.file_io import PathManager -from tabulate import tabulate -from termcolor import colored - -from detectron2.structures import BoxMode -from detectron2.utils.comm import get_world_size -from detectron2.utils.env import seed_all_rng -from detectron2.utils.logger import log_first_n - -from . 
import samplers -from .catalog import DatasetCatalog, MetadataCatalog -from .common import AspectRatioGroupedDataset, DatasetFromList, MapDataset -from .dataset_mapper import DatasetMapper -from .detection_utils import check_metadata_consistency - -""" -This file contains the default logic to build a dataloader for training or testing. -""" - -__all__ = [ - "build_detection_train_loader", - "build_detection_test_loader", - "get_detection_dataset_dicts", - "load_proposals_into_dataset", - "print_instances_class_histogram", -] - - -def filter_images_with_only_crowd_annotations(dataset_dicts): - """ - Filter out images with none annotations or only crowd annotations - (i.e., images without non-crowd annotations). - A common training-time preprocessing on COCO dataset. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - - Returns: - list[dict]: the same format, but filtered. - """ - num_before = len(dataset_dicts) - - def valid(anns): - for ann in anns: - if ann.get("iscrowd", 0) == 0: - return True - return False - - dataset_dicts = [x for x in dataset_dicts if valid(x["annotations"])] - num_after = len(dataset_dicts) - logger = logging.getLogger(__name__) - logger.info( - "Removed {} images with no usable annotations. {} images left.".format( - num_before - num_after, num_after - ) - ) - return dataset_dicts - - -def filter_images_with_few_keypoints(dataset_dicts, min_keypoints_per_image): - """ - Filter out images with too few number of keypoints. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - - Returns: - list[dict]: the same format as dataset_dicts, but filtered. - """ - num_before = len(dataset_dicts) - - def visible_keypoints_in_image(dic): - # Each keypoints field has the format [x1, y1, v1, ...], where v is visibility - annotations = dic["annotations"] - return sum( - (np.array(ann["keypoints"][2::3]) > 0).sum() - for ann in annotations - if "keypoints" in ann - ) - - dataset_dicts = [ - x for x in dataset_dicts if visible_keypoints_in_image(x) >= min_keypoints_per_image - ] - num_after = len(dataset_dicts) - logger = logging.getLogger(__name__) - logger.info( - "Removed {} images with fewer than {} keypoints.".format( - num_before - num_after, min_keypoints_per_image - ) - ) - return dataset_dicts - - -def load_proposals_into_dataset(dataset_dicts, proposal_file): - """ - Load precomputed object proposals into the dataset. - - The proposal file should be a pickled dict with the following keys: - - - "ids": list[int] or list[str], the image ids - - "boxes": list[np.ndarray], each is an Nx4 array of boxes corresponding to the image id - - "objectness_logits": list[np.ndarray], each is an N sized array of objectness scores - corresponding to the boxes. - - "bbox_mode": the BoxMode of the boxes array. Defaults to ``BoxMode.XYXY_ABS``. - - Args: - dataset_dicts (list[dict]): annotations in Detectron2 Dataset format. - proposal_file (str): file path of pre-computed proposals, in pkl format. - - Returns: - list[dict]: the same format as dataset_dicts, but added proposal field. 
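    As an illustration only (the file name and array contents below are arbitrary
    examples, not taken from this repository), a proposal file in the documented
    format could be written like this:

        import pickle
        import numpy as np
        from detectron2.structures import BoxMode

        proposals = {
            "ids": [139, 285],                                            # image ids of two images
            "boxes": [np.random.rand(100, 4) * 640 for _ in range(2)],    # one Nx4 array per image
            "objectness_logits": [np.random.randn(100) for _ in range(2)],
            "bbox_mode": BoxMode.XYXY_ABS,
        }
        with open("my_proposals.pkl", "wb") as f:                         # hypothetical output path
            pickle.dump(proposals, f)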
- """ - logger = logging.getLogger(__name__) - logger.info("Loading proposals from: {}".format(proposal_file)) - - with PathManager.open(proposal_file, "rb") as f: - proposals = pickle.load(f, encoding="latin1") - - # Rename the key names in D1 proposal files - rename_keys = {"indexes": "ids", "scores": "objectness_logits"} - for key in rename_keys: - if key in proposals: - proposals[rename_keys[key]] = proposals.pop(key) - - # Fetch the indexes of all proposals that are in the dataset - # Convert image_id to str since they could be int. - img_ids = set({str(record["image_id"]) for record in dataset_dicts}) - id_to_index = {str(id): i for i, id in enumerate(proposals["ids"]) if str(id) in img_ids} - - # Assuming default bbox_mode of precomputed proposals are 'XYXY_ABS' - bbox_mode = BoxMode(proposals["bbox_mode"]) if "bbox_mode" in proposals else BoxMode.XYXY_ABS - - for record in dataset_dicts: - # Get the index of the proposal - i = id_to_index[str(record["image_id"])] - - boxes = proposals["boxes"][i] - objectness_logits = proposals["objectness_logits"][i] - # Sort the proposals in descending order of the scores - inds = objectness_logits.argsort()[::-1] - record["proposal_boxes"] = boxes[inds] - record["proposal_objectness_logits"] = objectness_logits[inds] - record["proposal_bbox_mode"] = bbox_mode - - return dataset_dicts - - -def _quantize(x, bin_edges): - bin_edges = copy.copy(bin_edges) - bin_edges = sorted(bin_edges) - quantized = list(map(lambda y: bisect.bisect_right(bin_edges, y), x)) - return quantized - - -def print_instances_class_histogram(dataset_dicts, class_names): - """ - Args: - dataset_dicts (list[dict]): list of dataset dicts. - class_names (list[str]): list of class names (zero-indexed). - """ - num_classes = len(class_names) - hist_bins = np.arange(num_classes + 1) - histogram = np.zeros((num_classes,), dtype=np.int) - for entry in dataset_dicts: - annos = entry["annotations"] - classes = [x["category_id"] for x in annos if not x.get("iscrowd", 0)] - histogram += np.histogram(classes, bins=hist_bins)[0] - - N_COLS = min(6, len(class_names) * 2) - - def short_name(x): - # make long class names shorter. useful for lvis - if len(x) > 13: - return x[:11] + ".." - return x - - data = list( - itertools.chain(*[[short_name(class_names[i]), int(v)] for i, v in enumerate(histogram)]) - ) - total_num_instances = sum(data[1::2]) - data.extend([None] * (N_COLS - (len(data) % N_COLS))) - if num_classes > 1: - data.extend(["total", total_num_instances]) - data = itertools.zip_longest(*[data[i::N_COLS] for i in range(N_COLS)]) - table = tabulate( - data, - headers=["category", "#instances"] * (N_COLS // 2), - tablefmt="pipe", - numalign="left", - stralign="center", - ) - log_first_n( - logging.INFO, - "Distribution of instances among all {} categories:\n".format(num_classes) - + colored(table, "cyan"), - key="message", - ) - - -def get_detection_dataset_dicts( - dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None -): - """ - Load and prepare dataset dicts for instance detection/segmentation and semantic segmentation. - - Args: - dataset_names (list[str]): a list of dataset names - filter_empty (bool): whether to filter out images without instance annotations - min_keypoints (int): filter out images with fewer keypoints than - `min_keypoints`. Set to 0 to do nothing. - proposal_files (list[str]): if given, a list of object proposal files - that match each dataset in `dataset_names`. 
- """ - assert len(dataset_names) - dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names] - for dataset_name, dicts in zip(dataset_names, dataset_dicts): - assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) - - if proposal_files is not None: - assert len(dataset_names) == len(proposal_files) - # load precomputed proposals from proposal files - dataset_dicts = [ - load_proposals_into_dataset(dataset_i_dicts, proposal_file) - for dataset_i_dicts, proposal_file in zip(dataset_dicts, proposal_files) - ] - - dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) - - has_instances = "annotations" in dataset_dicts[0] - # Keep images without instance-level GT if the dataset has semantic labels. - if filter_empty and has_instances and "sem_seg_file_name" not in dataset_dicts[0]: - dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) - - if min_keypoints > 0 and has_instances: - dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) - - if has_instances: - try: - class_names = MetadataCatalog.get(dataset_names[0]).thing_classes - check_metadata_consistency("thing_classes", dataset_names) - print_instances_class_histogram(dataset_dicts, class_names) - except AttributeError: # class names are not available for this dataset - pass - return dataset_dicts - - -def build_detection_train_loader(cfg, mapper=None): - """ - A data loader is created by the following steps: - - 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts. - 2. Coordinate a random shuffle order shared among all processes (all GPUs) - 3. Each process spawn another few workers to process the dicts. Each worker will: - * Map each metadata dict into another format to be consumed by the model. - * Batch them by simply putting dicts into a list. - - The batched ``list[mapped_dict]`` is what this dataloader will yield. - - Args: - cfg (CfgNode): the config - mapper (callable): a callable which takes a sample (dict) from dataset and - returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, True)`. 
- - Returns: - an infinite iterator of training data - """ - num_workers = get_world_size() - images_per_batch = cfg.SOLVER.IMS_PER_BATCH - assert ( - images_per_batch % num_workers == 0 - ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format( - images_per_batch, num_workers - ) - assert ( - images_per_batch >= num_workers - ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format( - images_per_batch, num_workers - ) - images_per_worker = images_per_batch // num_workers - - dataset_dicts = get_detection_dataset_dicts( - cfg.DATASETS.TRAIN, - filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, - min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE - if cfg.MODEL.KEYPOINT_ON - else 0, - proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, - ) - dataset = DatasetFromList(dataset_dicts, copy=False) - - if mapper is None: - mapper = DatasetMapper(cfg, True) - dataset = MapDataset(dataset, mapper) - - sampler_name = cfg.DATALOADER.SAMPLER_TRAIN - logger = logging.getLogger(__name__) - logger.info("Using training sampler {}".format(sampler_name)) - if sampler_name == "TrainingSampler": - sampler = samplers.TrainingSampler(len(dataset)) - elif sampler_name == "RepeatFactorTrainingSampler": - sampler = samplers.RepeatFactorTrainingSampler( - dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD - ) - else: - raise ValueError("Unknown training sampler: {}".format(sampler_name)) - - if cfg.DATALOADER.ASPECT_RATIO_GROUPING: - data_loader = torch.utils.data.DataLoader( - dataset, - sampler=sampler, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=None, - collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements - worker_init_fn=worker_init_reset_seed, - ) # yield individual mapped dict - data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker) - else: - batch_sampler = torch.utils.data.sampler.BatchSampler( - sampler, images_per_worker, drop_last=True - ) - # drop_last so the batch always have the same size - data_loader = torch.utils.data.DataLoader( - dataset, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=batch_sampler, - collate_fn=trivial_batch_collator, - worker_init_fn=worker_init_reset_seed, - ) - - return data_loader - - -def build_detection_test_loader(cfg, dataset_name, mapper=None): - """ - Similar to `build_detection_train_loader`. - But this function uses the given `dataset_name` argument (instead of the names in cfg), - and uses batch size 1. - - Args: - cfg: a detectron2 CfgNode - dataset_name (str): a name of the dataset that's available in the DatasetCatalog - mapper (callable): a callable which takes a sample (dict) from dataset - and returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, False)`. - - Returns: - DataLoader: a torch DataLoader, that loads the given detection - dataset, with test-time transformation and batching. - """ - dataset_dicts = get_detection_dataset_dicts( - [dataset_name], - filter_empty=False, - proposal_files=[ - cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] - ] - if cfg.MODEL.LOAD_PROPOSALS - else None, - ) - - dataset = DatasetFromList(dataset_dicts) - if mapper is None: - mapper = DatasetMapper(cfg, False) - dataset = MapDataset(dataset, mapper) - - sampler = samplers.InferenceSampler(len(dataset)) - # Always use 1 image per worker during inference since this is the - # standard when reporting inference time in papers. 
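# As a sketch only (the config values and the dataset name "coco_2017_val" are
# assumptions, not taken from this file), the two builders above are typically
# used like:
#
#     cfg = get_cfg()                                    # detectron2.config.get_cfg
#     cfg.DATASETS.TRAIN = ("coco_2017_train",)
#     cfg.DATASETS.TEST = ("coco_2017_val",)
#     train_loader = build_detection_train_loader(cfg)   # infinite iterator of list[dict]
#     test_loader = build_detection_test_loader(cfg, "coco_2017_val")  # 1 image per batch
#     batch = next(iter(train_loader))                   # len(batch) == IMS_PER_BATCH // #GPUs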
- batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) - - data_loader = torch.utils.data.DataLoader( - dataset, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=batch_sampler, - collate_fn=trivial_batch_collator, - ) - return data_loader - - -def trivial_batch_collator(batch): - """ - A batch collator that does nothing. - """ - return batch - - -def worker_init_reset_seed(worker_id): - seed_all_rng(np.random.randint(2 ** 31) + worker_id) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/catalog.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/catalog.py deleted file mode 100644 index 57f18c8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/catalog.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import types -from typing import List - -from detectron2.utils.logger import log_first_n - -__all__ = ["DatasetCatalog", "MetadataCatalog"] - - -class DatasetCatalog(object): - """ - A catalog that stores information about the data and how to obtain them. - - It contains a mapping from strings - (which are names that identify a dataset, e.g. "coco_2014_train") - to a function which parses the dataset and returns the samples in the - format of `list[dict]`. - - The returned dicts should be in Detectron2 Dataset format (See DATASETS.md for details) - if used with the data loader functionalities in `data/build.py,data/detection_transform.py`. - - The purpose of having this catalog is to make it easy to choose - different data, by just using the strings in the config. - """ - - _REGISTERED = {} - - @staticmethod - def register(name, func): - """ - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - func (callable): a callable which takes no arguments and returns a list of dicts. - """ - assert callable(func), "You must register a function with `DatasetCatalog.register`!" - assert name not in DatasetCatalog._REGISTERED, "Dataset '{}' is already registered!".format( - name - ) - DatasetCatalog._REGISTERED[name] = func - - @staticmethod - def get(name): - """ - Call the registered function and return its results. - - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - - Returns: - list[dict]: dataset annotations.0 - """ - try: - f = DatasetCatalog._REGISTERED[name] - except KeyError: - raise KeyError( - "Dataset '{}' is not registered! Available data are: {}".format( - name, ", ".join(DatasetCatalog._REGISTERED.keys()) - ) - ) - return f() - - @staticmethod - def list() -> List[str]: - """ - List all registered data. - - Returns: - list[str] - """ - return list(DatasetCatalog._REGISTERED.keys()) - - @staticmethod - def clear(): - """ - Remove all registered dataset. - """ - DatasetCatalog._REGISTERED.clear() - - -class Metadata(types.SimpleNamespace): - """ - A class that supports simple attribute setter/getter. - It is intended for storing metadata of a dataset and make it accessible globally. - - Examples: - - .. 
code-block:: python - - # somewhere when you load the data: - MetadataCatalog.get("mydataset").thing_classes = ["person", "dog"] - - # somewhere when you print statistics or visualize: - classes = MetadataCatalog.get("mydataset").thing_classes - """ - - # the name of the dataset - # set default to N/A so that `self.name` in the errors will not trigger getattr again - name: str = "N/A" - - _RENAMED = { - "class_names": "thing_classes", - "dataset_id_to_contiguous_id": "thing_dataset_id_to_contiguous_id", - "stuff_class_names": "stuff_classes", - } - - def __getattr__(self, key): - if key in self._RENAMED: - log_first_n( - logging.WARNING, - "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), - n=10, - ) - return getattr(self, self._RENAMED[key]) - - raise AttributeError( - "Attribute '{}' does not exist in the metadata of '{}'. Available keys are {}.".format( - key, self.name, str(self.__dict__.keys()) - ) - ) - - def __setattr__(self, key, val): - if key in self._RENAMED: - log_first_n( - logging.WARNING, - "Metadata '{}' was renamed to '{}'!".format(key, self._RENAMED[key]), - n=10, - ) - setattr(self, self._RENAMED[key], val) - - # Ensure that metadata of the same name stays consistent - try: - oldval = getattr(self, key) - assert oldval == val, ( - "Attribute '{}' in the metadata of '{}' cannot be set " - "to a different value!\n{} != {}".format(key, self.name, oldval, val) - ) - except AttributeError: - super().__setattr__(key, val) - - def as_dict(self): - """ - Returns all the metadata as a dict. - Note that modifications to the returned dict will not reflect on the Metadata object. - """ - return copy.copy(self.__dict__) - - def set(self, **kwargs): - """ - Set multiple metadata with kwargs. - """ - for k, v in kwargs.items(): - setattr(self, k, v) - return self - - def get(self, key, default=None): - """ - Access an attribute and return its value if exists. - Otherwise return default. - """ - try: - return getattr(self, key) - except AttributeError: - return default - - -class MetadataCatalog: - """ - MetadataCatalog provides access to "Metadata" of a given dataset. - - The metadata associated with a certain name is a singleton: once created, - the metadata will stay alive and will be returned by future calls to `get(name)`. - - It's like global variables, so don't abuse it. - It's meant for storing knowledge that's constant and shared across the execution - of the program, e.g.: the class names in COCO. - """ - - _NAME_TO_META = {} - - @staticmethod - def get(name): - """ - Args: - name (str): name of a dataset (e.g. coco_2014_train). - - Returns: - Metadata: The :class:`Metadata` instance associated with this name, - or create an empty one if none is available. - """ - assert len(name) - if name in MetadataCatalog._NAME_TO_META: - ret = MetadataCatalog._NAME_TO_META[name] - # TODO this is for the BC breaking change in D15247032. - # Remove this in the future. - if hasattr(ret, "dataset_name"): - logger = logging.getLogger() - logger.warning( - """ -The 'dataset_name' key in metadata is no longer used for -sharing metadata among splits after D15247032! Add -metadata to each split (now called dataset) separately! - """ - ) - parent_meta = MetadataCatalog.get(ret.dataset_name).as_dict() - ret.set(**parent_meta) - return ret - else: - m = MetadataCatalog._NAME_TO_META[name] = Metadata(name=name) - return m - - @staticmethod - def list(): - """ - List all registered metadata. 
- - Returns: - list[str]: keys (names of data) of all registered metadata - """ - return list(MetadataCatalog._NAME_TO_META.keys()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/common.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/common.py deleted file mode 100644 index a42c8b2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/common.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import numpy as np -import pickle -import random -import torch.utils.data as data - -from detectron2.utils.serialize import PicklableWrapper - -__all__ = ["MapDataset", "DatasetFromList", "AspectRatioGroupedDataset"] - - -class MapDataset(data.Dataset): - """ - Map a function over the elements in a dataset. - - Args: - dataset: a dataset where map function is applied. - map_func: a callable which maps the element in dataset. map_func is - responsible for error handling, when error happens, it needs to - return None so the MapDataset will randomly use other - elements from the dataset. - """ - - def __init__(self, dataset, map_func): - self._dataset = dataset - self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work - - self._rng = random.Random(42) - self._fallback_candidates = set(range(len(dataset))) - - def __len__(self): - return len(self._dataset) - - def __getitem__(self, idx): - retry_count = 0 - cur_idx = int(idx) - - while True: - data = self._map_func(self._dataset[cur_idx]) - if data is not None: - self._fallback_candidates.add(cur_idx) - return data - - # _map_func fails for this idx, use a random new index from the pool - retry_count += 1 - self._fallback_candidates.discard(cur_idx) - cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] - - if retry_count >= 3: - logger = logging.getLogger(__name__) - logger.warning( - "Failed to apply `_map_func` for idx: {}, retry count: {}".format( - idx, retry_count - ) - ) - - -class DatasetFromList(data.Dataset): - """ - Wrap a list to a torch Dataset. It produces elements of the list as data. - """ - - def __init__(self, lst: list, copy: bool = True, serialize: bool = True): - """ - Args: - lst (list): a list which contains elements to produce. - copy (bool): whether to deepcopy the element when producing it, - so that the result can be modified in place without affecting the - source in the list. - serialize (bool): whether to hold memory using serialized objects, when - enabled, data loader workers can use shared RAM from master - process instead of making a copy. 
- """ - self._lst = lst - self._copy = copy - self._serialize = serialize - - def _serialize(data): - buffer = pickle.dumps(data, protocol=-1) - return np.frombuffer(buffer, dtype=np.uint8) - - if self._serialize: - logger = logging.getLogger(__name__) - logger.info( - "Serializing {} elements to byte tensors and concatenating them all ...".format( - len(self._lst) - ) - ) - self._lst = [_serialize(x) for x in self._lst] - self._addr = np.asarray([len(x) for x in self._lst], dtype=np.int64) - self._addr = np.cumsum(self._addr) - self._lst = np.concatenate(self._lst) - logger.info("Serialized dataset takes {:.2f} MiB".format(len(self._lst) / 1024 ** 2)) - - def __len__(self): - if self._serialize: - return len(self._addr) - else: - return len(self._lst) - - def __getitem__(self, idx): - if self._serialize: - start_addr = 0 if idx == 0 else self._addr[idx - 1].item() - end_addr = self._addr[idx].item() - bytes = memoryview(self._lst[start_addr:end_addr]) - return pickle.loads(bytes) - elif self._copy: - return copy.deepcopy(self._lst[idx]) - else: - return self._lst[idx] - - -class AspectRatioGroupedDataset(data.IterableDataset): - """ - Batch data that have similar aspect ratio together. - In this implementation, images whose aspect ratio < (or >) 1 will - be batched together. - This improves training speed because the images then need less padding - to form a batch. - - It assumes the underlying dataset produces dicts with "width" and "height" keys. - It will then produce a list of original dicts with length = batch_size, - all with similar aspect ratios. - """ - - def __init__(self, dataset, batch_size): - """ - Args: - dataset: an iterable. Each element must be a dict with keys - "width" and "height", which will be used to batch data. - batch_size (int): - """ - self.dataset = dataset - self.batch_size = batch_size - self._buckets = [[] for _ in range(2)] - # Hard-coded two aspect ratio groups: w > h and w < h. - # Can add support for more aspect ratio groups, but doesn't seem useful - - def __iter__(self): - for d in self.dataset: - w, h = d["width"], d["height"] - bucket_id = 0 if w > h else 1 - bucket = self._buckets[bucket_id] - bucket.append(d) - if len(bucket) == self.batch_size: - yield bucket[:] - del bucket[:] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/dataset_mapper.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/dataset_mapper.py deleted file mode 100644 index db73b37..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/dataset_mapper.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import numpy as np -import torch -from fvcore.common.file_io import PathManager -from PIL import Image - -from . import detection_utils as utils -from . import transforms as T - -""" -This file contains the default mapping that's applied to "dataset dicts". -""" - -__all__ = ["DatasetMapper"] - - -class DatasetMapper: - """ - A callable which takes a dataset dict in Detectron2 Dataset format, - and map it into a format used by the model. - - This is the default callable to be used to map your dataset dict into training data. - You may need to follow it to implement your own one for customized logic, - such as a different way to read or transform images. - See :doc:`/tutorials/data_loading` for details. - - The callable currently does the following: - - 1. Read the image from "file_name" - 2. 
Applies cropping/geometric transforms to the image and annotations - 3. Prepare data and annotations to Tensor and :class:`Instances` - """ - - def __init__(self, cfg, is_train=True): - if cfg.INPUT.CROP.ENABLED and is_train: - self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE) - logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen)) - else: - self.crop_gen = None - - self.tfm_gens = utils.build_transform_gen(cfg, is_train) - - # fmt: off - self.img_format = cfg.INPUT.FORMAT - self.mask_on = cfg.MODEL.MASK_ON - self.mask_format = cfg.INPUT.MASK_FORMAT - self.keypoint_on = cfg.MODEL.KEYPOINT_ON - self.load_proposals = cfg.MODEL.LOAD_PROPOSALS - # fmt: on - if self.keypoint_on and is_train: - # Flip only makes sense in training - self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) - else: - self.keypoint_hflip_indices = None - - if self.load_proposals: - self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE - self.proposal_topk = ( - cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN - if is_train - else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST - ) - self.is_train = is_train - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. - - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - # USER: Write your own image loading if it's not from a file - image = utils.read_image(dataset_dict["file_name"], format=self.img_format) - utils.check_image_size(dataset_dict, image) - - if "annotations" not in dataset_dict: - image, transforms = T.apply_transform_gens( - ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image - ) - else: - # Crop around an instance if there are instances in the image. - # USER: Remove if you don't use cropping - if self.crop_gen: - crop_tfm = utils.gen_crop_transform_with_instance( - self.crop_gen.get_crop_size(image.shape[:2]), - image.shape[:2], - np.random.choice(dataset_dict["annotations"]), - ) - image = crop_tfm.apply_image(image) - image, transforms = T.apply_transform_gens(self.tfm_gens, image) - if self.crop_gen: - transforms = crop_tfm + transforms - - image_shape = image.shape[:2] # h, w - - # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, - # but not efficient on large generic data structures due to the use of pickle & mp.Queue. - # Therefore it's important to use torch.Tensor. - dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) - - # USER: Remove if you don't use pre-computed proposals. - if self.load_proposals: - utils.transform_proposals( - dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk - ) - - if not self.is_train: - # USER: Modify this if you want to keep them for some reason. - dataset_dict.pop("annotations", None) - dataset_dict.pop("sem_seg_file_name", None) - return dataset_dict - - if "annotations" in dataset_dict: - # USER: Modify this if you want to keep them for some reason. 
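# The block below drops annotation fields the model will not consume (masks or
# keypoints when the corresponding heads are disabled), applies the same geometric
# transforms to the surviving annotations, and packs them into an `Instances`
# object. As a usage sketch only (`cfg` and `dataset_dict` are placeholders, not
# defined here):
#     mapper = DatasetMapper(cfg, is_train=True)
#     mapped = mapper(dataset_dict)    # dataset_dict: one dict in Detectron2 Dataset format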
- for anno in dataset_dict["annotations"]: - if not self.mask_on: - anno.pop("segmentation", None) - if not self.keypoint_on: - anno.pop("keypoints", None) - - # USER: Implement additional transformations if you have other types of data - annos = [ - utils.transform_instance_annotations( - obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices - ) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - instances = utils.annotations_to_instances( - annos, image_shape, mask_format=self.mask_format - ) - # Create a tight bounding box from masks, useful when image is cropped - if self.crop_gen and instances.has("gt_masks"): - instances.gt_boxes = instances.gt_masks.get_bounding_boxes() - dataset_dict["instances"] = utils.filter_empty_instances(instances) - - # USER: Remove if you don't do semantic/panoptic segmentation. - if "sem_seg_file_name" in dataset_dict: - with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: - sem_seg_gt = Image.open(f) - sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") - sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) - sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) - dataset_dict["sem_seg"] = sem_seg_gt - return dataset_dict diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/README.md b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/README.md deleted file mode 100644 index 9fb3e4f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/README.md +++ /dev/null @@ -1,9 +0,0 @@ - - -### Common Datasets - -The dataset implemented here do not need to load the data into the final format. -It should provide the minimal data structure needed to use the dataset, so it can be very efficient. - -For example, for an image dataset, just provide the file names and labels, but don't read the images. -Let the downstream decide how to read. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/__init__.py deleted file mode 100644 index 9c3f556..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .cityscapes import load_cityscapes_instances -from .coco import load_coco_json, load_sem_seg -from .lvis import load_lvis_json, register_lvis_instances, get_lvis_instances_meta -from .register_coco import register_coco_instances, register_coco_panoptic_separated -from . import builtin # ensure the builtin data are registered - - -__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin.py deleted file mode 100644 index 21ac222..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin.py +++ /dev/null @@ -1,220 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - - -""" -This file registers pre-defined data at hard-coded paths, and their metadata. - -We hard-code metadata for common data. This will enable: -1. Consistency check when loading the data -2. 
Use models on these standard data directly and run demos, - without having to download the dataset annotations - -We hard-code some paths to the dataset that's assumed to -exist in "./data/". - -Users SHOULD NOT use this file to create new dataset / metadata for new dataset. -To add new dataset, refer to the tutorial "docs/DATASETS.md". -""" - -import os - -from detectron2.data import DatasetCatalog, MetadataCatalog - -from .builtin_meta import _get_builtin_metadata -from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic -from .lvis import get_lvis_instances_meta, register_lvis_instances -from .pascal_voc import register_pascal_voc -from .register_coco import register_coco_instances, register_coco_panoptic_separated - -# ==== Predefined data and splits for COCO ========== - -_PREDEFINED_SPLITS_COCO = {} -_PREDEFINED_SPLITS_COCO["coco"] = { - "coco_2014_train": ("coco/train2014", "coco/annotations/instances_train2014.json"), - "coco_2014_val": ("coco/val2014", "coco/annotations/instances_val2014.json"), - "coco_2014_minival": ("coco/val2014", "coco/annotations/instances_minival2014.json"), - "coco_2014_minival_100": ("coco/val2014", "coco/annotations/instances_minival2014_100.json"), - "coco_2014_valminusminival": ( - "coco/val2014", - "coco/annotations/instances_valminusminival2014.json", - ), - "coco_2017_train": ("coco/train2017", "coco/annotations/instances_train2017.json"), - "coco_2017_val": ("coco/val2017", "coco/annotations/instances_val2017.json"), - "coco_2017_test": ("coco/test2017", "coco/annotations/image_info_test2017.json"), - "coco_2017_test-dev": ("coco/test2017", "coco/annotations/image_info_test-dev2017.json"), - "coco_2017_val_100": ("coco/val2017", "coco/annotations/instances_val2017_100.json"), -} - -_PREDEFINED_SPLITS_COCO["coco_person"] = { - "keypoints_coco_2014_train": ( - "coco/train2014", - "coco/annotations/person_keypoints_train2014.json", - ), - "keypoints_coco_2014_val": ("coco/val2014", "coco/annotations/person_keypoints_val2014.json"), - "keypoints_coco_2014_minival": ( - "coco/val2014", - "coco/annotations/person_keypoints_minival2014.json", - ), - "keypoints_coco_2014_valminusminival": ( - "coco/val2014", - "coco/annotations/person_keypoints_valminusminival2014.json", - ), - "keypoints_coco_2014_minival_100": ( - "coco/val2014", - "coco/annotations/person_keypoints_minival2014_100.json", - ), - "keypoints_coco_2017_train": ( - "coco/train2017", - "coco/annotations/person_keypoints_train2017.json", - ), - "keypoints_coco_2017_val": ("coco/val2017", "coco/annotations/person_keypoints_val2017.json"), - "keypoints_coco_2017_val_100": ( - "coco/val2017", - "coco/annotations/person_keypoints_val2017_100.json", - ), -} - - -_PREDEFINED_SPLITS_COCO_PANOPTIC = { - "coco_2017_train_panoptic": ( - # This is the original panoptic annotation directory - "coco/panoptic_train2017", - "coco/annotations/panoptic_train2017.json", - # This directory contains semantic annotations that are - # converted from panoptic annotations. - # It is used by PanopticFPN. - # You can use the script at detectron2/data/prepare_panoptic_fpn.py - # to create these directories. 
- "coco/panoptic_stuff_train2017", - ), - "coco_2017_val_panoptic": ( - "coco/panoptic_val2017", - "coco/annotations/panoptic_val2017.json", - "coco/panoptic_stuff_val2017", - ), - "coco_2017_val_100_panoptic": ( - "coco/panoptic_val2017_100", - "coco/annotations/panoptic_val2017_100.json", - "coco/panoptic_stuff_val2017_100", - ), -} - - -def register_all_coco(root): - for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): - for key, (image_root, json_file) in splits_per_dataset.items(): - # Assume pre-defined data live in `./data`. - register_coco_instances( - key, - _get_builtin_metadata(dataset_name), - os.path.join(root, json_file) if "://" not in json_file else json_file, - os.path.join(root, image_root), - ) - - for ( - prefix, - (panoptic_root, panoptic_json, semantic_root), - ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): - prefix_instances = prefix[: -len("_panoptic")] - instances_meta = MetadataCatalog.get(prefix_instances) - image_root, instances_json = instances_meta.image_root, instances_meta.json_file - register_coco_panoptic_separated( - prefix, - _get_builtin_metadata("coco_panoptic_separated"), - image_root, - os.path.join(root, panoptic_root), - os.path.join(root, panoptic_json), - os.path.join(root, semantic_root), - instances_json, - ) - - -# ==== Predefined data and splits for LVIS ========== - - -_PREDEFINED_SPLITS_LVIS = { - "lvis_v0.5": { - "lvis_v0.5_train": ("coco/train2017", "lvis/lvis_v0.5_train.json"), - "lvis_v0.5_val": ("coco/val2017", "lvis/lvis_v0.5_val.json"), - "lvis_v0.5_val_rand_100": ("coco/val2017", "lvis/lvis_v0.5_val_rand_100.json"), - "lvis_v0.5_test": ("coco/test2017", "lvis/lvis_v0.5_image_info_test.json"), - }, - "lvis_v0.5_cocofied": { - "lvis_v0.5_train_cocofied": ("coco/train2017", "lvis/lvis_v0.5_train_cocofied.json"), - "lvis_v0.5_val_cocofied": ("coco/val2017", "lvis/lvis_v0.5_val_cocofied.json"), - }, -} - - -def register_all_lvis(root): - for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_LVIS.items(): - for key, (image_root, json_file) in splits_per_dataset.items(): - # Assume pre-defined data live in `./data`. 
- register_lvis_instances( - key, - get_lvis_instances_meta(dataset_name), - os.path.join(root, json_file) if "://" not in json_file else json_file, - os.path.join(root, image_root), - ) - - -# ==== Predefined splits for raw cityscapes images =========== - - -_RAW_CITYSCAPES_SPLITS = { - "cityscapes_fine_{task}_train": ("cityscapes/leftImg8bit/train", "cityscapes/gtFine/train"), - "cityscapes_fine_{task}_val": ("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val"), - "cityscapes_fine_{task}_test": ("cityscapes/leftImg8bit/test", "cityscapes/gtFine/test"), -} - - -def register_all_cityscapes(root): - for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): - meta = _get_builtin_metadata("cityscapes") - image_dir = os.path.join(root, image_dir) - gt_dir = os.path.join(root, gt_dir) - - inst_key = key.format(task="instance_seg") - DatasetCatalog.register( - inst_key, - lambda x=image_dir, y=gt_dir: load_cityscapes_instances( - x, y, from_json=True, to_polygons=True - ), - ) - MetadataCatalog.get(inst_key).set( - image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_instance", **meta - ) - - sem_key = key.format(task="sem_seg") - DatasetCatalog.register( - sem_key, lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y) - ) - MetadataCatalog.get(sem_key).set( - image_dir=image_dir, gt_dir=gt_dir, evaluator_type="cityscapes_sem_seg", **meta - ) - - -# ==== Predefined splits for PASCAL VOC =========== -def register_all_pascal_voc(root): - SPLITS = [ - ("voc_2007_trainval", "VOC2007", "trainval"), - ("voc_2007_train", "VOC2007", "train"), - ("voc_2007_val", "VOC2007", "val"), - ("voc_2007_test", "VOC2007", "test"), - ("voc_2012_trainval", "VOC2012", "trainval"), - ("voc_2012_train", "VOC2012", "train"), - ("voc_2012_val", "VOC2012", "val"), - ] - for name, dirname, split in SPLITS: - year = 2007 if "2007" in name else 2012 - register_pascal_voc(name, os.path.join(root, dirname), split, year) - MetadataCatalog.get(name).evaluator_type = "pascal_voc" - - -# Register them all under "./data" -_root = os.getenv("DETECTRON2_DATASETS", "data") -register_all_coco(_root) -register_all_lvis(_root) -register_all_cityscapes(_root) -register_all_pascal_voc(_root) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py deleted file mode 100644 index 74c7986..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/builtin_meta.py +++ /dev/null @@ -1,267 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - - -# All coco categories, together with their nice-looking visualization colors -# It's from https://github.com/cocodataset/panopticapi/blob/master/panoptic_coco_categories.json -COCO_CATEGORIES = [ - {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, - {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, - {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, - {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, - {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, - {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, - {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, - {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, - {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, - {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, - {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, - {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, - {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, - {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, - {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, - {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, - {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, - {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, - {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, - {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, - {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, - {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, - {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, - {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, - {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, - {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, - {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, - {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, - {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, - {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, - {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, - {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, - {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, - {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, - {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, - {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, - {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, - {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, - {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, - {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, - {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, - {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, - {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, - {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, - {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, - {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, - {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, - 
{"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, - {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, - {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, - {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, - {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, - {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, - {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, - {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, - {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, - {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, - {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, - {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, - {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, - {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, - {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, - {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, - {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, - {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, - {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, - {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, - {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, - {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, - {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, - {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, - {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, - {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, - {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, - {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, - {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, - {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, - {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, - {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, - {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, - {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, - {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, - {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, - {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, - {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, - {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, - {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, - {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, - {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, - {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, - {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, - {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, - {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, - {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, - {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, - {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, - {"color": [255, 180, 195], "isthing": 0, 
"id": 144, "name": "platform"}, - {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, - {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, - {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, - {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, - {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, - {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, - {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, - {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, - {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, - {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, - {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, - {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, - {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, - {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, - {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, - {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, - {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, - {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, - {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, - {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, - {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, - {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, - {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, - {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, - {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, - {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, - {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, - {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, - {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, - {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, - {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, - {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, - {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, - {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, - {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, - {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, -] - -# fmt: off -COCO_PERSON_KEYPOINT_NAMES = ( - "nose", - "left_eye", "right_eye", - "left_ear", "right_ear", - "left_shoulder", "right_shoulder", - "left_elbow", "right_elbow", - "left_wrist", "right_wrist", - "left_hip", "right_hip", - "left_knee", "right_knee", - "left_ankle", "right_ankle", -) -# fmt: on - -# Pairs of keypoints that should be exchanged under horizontal flipping -COCO_PERSON_KEYPOINT_FLIP_MAP = ( - ("left_eye", "right_eye"), - ("left_ear", "right_ear"), - ("left_shoulder", "right_shoulder"), - ("left_elbow", "right_elbow"), - ("left_wrist", "right_wrist"), - ("left_hip", "right_hip"), - ("left_knee", "right_knee"), - ("left_ankle", "right_ankle"), -) - -# rules for pairs of keypoints to draw a line between, and the line color to use. 
-KEYPOINT_CONNECTION_RULES = [ - # face - ("left_ear", "left_eye", (102, 204, 255)), - ("right_ear", "right_eye", (51, 153, 255)), - ("left_eye", "nose", (102, 0, 204)), - ("nose", "right_eye", (51, 102, 255)), - # upper-body - ("left_shoulder", "right_shoulder", (255, 128, 0)), - ("left_shoulder", "left_elbow", (153, 255, 204)), - ("right_shoulder", "right_elbow", (128, 229, 255)), - ("left_elbow", "left_wrist", (153, 255, 153)), - ("right_elbow", "right_wrist", (102, 255, 224)), - # lower-body - ("left_hip", "right_hip", (255, 102, 0)), - ("left_hip", "left_knee", (255, 255, 77)), - ("right_hip", "right_knee", (153, 255, 204)), - ("left_knee", "left_ankle", (191, 255, 128)), - ("right_knee", "right_ankle", (255, 195, 77)), -] - - -def _get_coco_instances_meta(): - thing_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 1] - thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] - assert len(thing_ids) == 80, len(thing_ids) - # Mapping from the incontiguous COCO category id to an id in [0, 79] - thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} - thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] - ret = { - "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, - "thing_classes": thing_classes, - "thing_colors": thing_colors, - } - return ret - - -def _get_coco_panoptic_separated_meta(): - """ - Returns metadata for "separated" version of the panoptic segmentation dataset. - """ - stuff_ids = [k["id"] for k in COCO_CATEGORIES if k["isthing"] == 0] - assert len(stuff_ids) == 53, len(stuff_ids) - - # For semantic segmentation, this mapping maps from contiguous stuff id - # (in [0, 53], used in models) to ids in the dataset (used for processing results) - # The id 0 is mapped to an extra category "thing". 
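# Concretely, the first few stuff categories in COCO_CATEGORIES are paired as
#   dataset id 92 ("banner")  <-> contiguous id 1
#   dataset id 93 ("blanket") <-> contiguous id 2
#   dataset id 95 ("bridge")  <-> contiguous id 3
# while contiguous id 0 is reserved for all "thing" pixels.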
- stuff_dataset_id_to_contiguous_id = {k: i + 1 for i, k in enumerate(stuff_ids)} - # When converting COCO panoptic annotations to semantic annotations - # We label the "thing" category to 0 - stuff_dataset_id_to_contiguous_id[0] = 0 - - # 54 names for COCO stuff categories (including "things") - stuff_classes = ["things"] + [ - k["name"].replace("-other", "").replace("-merged", "") - for k in COCO_CATEGORIES - if k["isthing"] == 0 - ] - - # NOTE: I randomly picked a color for things - stuff_colors = [[82, 18, 128]] + [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 0] - ret = { - "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, - "stuff_classes": stuff_classes, - "stuff_colors": stuff_colors, - } - ret.update(_get_coco_instances_meta()) - return ret - - -def _get_builtin_metadata(dataset_name): - if dataset_name == "coco": - return _get_coco_instances_meta() - if dataset_name == "coco_panoptic_separated": - return _get_coco_panoptic_separated_meta() - elif dataset_name == "coco_person": - return { - "thing_classes": ["person"], - "keypoint_names": COCO_PERSON_KEYPOINT_NAMES, - "keypoint_flip_map": COCO_PERSON_KEYPOINT_FLIP_MAP, - "keypoint_connection_rules": KEYPOINT_CONNECTION_RULES, - } - elif dataset_name == "cityscapes": - # fmt: off - CITYSCAPES_THING_CLASSES = [ - "person", "rider", "car", "truck", - "bus", "train", "motorcycle", "bicycle", - ] - CITYSCAPES_STUFF_CLASSES = [ - "road", "sidewalk", "building", "wall", "fence", "pole", "traffic light", - "traffic sign", "vegetation", "terrain", "sky", "person", "rider", "car", - "truck", "bus", "train", "motorcycle", "bicycle", "license plate", - ] - # fmt: on - return { - "thing_classes": CITYSCAPES_THING_CLASSES, - "stuff_classes": CITYSCAPES_STUFF_CLASSES, - } - raise KeyError("No built-in metadata for dataset {}".format(dataset_name)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py deleted file mode 100644 index 062a555..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/cityscapes.py +++ /dev/null @@ -1,329 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import functools -import json -import logging -import multiprocessing as mp -import numpy as np -import os -from itertools import chain -import pycocotools.mask as mask_util -from fvcore.common.file_io import PathManager -from PIL import Image - -from detectron2.structures import BoxMode -from detectron2.utils.comm import get_world_size -from detectron2.utils.logger import setup_logger - -try: - import cv2 # noqa -except ImportError: - # OpenCV is an optional dependency at the moment - pass - - -logger = logging.getLogger(__name__) - - -def get_cityscapes_files(image_dir, gt_dir): - files = [] - # scan through the directory - cities = PathManager.ls(image_dir) - logger.info(f"{len(cities)} cities found in '{image_dir}'.") - for city in cities: - city_img_dir = os.path.join(image_dir, city) - city_gt_dir = os.path.join(gt_dir, city) - for basename in PathManager.ls(city_img_dir): - image_file = os.path.join(city_img_dir, basename) - - suffix = "leftImg8bit.png" - assert basename.endswith(suffix) - basename = basename[: -len(suffix)] - - instance_file = os.path.join(city_gt_dir, basename + "gtFine_instanceIds.png") - label_file = os.path.join(city_gt_dir, basename + "gtFine_labelIds.png") - json_file = os.path.join(city_gt_dir, basename + "gtFine_polygons.json") - - files.append((image_file, instance_file, label_file, json_file)) - assert len(files), "No images found in {}".format(image_dir) - for f in files[0]: - assert PathManager.isfile(f), f - return files - - -def load_cityscapes_instances(image_dir, gt_dir, from_json=True, to_polygons=True): - """ - Args: - image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". - gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". - from_json (bool): whether to read annotations from the raw json file or the png files. - to_polygons (bool): whether to represent the segmentation as polygons - (COCO's format) instead of masks (cityscapes's format). - - Returns: - list[dict]: a list of dicts in Detectron2 standard format. (See - `Using Custom Datasets `_ ) - """ - if from_json: - assert to_polygons, ( - "Cityscapes's json annotations are in polygon format. " - "Converting to mask format is not supported now." - ) - files = get_cityscapes_files(image_dir, gt_dir) - - logger.info("Preprocessing cityscapes annotations ...") - # This is still not fast: all workers will execute duplicate works and will - # take up to 10m on a 8GPU server. - pool = mp.Pool(processes=max(mp.cpu_count() // get_world_size() // 2, 4)) - - ret = pool.map( - functools.partial(cityscapes_files_to_dict, from_json=from_json, to_polygons=to_polygons), - files, - ) - logger.info("Loaded {} images from {}".format(len(ret), image_dir)) - - # Map cityscape ids to contiguous ids - from cityscapesscripts.helpers.labels import labels - - labels = [l for l in labels if l.hasInstances and not l.ignoreInEval] - dataset_id_to_contiguous_id = {l.id: idx for idx, l in enumerate(labels)} - for dict_per_image in ret: - for anno in dict_per_image["annotations"]: - anno["category_id"] = dataset_id_to_contiguous_id[anno["category_id"]] - return ret - - -def load_cityscapes_semantic(image_dir, gt_dir): - """ - Args: - image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". - gt_dir (str): path to the raw annotations. e.g., "~/cityscapes/gtFine/train". - - Returns: - list[dict]: a list of dict, each has "file_name" and - "sem_seg_file_name". - """ - ret = [] - # gt_dir is small and contain many small files. 
make sense to fetch to local first - gt_dir = PathManager.get_local_path(gt_dir) - for image_file, _, label_file, json_file in get_cityscapes_files(image_dir, gt_dir): - label_file = label_file.replace("labelIds", "labelTrainIds") - - with PathManager.open(json_file, "r") as f: - jsonobj = json.load(f) - ret.append( - { - "file_name": image_file, - "sem_seg_file_name": label_file, - "height": jsonobj["imgHeight"], - "width": jsonobj["imgWidth"], - } - ) - assert len(ret), f"No images found in {image_dir}!" - assert PathManager.isfile( - ret[0]["sem_seg_file_name"] - ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa - return ret - - -def cityscapes_files_to_dict(files, from_json, to_polygons): - """ - Parse cityscapes annotation files to a instance segmentation dataset dict. - - Args: - files (tuple): consists of (image_file, instance_id_file, label_id_file, json_file) - from_json (bool): whether to read annotations from the raw json file or the png files. - to_polygons (bool): whether to represent the segmentation as polygons - (COCO's format) instead of masks (cityscapes's format). - - Returns: - A dict in Detectron2 Dataset format. - """ - from cityscapesscripts.helpers.labels import id2label, name2label - - image_file, instance_id_file, _, json_file = files - - annos = [] - - if from_json: - from shapely.geometry import MultiPolygon, Polygon - - with PathManager.open(json_file, "r") as f: - jsonobj = json.load(f) - ret = { - "file_name": image_file, - "image_id": os.path.basename(image_file), - "height": jsonobj["imgHeight"], - "width": jsonobj["imgWidth"], - } - - # `polygons_union` contains the union of all valid polygons. - polygons_union = Polygon() - - # CityscapesScripts draw the polygons in sequential order - # and each polygon *overwrites* existing ones. See - # (https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/preparation/json2instanceImg.py) # noqa - # We use reverse order, and each polygon *avoids* early ones. - # This will resolve the ploygon overlaps in the same way as CityscapesScripts. - for obj in jsonobj["objects"][::-1]: - if "deleted" in obj: # cityscapes data format specific - continue - label_name = obj["label"] - - try: - label = name2label[label_name] - except KeyError: - if label_name.endswith("group"): # crowd area - label = name2label[label_name[: -len("group")]] - else: - raise - if label.id < 0: # cityscapes data format - continue - - # Cityscapes's raw annotations uses integer coordinates - # Therefore +0.5 here - poly_coord = np.asarray(obj["polygon"], dtype="f4") + 0.5 - # CityscapesScript uses PIL.ImageDraw.polygon to rasterize - # polygons for evaluation. This function operates in integer space - # and draws each pixel whose center falls into the polygon. - # Therefore it draws a polygon which is 0.5 "fatter" in expectation. - # We therefore dilate the input polygon by 0.5 as our input. 
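# Shapely's buffer(0.5, resolution=4) dilates the polygon outward by half a pixel, matching the
# comment above; the low resolution gives a coarse rounded-corner approximation that keeps the dilation cheap.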
- poly = Polygon(poly_coord).buffer(0.5, resolution=4) - - if not label.hasInstances or label.ignoreInEval: - # even if we won't store the polygon it still contributes to overlaps resolution - polygons_union = polygons_union.union(poly) - continue - - # Take non-overlapping part of the polygon - poly_wo_overlaps = poly.difference(polygons_union) - if poly_wo_overlaps.is_empty: - continue - polygons_union = polygons_union.union(poly) - - anno = {} - anno["iscrowd"] = label_name.endswith("group") - anno["category_id"] = label.id - - if isinstance(poly_wo_overlaps, Polygon): - poly_list = [poly_wo_overlaps] - elif isinstance(poly_wo_overlaps, MultiPolygon): - poly_list = poly_wo_overlaps.geoms - else: - raise NotImplementedError("Unknown geometric structure {}".format(poly_wo_overlaps)) - - poly_coord = [] - for poly_el in poly_list: - # COCO API can work only with exterior boundaries now, hence we store only them. - # TODO: store both exterior and interior boundaries once other parts of the - # codebase support holes in polygons. - poly_coord.append(list(chain(*poly_el.exterior.coords))) - anno["segmentation"] = poly_coord - (xmin, ymin, xmax, ymax) = poly_wo_overlaps.bounds - - anno["bbox"] = (xmin, ymin, xmax, ymax) - anno["bbox_mode"] = BoxMode.XYXY_ABS - - annos.append(anno) - else: - # See also the official annotation parsing scripts at - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/instances2dict.py # noqa - with PathManager.open(instance_id_file, "rb") as f: - inst_image = np.asarray(Image.open(f), order="F") - # ids < 24 are stuff labels (filtering them first is about 5% faster) - flattened_ids = np.unique(inst_image[inst_image >= 24]) - - ret = { - "file_name": image_file, - "image_id": os.path.basename(image_file), - "height": inst_image.shape[0], - "width": inst_image.shape[1], - } - - for instance_id in flattened_ids: - # For non-crowd annotations, instance_id // 1000 is the label_id - # Crowd annotations have <1000 instance ids - label_id = instance_id // 1000 if instance_id >= 1000 else instance_id - label = id2label[label_id] - if not label.hasInstances or label.ignoreInEval: - continue - - anno = {} - anno["iscrowd"] = instance_id < 1000 - anno["category_id"] = label.id - - mask = np.asarray(inst_image == instance_id, dtype=np.uint8, order="F") - - inds = np.nonzero(mask) - ymin, ymax = inds[0].min(), inds[0].max() - xmin, xmax = inds[1].min(), inds[1].max() - anno["bbox"] = (xmin, ymin, xmax, ymax) - if xmax <= xmin or ymax <= ymin: - continue - anno["bbox_mode"] = BoxMode.XYXY_ABS - if to_polygons: - # This conversion comes from D4809743 and D5171122, - # when Mask-RCNN was first developed. - contours = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[ - -2 - ] - polygons = [c.reshape(-1).tolist() for c in contours if len(c) >= 3] - # opencv's can produce invalid polygons - if len(polygons) == 0: - continue - anno["segmentation"] = polygons - else: - anno["segmentation"] = mask_util.encode(mask[:, :, None])[0] - annos.append(anno) - ret["annotations"] = annos - return ret - - -if __name__ == "__main__": - """ - Test the cityscapes dataset loader. 
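For programmatic use, the two loaders above can also be called directly; a minimal sketch, assuming cityscapesscripts is installed and using placeholder paths:

    from detectron2.data.datasets.cityscapes import (
        load_cityscapes_instances,
        load_cityscapes_semantic,
    )

    # Instance dicts in Detectron2 format (polygon segmentations parsed from the gtFine json files).
    inst_dicts = load_cityscapes_instances(
        "cityscapes/leftImg8bit/train",   # placeholder image_dir
        "cityscapes/gtFine/train",        # placeholder gt_dir
        from_json=True,
        to_polygons=True,
    )

    # Semantic dicts; labelTrainIds.png must be generated first with
    # cityscapesscripts/preparation/createTrainIdLabelImgs.py.
    sem_dicts = load_cityscapes_semantic("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val")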
- - Usage: - python -m detectron2.data.data.cityscapes \ - cityscapes/leftImg8bit/train cityscapes/gtFine/train - """ - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("image_dir") - parser.add_argument("gt_dir") - parser.add_argument("--type", choices=["instance", "semantic"], default="instance") - args = parser.parse_args() - from detectron2.data.catalog import Metadata - from detectron2.utils.visualizer import Visualizer - from cityscapesscripts.helpers.labels import labels - - logger = setup_logger(name=__name__) - - dirname = "cityscapes-data-vis" - os.makedirs(dirname, exist_ok=True) - - if args.type == "instance": - dicts = load_cityscapes_instances( - args.image_dir, args.gt_dir, from_json=True, to_polygons=True - ) - logger.info("Done loading {} samples.".format(len(dicts))) - - thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval] - meta = Metadata().set(thing_classes=thing_classes) - - else: - dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir) - logger.info("Done loading {} samples.".format(len(dicts))) - - stuff_names = [k.name for k in labels if k.trainId != 255] - stuff_colors = [k.color for k in labels if k.trainId != 255] - meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors) - - for d in dicts: - img = np.array(Image.open(PathManager.open(d["file_name"], "rb"))) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - # cv2.imshow("a", vis.get_image()[:, :, ::-1]) - # cv2.waitKey() - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/coco.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/coco.py deleted file mode 100644 index f6f099e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/coco.py +++ /dev/null @@ -1,466 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import contextlib -import datetime -import io -import json -import logging -import numpy as np -import os -import pycocotools.mask as mask_util -from fvcore.common.file_io import PathManager, file_lock -from fvcore.common.timer import Timer -from PIL import Image - -from detectron2.structures import Boxes, BoxMode, PolygonMasks - -from .. import DatasetCatalog, MetadataCatalog - -""" -This file contains functions to parse COCO-format annotations into dicts in "Detectron2 format". -""" - - -logger = logging.getLogger(__name__) - -__all__ = ["load_coco_json", "load_sem_seg", "convert_to_coco_json"] - - -def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): - """ - Load a json file with COCO's instances annotation format. - Currently supports instance detection, instance segmentation, - and person keypoints annotations. - - Args: - json_file (str): full path to the json file in COCO instances annotation format. - image_root (str or path-like): the directory where the images in this json file exists. - dataset_name (str): the name of the dataset (e.g., coco_2017_train). - If provided, this function will also put "thing_classes" into - the metadata associated with this dataset. - extra_annotation_keys (list[str]): list of per-annotation keys that should also be - loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", - "category_id", "segmentation"). The values for these keys will be returned as-is. 
- For example, the densepose annotations are loaded in this way. - - Returns: - list[dict]: a list of dicts in Detectron2 standard dataset dicts format. (See - `Using Custom Datasets `_ ) - - Notes: - 1. This function does not read the image files. - The results do not have the "image" field. - """ - from pycocotools.coco import COCO - - timer = Timer() - json_file = PathManager.get_local_path(json_file) - with contextlib.redirect_stdout(io.StringIO()): - coco_api = COCO(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - - id_map = None - if dataset_name is not None: - meta = MetadataCatalog.get(dataset_name) - cat_ids = sorted(coco_api.getCatIds()) - cats = coco_api.loadCats(cat_ids) - # The categories in a custom json file may not be sorted. - thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] - meta.thing_classes = thing_classes - - # In COCO, certain category ids are artificially removed, - # and by convention they are always ignored. - # We deal with COCO's id issue and translate - # the category ids to contiguous ids in [0, 80). - - # It works by looking at the "categories" field in the json, therefore - # if users' own json also have incontiguous ids, we'll - # apply this mapping as well but print a warning. - if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): - if "coco" not in dataset_name: - logger.warning( - """ -Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. -""" - ) - id_map = {v: i for i, v in enumerate(cat_ids)} - meta.thing_dataset_id_to_contiguous_id = id_map - - # sort indices for reproducible results - img_ids = sorted(coco_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = coco_api.loadImgs(img_ids) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. Example of anns[0]: - # [{'segmentation': [[192.81, - # 247.09, - # ... - # 219.03, - # 249.06]], - # 'area': 1035.749, - # 'iscrowd': 0, - # 'image_id': 1268, - # 'bbox': [192.81, 224.8, 74.73, 33.43], - # 'category_id': 16, - # 'id': 42986}, - # ...] - anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] - - if "minival" not in json_file: - # The popular valminusminival & minival annotations for COCO2014 contain this bug. - # However the ratio of buggy annotations there is tiny and does not affect accuracy. - # Therefore we explicitly white-list them. 
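# Sanity check below: flatten every annotation id in the file and verify that no id is reused across images.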
- ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( - json_file - ) - - imgs_anns = list(zip(imgs, anns)) - - logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) - - dataset_dicts = [] - - ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) - - num_instances_without_valid_segmentation = 0 - - for (img_dict, anno_dict_list) in imgs_anns: - record = {} - record["file_name"] = os.path.join(image_root, img_dict["file_name"]) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - image_id = record["image_id"] = img_dict["id"] - - objs = [] - for anno in anno_dict_list: - # Check that the image_id in this annotation is the same as - # the image_id we're looking at. - # This fails only when the data parsing logic or the annotation file is buggy. - - # The original COCO valminusminival2014 & minival2014 annotation files - # actually contains bugs that, together with certain ways of using COCO API, - # can trigger this assertion. - assert anno["image_id"] == image_id - - assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' - - obj = {key: anno[key] for key in ann_keys if key in anno} - - segm = anno.get("segmentation", None) - if segm: # either list[list[float]] or dict(RLE) - if not isinstance(segm, dict): - # filter out invalid polygons (< 3 points) - segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - if len(segm) == 0: - num_instances_without_valid_segmentation += 1 - continue # ignore this instance - obj["segmentation"] = segm - - keypts = anno.get("keypoints", None) - if keypts: # list[int] - for idx, v in enumerate(keypts): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # Therefore we assume the coordinates are "pixel indices" and - # add 0.5 to convert to floating point coordinates. - keypts[idx] = v + 0.5 - obj["keypoints"] = keypts - - obj["bbox_mode"] = BoxMode.XYWH_ABS - if id_map: - obj["category_id"] = id_map[obj["category_id"]] - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - - if num_instances_without_valid_segmentation > 0: - logger.warning( - "Filtered out {} instances without valid segmentation. " - "There might be issues in your dataset generation process.".format( - num_instances_without_valid_segmentation - ) - ) - return dataset_dicts - - -def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg"): - """ - Load semantic segmentation data. All files under "gt_root" with "gt_ext" extension are - treated as ground truth annotations and all files under "image_root" with "image_ext" extension - as input images. Ground truth and input images are matched using file paths relative to - "gt_root" and "image_root" respectively without taking into account file extensions. - This works for COCO as well as some other data. - - Args: - gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation - annotations are stored as images with integer values in pixels that represent - corresponding semantic labels. - image_root (str): the directory where the input images are. - gt_ext (str): file extension for ground truth annotations. - image_ext (str): file extension for input images. 
- - Returns: - list[dict]: - a list of dicts in detectron2 standard format without instance-level - annotation. - - Notes: - 1. This function does not read the image and ground truth files. - The results do not have the "image" and "sem_seg" fields. - """ - - # We match input images with ground truth based on their relative filepaths (without file - # extensions) starting from 'image_root' and 'gt_root' respectively. - def file2id(folder_path, file_path): - # extract relative path starting from `folder_path` - image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path)) - # remove file extension - image_id = os.path.splitext(image_id)[0] - return image_id - - input_files = sorted( - (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)), - key=lambda file_path: file2id(image_root, file_path), - ) - gt_files = sorted( - (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)), - key=lambda file_path: file2id(gt_root, file_path), - ) - - assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root) - - # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images - if len(input_files) != len(gt_files): - logger.warn( - "Directory {} and {} has {} and {} files, respectively.".format( - image_root, gt_root, len(input_files), len(gt_files) - ) - ) - input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files] - gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files] - intersect = list(set(input_basenames) & set(gt_basenames)) - # sort, otherwise each worker may obtain a list[dict] in different order - intersect = sorted(intersect) - logger.warn("Will use their intersection of {} files.".format(len(intersect))) - input_files = [os.path.join(image_root, f + image_ext) for f in intersect] - gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect] - - logger.info( - "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root) - ) - - dataset_dicts = [] - for (img_path, gt_path) in zip(input_files, gt_files): - record = {} - record["file_name"] = img_path - record["sem_seg_file_name"] = gt_path - dataset_dicts.append(record) - - return dataset_dicts - - -def convert_to_coco_dict(dataset_name): - """ - Convert an instance detection/segmentation or keypoint detection dataset - in detectron2's standard format into COCO json format. - - Generic dataset description can be found here: - https://detectron2.readthedocs.io/tutorials/datasets.html#register-a-dataset - - COCO data format description can be found here: - http://cocodataset.org/#format-data - - Args: - dataset_name (str): - name of the source dataset - Must be registered in DatastCatalog and in detectron2's standard format. 
- Must have corresponding metadata "thing_classes" - Returns: - coco_dict: serializable dict in COCO json format - """ - - dataset_dicts = DatasetCatalog.get(dataset_name) - metadata = MetadataCatalog.get(dataset_name) - - # unmap the category mapping ids for COCO - if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = {v: k for k, v in metadata.thing_dataset_id_to_contiguous_id.items()} - reverse_id_mapper = lambda contiguous_id: reverse_id_mapping[contiguous_id] # noqa - else: - reverse_id_mapper = lambda contiguous_id: contiguous_id # noqa - - categories = [ - {"id": reverse_id_mapper(id), "name": name} - for id, name in enumerate(metadata.thing_classes) - ] - - logger.info("Converting dataset dicts into COCO format") - coco_images = [] - coco_annotations = [] - - for image_id, image_dict in enumerate(dataset_dicts): - coco_image = { - "id": image_dict.get("image_id", image_id), - "width": image_dict["width"], - "height": image_dict["height"], - "file_name": image_dict["file_name"], - } - coco_images.append(coco_image) - - anns_per_image = image_dict["annotations"] - for annotation in anns_per_image: - # create a new dict with only COCO fields - coco_annotation = {} - - # COCO requirement: XYWH box format - bbox = annotation["bbox"] - bbox_mode = annotation["bbox_mode"] - bbox = BoxMode.convert(bbox, bbox_mode, BoxMode.XYWH_ABS) - - # COCO requirement: instance area - if "segmentation" in annotation: - # Computing areas for instances by counting the pixels - segmentation = annotation["segmentation"] - # TODO: check segmentation type: RLE, BinaryMask or Polygon - if isinstance(segmentation, list): - polygons = PolygonMasks([segmentation]) - area = polygons.area()[0].item() - elif isinstance(segmentation, dict): # RLE - area = mask_util.area(segmentation).item() - else: - raise TypeError(f"Unknown segmentation type {type(segmentation)}!") - else: - # Computing areas using bounding boxes - bbox_xy = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - area = Boxes([bbox_xy]).area()[0].item() - - if "keypoints" in annotation: - keypoints = annotation["keypoints"] # list[int] - for idx, v in enumerate(keypoints): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # For COCO format consistency we substract 0.5 - # https://github.com/facebookresearch/detectron2/pull/175#issuecomment-551202163 - keypoints[idx] = v - 0.5 - if "num_keypoints" in annotation: - num_keypoints = annotation["num_keypoints"] - else: - num_keypoints = sum(kp > 0 for kp in keypoints[2::3]) - - # COCO requirement: - # linking annotations to images - # "id" field must start with 1 - coco_annotation["id"] = len(coco_annotations) + 1 - coco_annotation["image_id"] = coco_image["id"] - coco_annotation["bbox"] = [round(float(x), 3) for x in bbox] - coco_annotation["area"] = float(area) - coco_annotation["iscrowd"] = annotation.get("iscrowd", 0) - coco_annotation["category_id"] = reverse_id_mapper(annotation["category_id"]) - - # Add optional fields - if "keypoints" in annotation: - coco_annotation["keypoints"] = keypoints - coco_annotation["num_keypoints"] = num_keypoints - - if "segmentation" in annotation: - coco_annotation["segmentation"] = annotation["segmentation"] - if isinstance(coco_annotation["segmentation"], dict): # RLE - coco_annotation["segmentation"]["counts"] = coco_annotation["segmentation"][ - "counts" - ].decode("ascii") - - coco_annotations.append(coco_annotation) - - 
logger.info( - "Conversion finished, " - f"#images: {len(coco_images)}, #annotations: {len(coco_annotations)}" - ) - - info = { - "date_created": str(datetime.datetime.now()), - "description": "Automatically generated COCO json file for Detectron2.", - } - coco_dict = { - "info": info, - "images": coco_images, - "annotations": coco_annotations, - "categories": categories, - "licenses": None, - } - return coco_dict - - -def convert_to_coco_json(dataset_name, output_file, allow_cached=True): - """ - Converts dataset into COCO format and saves it to a json file. - dataset_name must be registered in DatasetCatalog and in detectron2's standard format. - - Args: - dataset_name: - reference from the config file to the catalogs - must be registered in DatasetCatalog and in detectron2's standard format - output_file: path of json file that will be saved to - allow_cached: if json file is already present then skip conversion - """ - - # TODO: The dataset or the conversion script *may* change, - # a checksum would be useful for validating the cached data - - PathManager.mkdirs(os.path.dirname(output_file)) - with file_lock(output_file): - if PathManager.exists(output_file) and allow_cached: - logger.warning( - f"Using previously cached COCO format annotations at '{output_file}'. " - "You need to clear the cache file if your dataset has been modified." - ) - else: - logger.info(f"Converting annotations of dataset '{dataset_name}' to COCO format ...)") - coco_dict = convert_to_coco_dict(dataset_name) - - logger.info(f"Caching COCO format annotations at '{output_file}' ...") - with PathManager.open(output_file, "w") as f: - json.dump(coco_dict, f) - - -if __name__ == "__main__": - """ - Test the COCO json dataset loader. - - Usage: - python -m detectron2.data.data.coco \ - path/to/json path/to/image_root dataset_name - - "dataset_name" can be "coco_2014_minival_100", or other - pre-registered ones - """ - from detectron2.utils.logger import setup_logger - from detectron2.utils.visualizer import Visualizer - import detectron2.data.datasets # noqa # add pre-defined metadata - import sys - - logger = setup_logger(name=__name__) - assert sys.argv[3] in DatasetCatalog.list() - meta = MetadataCatalog.get(sys.argv[3]) - - dicts = load_coco_json(sys.argv[1], sys.argv[2], sys.argv[3]) - logger.info("Done loading {} samples.".format(len(dicts))) - - dirname = "coco-data-vis" - os.makedirs(dirname, exist_ok=True) - for d in dicts: - img = np.array(Image.open(d["file_name"])) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis.py deleted file mode 100644 index 7b95be3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import os -from fvcore.common.file_io import PathManager -from fvcore.common.timer import Timer - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -from .builtin_meta import _get_coco_instances_meta -from .lvis_v0_5_categories import LVIS_CATEGORIES - -""" -This file contains functions to parse LVIS-format annotations into dicts in the -"Detectron2 format". 
-""" - -logger = logging.getLogger(__name__) - -__all__ = ["load_lvis_json", "register_lvis_instances", "get_lvis_instances_meta"] - - -def register_lvis_instances(name, metadata, json_file, image_root): - """ - Register a dataset in LVIS's json annotation format for instance detection and segmentation. - - Args: - name (str): a name that identifies the dataset, e.g. "lvis_v0.5_train". - metadata (dict): extra metadata associated with this dataset. It can be an empty dict. - json_file (str): path to the json instance annotation file. - image_root (str or path-like): directory which contains all the images. - """ - DatasetCatalog.register(name, lambda: load_lvis_json(json_file, image_root, name)) - MetadataCatalog.get(name).set( - json_file=json_file, image_root=image_root, evaluator_type="lvis", **metadata - ) - - -def load_lvis_json(json_file, image_root, dataset_name=None): - """ - Load a json file in LVIS's annotation format. - - Args: - json_file (str): full path to the LVIS json annotation file. - image_root (str): the directory where the images in this json file exists. - dataset_name (str): the name of the dataset (e.g., "lvis_v0.5_train"). - If provided, this function will put "thing_classes" into the metadata - associated with this dataset. - - Returns: - list[dict]: a list of dicts in Detectron2 standard format. (See - `Using Custom Datasets `_ ) - - Notes: - 1. This function does not read the image files. - The results do not have the "image" field. - """ - from lvis import LVIS - - json_file = PathManager.get_local_path(json_file) - - timer = Timer() - lvis_api = LVIS(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - - if dataset_name is not None: - meta = get_lvis_instances_meta(dataset_name) - MetadataCatalog.get(dataset_name).set(**meta) - - # sort indices for reproducible results - img_ids = sorted(lvis_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = lvis_api.load_imgs(img_ids) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. Example of anns[0]: - # [{'segmentation': [[192.81, - # 247.09, - # ... - # 219.03, - # 249.06]], - # 'area': 1035.749, - # 'image_id': 1268, - # 'bbox': [192.81, 224.8, 74.73, 33.43], - # 'category_id': 16, - # 'id': 42986}, - # ...] 
- anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids] - - # Sanity check that each annotation has a unique id - ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique".format( - json_file - ) - - imgs_anns = list(zip(imgs, anns)) - - logger.info("Loaded {} images in the LVIS format from {}".format(len(imgs_anns), json_file)) - - dataset_dicts = [] - - for (img_dict, anno_dict_list) in imgs_anns: - record = {} - file_name = img_dict["file_name"] - if img_dict["file_name"].startswith("COCO"): - # Convert form the COCO 2014 file naming convention of - # COCO_[train/val/test]2014_000000000000.jpg to the 2017 naming convention of - # 000000000000.jpg (LVIS v1 will fix this naming issue) - file_name = file_name[-16:] - record["file_name"] = os.path.join(image_root, file_name) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", []) - record["neg_category_ids"] = img_dict.get("neg_category_ids", []) - image_id = record["image_id"] = img_dict["id"] - - objs = [] - for anno in anno_dict_list: - # Check that the image_id in this annotation is the same as - # the image_id we're looking at. - # This fails only when the data parsing logic or the annotation file is buggy. - assert anno["image_id"] == image_id - obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS} - obj["category_id"] = anno["category_id"] - 1 # Convert 1-indexed to 0-indexed - segm = anno["segmentation"] # list[list[float]] - # filter out invalid polygons (< 3 points) - valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - assert len(segm) == len( - valid_segm - ), "Annotation contains an invalid polygon with < 3 points" - assert len(segm) > 0 - obj["segmentation"] = segm - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - - return dataset_dicts - - -def get_lvis_instances_meta(dataset_name): - """ - Load LVIS metadata. - - Args: - dataset_name (str): LVIS dataset name without the split name (e.g., "lvis_v0.5"). - - Returns: - dict: LVIS metadata with keys: thing_classes - """ - if "cocofied" in dataset_name: - return _get_coco_instances_meta() - if "v0.5" in dataset_name: - return _get_lvis_instances_meta_v0_5() - # There will be a v1 in the future - # elif dataset_name == "lvis_v1": - # return get_lvis_instances_meta_v1() - raise ValueError("No built-in metadata for dataset {}".format(dataset_name)) - - -def _get_lvis_instances_meta_v0_5(): - assert len(LVIS_CATEGORIES) == 1230 - cat_ids = [k["id"] for k in LVIS_CATEGORIES] - assert min(cat_ids) == 1 and max(cat_ids) == len( - cat_ids - ), "Category ids are not in [1, #categories], as expected" - # Ensure that the category list is sorted by id - lvis_categories = sorted(LVIS_CATEGORIES, key=lambda x: x["id"]) - thing_classes = [k["synonyms"][0] for k in lvis_categories] - meta = {"thing_classes": thing_classes} - return meta - - -if __name__ == "__main__": - """ - Test the LVIS json dataset loader. 
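In normal use the loader is wired up through registration rather than called directly; a minimal sketch, with placeholder dataset name and paths:

    from detectron2.data import DatasetCatalog
    from detectron2.data.datasets.lvis import get_lvis_instances_meta, register_lvis_instances

    register_lvis_instances(
        "lvis_v0.5_custom_train",                 # placeholder dataset name
        get_lvis_instances_meta("lvis_v0.5"),     # thing_classes for the 1230 LVIS v0.5 categories
        "datasets/lvis/lvis_v0.5_train.json",     # placeholder json path
        "datasets/coco/train2017",                # placeholder image root
    )

    # load_lvis_json runs lazily the first time the dataset is fetched from the catalog.
    dicts = DatasetCatalog.get("lvis_v0.5_custom_train")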
- - Usage: - python -m detectron2.data.data.lvis \ - path/to/json path/to/image_root dataset_name vis_limit - """ - import sys - import numpy as np - from detectron2.utils.logger import setup_logger - from PIL import Image - import detectron2.data.datasets # noqa # add pre-defined metadata - from detectron2.utils.visualizer import Visualizer - - logger = setup_logger(name=__name__) - meta = MetadataCatalog.get(sys.argv[3]) - - dicts = load_lvis_json(sys.argv[1], sys.argv[2], sys.argv[3]) - logger.info("Done loading {} samples.".format(len(dicts))) - - dirname = "lvis-data-vis" - os.makedirs(dirname, exist_ok=True) - for d in dicts[: int(sys.argv[4])]: - img = np.array(Image.open(d["file_name"])) - visualizer = Visualizer(img, metadata=meta) - vis = visualizer.draw_dataset_dict(d) - fpath = os.path.join(dirname, os.path.basename(d["file_name"])) - vis.save(fpath) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py deleted file mode 100644 index 8205e60..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/lvis_v0_5_categories.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# Autogen with -# with open("lvis_v0.5_val.json", "r") as f: -# a = json.load(f) -# c = a["categories"] -# for x in c: -# del x["image_count"] -# del x["instance_count"] -# LVIS_CATEGORIES = repr(c) + " # noqa" - -# fmt: off -LVIS_CATEGORIES = [{'frequency': 'r', 'id': 1, 'synset': 'acorn.n.01', 'synonyms': ['acorn'], 'def': 'nut from an oak tree', 'name': 'acorn'}, {'frequency': 'c', 'id': 2, 'synset': 'aerosol.n.02', 'synonyms': ['aerosol_can', 'spray_can'], 'def': 'a dispenser that holds a substance under pressure', 'name': 'aerosol_can'}, {'frequency': 'f', 'id': 3, 'synset': 'air_conditioner.n.01', 'synonyms': ['air_conditioner'], 'def': 'a machine that keeps air cool and dry', 'name': 'air_conditioner'}, {'frequency': 'f', 'id': 4, 'synset': 'airplane.n.01', 'synonyms': ['airplane', 'aeroplane'], 'def': 'an aircraft that has a fixed wing and is powered by propellers or jets', 'name': 'airplane'}, {'frequency': 'c', 'id': 5, 'synset': 'alarm_clock.n.01', 'synonyms': ['alarm_clock'], 'def': 'a clock that wakes a sleeper at some preset time', 'name': 'alarm_clock'}, {'frequency': 'c', 'id': 6, 'synset': 'alcohol.n.01', 'synonyms': ['alcohol', 'alcoholic_beverage'], 'def': 'a liquor or brew containing alcohol as the active agent', 'name': 'alcohol'}, {'frequency': 'r', 'id': 7, 'synset': 'alligator.n.02', 'synonyms': ['alligator', 'gator'], 'def': 'amphibious reptiles related to crocodiles but with shorter broader snouts', 'name': 'alligator'}, {'frequency': 'c', 'id': 8, 'synset': 'almond.n.02', 'synonyms': ['almond'], 'def': 'oval-shaped edible seed of the almond tree', 'name': 'almond'}, {'frequency': 'c', 'id': 9, 'synset': 'ambulance.n.01', 'synonyms': ['ambulance'], 'def': 'a vehicle that takes people to and from hospitals', 'name': 'ambulance'}, {'frequency': 'r', 'id': 10, 'synset': 'amplifier.n.01', 'synonyms': ['amplifier'], 'def': 'electronic equipment that increases strength of signals', 'name': 'amplifier'}, {'frequency': 'c', 'id': 11, 'synset': 'anklet.n.03', 'synonyms': ['anklet', 'ankle_bracelet'], 'def': 'an ornament worn around the ankle', 'name': 'anklet'}, {'frequency': 'f', 'id': 12, 'synset': 'antenna.n.01', 'synonyms': ['antenna', 'aerial', 
'transmitting_aerial'], 'def': 'an electrical device that sends or receives radio or television signals', 'name': 'antenna'}, {'frequency': 'f', 'id': 13, 'synset': 'apple.n.01', 'synonyms': ['apple'], 'def': 'fruit with red or yellow or green skin and sweet to tart crisp whitish flesh', 'name': 'apple'}, {'frequency': 'r', 'id': 14, 'synset': 'apple_juice.n.01', 'synonyms': ['apple_juice'], 'def': 'the juice of apples', 'name': 'apple_juice'}, {'frequency': 'r', 'id': 15, 'synset': 'applesauce.n.01', 'synonyms': ['applesauce'], 'def': 'puree of stewed apples usually sweetened and spiced', 'name': 'applesauce'}, {'frequency': 'r', 'id': 16, 'synset': 'apricot.n.02', 'synonyms': ['apricot'], 'def': 'downy yellow to rosy-colored fruit resembling a small peach', 'name': 'apricot'}, {'frequency': 'f', 'id': 17, 'synset': 'apron.n.01', 'synonyms': ['apron'], 'def': 'a garment of cloth that is tied about the waist and worn to protect clothing', 'name': 'apron'}, {'frequency': 'c', 'id': 18, 'synset': 'aquarium.n.01', 'synonyms': ['aquarium', 'fish_tank'], 'def': 'a tank/pool/bowl filled with water for keeping live fish and underwater animals', 'name': 'aquarium'}, {'frequency': 'c', 'id': 19, 'synset': 'armband.n.02', 'synonyms': ['armband'], 'def': 'a band worn around the upper arm', 'name': 'armband'}, {'frequency': 'f', 'id': 20, 'synset': 'armchair.n.01', 'synonyms': ['armchair'], 'def': 'chair with a support on each side for arms', 'name': 'armchair'}, {'frequency': 'r', 'id': 21, 'synset': 'armoire.n.01', 'synonyms': ['armoire'], 'def': 'a large wardrobe or cabinet', 'name': 'armoire'}, {'frequency': 'r', 'id': 22, 'synset': 'armor.n.01', 'synonyms': ['armor', 'armour'], 'def': 'protective covering made of metal and used in combat', 'name': 'armor'}, {'frequency': 'c', 'id': 23, 'synset': 'artichoke.n.02', 'synonyms': ['artichoke'], 'def': 'a thistlelike flower head with edible fleshy leaves and heart', 'name': 'artichoke'}, {'frequency': 'f', 'id': 24, 'synset': 'ashcan.n.01', 'synonyms': ['trash_can', 'garbage_can', 'wastebin', 'dustbin', 'trash_barrel', 'trash_bin'], 'def': 'a bin that holds rubbish until it is collected', 'name': 'trash_can'}, {'frequency': 'c', 'id': 25, 'synset': 'ashtray.n.01', 'synonyms': ['ashtray'], 'def': "a receptacle for the ash from smokers' cigars or cigarettes", 'name': 'ashtray'}, {'frequency': 'c', 'id': 26, 'synset': 'asparagus.n.02', 'synonyms': ['asparagus'], 'def': 'edible young shoots of the asparagus plant', 'name': 'asparagus'}, {'frequency': 'c', 'id': 27, 'synset': 'atomizer.n.01', 'synonyms': ['atomizer', 'atomiser', 'spray', 'sprayer', 'nebulizer', 'nebuliser'], 'def': 'a dispenser that turns a liquid (such as perfume) into a fine mist', 'name': 'atomizer'}, {'frequency': 'c', 'id': 28, 'synset': 'avocado.n.01', 'synonyms': ['avocado'], 'def': 'a pear-shaped fruit with green or blackish skin and rich yellowish pulp enclosing a single large seed', 'name': 'avocado'}, {'frequency': 'c', 'id': 29, 'synset': 'award.n.02', 'synonyms': ['award', 'accolade'], 'def': 'a tangible symbol signifying approval or distinction', 'name': 'award'}, {'frequency': 'f', 'id': 30, 'synset': 'awning.n.01', 'synonyms': ['awning'], 'def': 'a canopy made of canvas to shelter people or things from rain or sun', 'name': 'awning'}, {'frequency': 'r', 'id': 31, 'synset': 'ax.n.01', 'synonyms': ['ax', 'axe'], 'def': 'an edge tool with a heavy bladed head mounted across a handle', 'name': 'ax'}, {'frequency': 'f', 'id': 32, 'synset': 'baby_buggy.n.01', 'synonyms': 
['baby_buggy', 'baby_carriage', 'perambulator', 'pram', 'stroller'], 'def': 'a small vehicle with four wheels in which a baby or child is pushed around', 'name': 'baby_buggy'}, {'frequency': 'c', 'id': 33, 'synset': 'backboard.n.01', 'synonyms': ['basketball_backboard'], 'def': 'a raised vertical board with basket attached; used to play basketball', 'name': 'basketball_backboard'}, {'frequency': 'f', 'id': 34, 'synset': 'backpack.n.01', 'synonyms': ['backpack', 'knapsack', 'packsack', 'rucksack', 'haversack'], 'def': 'a bag carried by a strap on your back or shoulder', 'name': 'backpack'}, {'frequency': 'f', 'id': 35, 'synset': 'bag.n.04', 'synonyms': ['handbag', 'purse', 'pocketbook'], 'def': 'a container used for carrying money and small personal items or accessories', 'name': 'handbag'}, {'frequency': 'f', 'id': 36, 'synset': 'bag.n.06', 'synonyms': ['suitcase', 'baggage', 'luggage'], 'def': 'cases used to carry belongings when traveling', 'name': 'suitcase'}, {'frequency': 'c', 'id': 37, 'synset': 'bagel.n.01', 'synonyms': ['bagel', 'beigel'], 'def': 'glazed yeast-raised doughnut-shaped roll with hard crust', 'name': 'bagel'}, {'frequency': 'r', 'id': 38, 'synset': 'bagpipe.n.01', 'synonyms': ['bagpipe'], 'def': 'a tubular wind instrument; the player blows air into a bag and squeezes it out', 'name': 'bagpipe'}, {'frequency': 'r', 'id': 39, 'synset': 'baguet.n.01', 'synonyms': ['baguet', 'baguette'], 'def': 'narrow French stick loaf', 'name': 'baguet'}, {'frequency': 'r', 'id': 40, 'synset': 'bait.n.02', 'synonyms': ['bait', 'lure'], 'def': 'something used to lure fish or other animals into danger so they can be trapped or killed', 'name': 'bait'}, {'frequency': 'f', 'id': 41, 'synset': 'ball.n.06', 'synonyms': ['ball'], 'def': 'a spherical object used as a plaything', 'name': 'ball'}, {'frequency': 'r', 'id': 42, 'synset': 'ballet_skirt.n.01', 'synonyms': ['ballet_skirt', 'tutu'], 'def': 'very short skirt worn by ballerinas', 'name': 'ballet_skirt'}, {'frequency': 'f', 'id': 43, 'synset': 'balloon.n.01', 'synonyms': ['balloon'], 'def': 'large tough nonrigid bag filled with gas or heated air', 'name': 'balloon'}, {'frequency': 'c', 'id': 44, 'synset': 'bamboo.n.02', 'synonyms': ['bamboo'], 'def': 'woody tropical grass having hollow woody stems', 'name': 'bamboo'}, {'frequency': 'f', 'id': 45, 'synset': 'banana.n.02', 'synonyms': ['banana'], 'def': 'elongated crescent-shaped yellow fruit with soft sweet flesh', 'name': 'banana'}, {'frequency': 'r', 'id': 46, 'synset': 'band_aid.n.01', 'synonyms': ['Band_Aid'], 'def': 'trade name for an adhesive bandage to cover small cuts or blisters', 'name': 'Band_Aid'}, {'frequency': 'c', 'id': 47, 'synset': 'bandage.n.01', 'synonyms': ['bandage'], 'def': 'a piece of soft material that covers and protects an injured part of the body', 'name': 'bandage'}, {'frequency': 'c', 'id': 48, 'synset': 'bandanna.n.01', 'synonyms': ['bandanna', 'bandana'], 'def': 'large and brightly colored handkerchief; often used as a neckerchief', 'name': 'bandanna'}, {'frequency': 'r', 'id': 49, 'synset': 'banjo.n.01', 'synonyms': ['banjo'], 'def': 'a stringed instrument of the guitar family with a long neck and circular body', 'name': 'banjo'}, {'frequency': 'f', 'id': 50, 'synset': 'banner.n.01', 'synonyms': ['banner', 'streamer'], 'def': 'long strip of cloth or paper used for decoration or advertising', 'name': 'banner'}, {'frequency': 'r', 'id': 51, 'synset': 'barbell.n.01', 'synonyms': ['barbell'], 'def': 'a bar to which heavy discs are attached at each end; used in 
weightlifting', 'name': 'barbell'}, {'frequency': 'r', 'id': 52, 'synset': 'barge.n.01', 'synonyms': ['barge'], 'def': 'a flatbottom boat for carrying heavy loads (especially on canals)', 'name': 'barge'}, {'frequency': 'f', 'id': 53, 'synset': 'barrel.n.02', 'synonyms': ['barrel', 'cask'], 'def': 'a cylindrical container that holds liquids', 'name': 'barrel'}, {'frequency': 'c', 'id': 54, 'synset': 'barrette.n.01', 'synonyms': ['barrette'], 'def': "a pin for holding women's hair in place", 'name': 'barrette'}, {'frequency': 'c', 'id': 55, 'synset': 'barrow.n.03', 'synonyms': ['barrow', 'garden_cart', 'lawn_cart', 'wheelbarrow'], 'def': 'a cart for carrying small loads; has handles and one or more wheels', 'name': 'barrow'}, {'frequency': 'f', 'id': 56, 'synset': 'base.n.03', 'synonyms': ['baseball_base'], 'def': 'a place that the runner must touch before scoring', 'name': 'baseball_base'}, {'frequency': 'f', 'id': 57, 'synset': 'baseball.n.02', 'synonyms': ['baseball'], 'def': 'a ball used in playing baseball', 'name': 'baseball'}, {'frequency': 'f', 'id': 58, 'synset': 'baseball_bat.n.01', 'synonyms': ['baseball_bat'], 'def': 'an implement used in baseball by the batter', 'name': 'baseball_bat'}, {'frequency': 'f', 'id': 59, 'synset': 'baseball_cap.n.01', 'synonyms': ['baseball_cap', 'jockey_cap', 'golf_cap'], 'def': 'a cap with a bill', 'name': 'baseball_cap'}, {'frequency': 'f', 'id': 60, 'synset': 'baseball_glove.n.01', 'synonyms': ['baseball_glove', 'baseball_mitt'], 'def': 'the handwear used by fielders in playing baseball', 'name': 'baseball_glove'}, {'frequency': 'f', 'id': 61, 'synset': 'basket.n.01', 'synonyms': ['basket', 'handbasket'], 'def': 'a container that is usually woven and has handles', 'name': 'basket'}, {'frequency': 'c', 'id': 62, 'synset': 'basket.n.03', 'synonyms': ['basketball_hoop'], 'def': 'metal hoop supporting a net through which players try to throw the basketball', 'name': 'basketball_hoop'}, {'frequency': 'c', 'id': 63, 'synset': 'basketball.n.02', 'synonyms': ['basketball'], 'def': 'an inflated ball used in playing basketball', 'name': 'basketball'}, {'frequency': 'r', 'id': 64, 'synset': 'bass_horn.n.01', 'synonyms': ['bass_horn', 'sousaphone', 'tuba'], 'def': 'the lowest brass wind instrument', 'name': 'bass_horn'}, {'frequency': 'r', 'id': 65, 'synset': 'bat.n.01', 'synonyms': ['bat_(animal)'], 'def': 'nocturnal mouselike mammal with forelimbs modified to form membranous wings', 'name': 'bat_(animal)'}, {'frequency': 'f', 'id': 66, 'synset': 'bath_mat.n.01', 'synonyms': ['bath_mat'], 'def': 'a heavy towel or mat to stand on while drying yourself after a bath', 'name': 'bath_mat'}, {'frequency': 'f', 'id': 67, 'synset': 'bath_towel.n.01', 'synonyms': ['bath_towel'], 'def': 'a large towel; to dry yourself after a bath', 'name': 'bath_towel'}, {'frequency': 'c', 'id': 68, 'synset': 'bathrobe.n.01', 'synonyms': ['bathrobe'], 'def': 'a loose-fitting robe of towelling; worn after a bath or swim', 'name': 'bathrobe'}, {'frequency': 'f', 'id': 69, 'synset': 'bathtub.n.01', 'synonyms': ['bathtub', 'bathing_tub'], 'def': 'a large open container that you fill with water and use to wash the body', 'name': 'bathtub'}, {'frequency': 'r', 'id': 70, 'synset': 'batter.n.02', 'synonyms': ['batter_(food)'], 'def': 'a liquid or semiliquid mixture, as of flour, eggs, and milk, used in cooking', 'name': 'batter_(food)'}, {'frequency': 'c', 'id': 71, 'synset': 'battery.n.02', 'synonyms': ['battery'], 'def': 'a portable device that produces electricity', 'name': 'battery'}, 
{'frequency': 'r', 'id': 72, 'synset': 'beach_ball.n.01', 'synonyms': ['beachball'], 'def': 'large and light ball; for play at the seaside', 'name': 'beachball'}, {'frequency': 'c', 'id': 73, 'synset': 'bead.n.01', 'synonyms': ['bead'], 'def': 'a small ball with a hole through the middle used for ornamentation, jewellery, etc.', 'name': 'bead'}, {'frequency': 'r', 'id': 74, 'synset': 'beaker.n.01', 'synonyms': ['beaker'], 'def': 'a flatbottomed jar made of glass or plastic; used for chemistry', 'name': 'beaker'}, {'frequency': 'c', 'id': 75, 'synset': 'bean_curd.n.01', 'synonyms': ['bean_curd', 'tofu'], 'def': 'cheeselike food made of curdled soybean milk', 'name': 'bean_curd'}, {'frequency': 'c', 'id': 76, 'synset': 'beanbag.n.01', 'synonyms': ['beanbag'], 'def': 'a bag filled with dried beans or similar items; used in games or to sit on', 'name': 'beanbag'}, {'frequency': 'f', 'id': 77, 'synset': 'beanie.n.01', 'synonyms': ['beanie', 'beany'], 'def': 'a small skullcap; formerly worn by schoolboys and college freshmen', 'name': 'beanie'}, {'frequency': 'f', 'id': 78, 'synset': 'bear.n.01', 'synonyms': ['bear'], 'def': 'large carnivorous or omnivorous mammals with shaggy coats and claws', 'name': 'bear'}, {'frequency': 'f', 'id': 79, 'synset': 'bed.n.01', 'synonyms': ['bed'], 'def': 'a piece of furniture that provides a place to sleep', 'name': 'bed'}, {'frequency': 'c', 'id': 80, 'synset': 'bedspread.n.01', 'synonyms': ['bedspread', 'bedcover', 'bed_covering', 'counterpane', 'spread'], 'def': 'decorative cover for a bed', 'name': 'bedspread'}, {'frequency': 'f', 'id': 81, 'synset': 'beef.n.01', 'synonyms': ['cow'], 'def': 'cattle that are reared for their meat', 'name': 'cow'}, {'frequency': 'c', 'id': 82, 'synset': 'beef.n.02', 'synonyms': ['beef_(food)', 'boeuf_(food)'], 'def': 'meat from an adult domestic bovine', 'name': 'beef_(food)'}, {'frequency': 'r', 'id': 83, 'synset': 'beeper.n.01', 'synonyms': ['beeper', 'pager'], 'def': 'an device that beeps when the person carrying it is being paged', 'name': 'beeper'}, {'frequency': 'f', 'id': 84, 'synset': 'beer_bottle.n.01', 'synonyms': ['beer_bottle'], 'def': 'a bottle that holds beer', 'name': 'beer_bottle'}, {'frequency': 'c', 'id': 85, 'synset': 'beer_can.n.01', 'synonyms': ['beer_can'], 'def': 'a can that holds beer', 'name': 'beer_can'}, {'frequency': 'r', 'id': 86, 'synset': 'beetle.n.01', 'synonyms': ['beetle'], 'def': 'insect with hard wing covers', 'name': 'beetle'}, {'frequency': 'f', 'id': 87, 'synset': 'bell.n.01', 'synonyms': ['bell'], 'def': 'a hollow device made of metal that makes a ringing sound when struck', 'name': 'bell'}, {'frequency': 'f', 'id': 88, 'synset': 'bell_pepper.n.02', 'synonyms': ['bell_pepper', 'capsicum'], 'def': 'large bell-shaped sweet pepper in green or red or yellow or orange or black varieties', 'name': 'bell_pepper'}, {'frequency': 'f', 'id': 89, 'synset': 'belt.n.02', 'synonyms': ['belt'], 'def': 'a band to tie or buckle around the body (usually at the waist)', 'name': 'belt'}, {'frequency': 'f', 'id': 90, 'synset': 'belt_buckle.n.01', 'synonyms': ['belt_buckle'], 'def': 'the buckle used to fasten a belt', 'name': 'belt_buckle'}, {'frequency': 'f', 'id': 91, 'synset': 'bench.n.01', 'synonyms': ['bench'], 'def': 'a long seat for more than one person', 'name': 'bench'}, {'frequency': 'c', 'id': 92, 'synset': 'beret.n.01', 'synonyms': ['beret'], 'def': 'a cap with no brim or bill; made of soft cloth', 'name': 'beret'}, {'frequency': 'c', 'id': 93, 'synset': 'bib.n.02', 'synonyms': ['bib'], 'def': 'a 
napkin tied under the chin of a child while eating', 'name': 'bib'}, {'frequency': 'r', 'id': 94, 'synset': 'bible.n.01', 'synonyms': ['Bible'], 'def': 'the sacred writings of the Christian religions', 'name': 'Bible'}, {'frequency': 'f', 'id': 95, 'synset': 'bicycle.n.01', 'synonyms': ['bicycle', 'bike_(bicycle)'], 'def': 'a wheeled vehicle that has two wheels and is moved by foot pedals', 'name': 'bicycle'}, {'frequency': 'f', 'id': 96, 'synset': 'bill.n.09', 'synonyms': ['visor', 'vizor'], 'def': 'a brim that projects to the front to shade the eyes', 'name': 'visor'}, {'frequency': 'c', 'id': 97, 'synset': 'binder.n.03', 'synonyms': ['binder', 'ring-binder'], 'def': 'holds loose papers or magazines', 'name': 'binder'}, {'frequency': 'c', 'id': 98, 'synset': 'binoculars.n.01', 'synonyms': ['binoculars', 'field_glasses', 'opera_glasses'], 'def': 'an optical instrument designed for simultaneous use by both eyes', 'name': 'binoculars'}, {'frequency': 'f', 'id': 99, 'synset': 'bird.n.01', 'synonyms': ['bird'], 'def': 'animal characterized by feathers and wings', 'name': 'bird'}, {'frequency': 'r', 'id': 100, 'synset': 'bird_feeder.n.01', 'synonyms': ['birdfeeder'], 'def': 'an outdoor device that supplies food for wild birds', 'name': 'birdfeeder'}, {'frequency': 'r', 'id': 101, 'synset': 'birdbath.n.01', 'synonyms': ['birdbath'], 'def': 'an ornamental basin (usually in a garden) for birds to bathe in', 'name': 'birdbath'}, {'frequency': 'c', 'id': 102, 'synset': 'birdcage.n.01', 'synonyms': ['birdcage'], 'def': 'a cage in which a bird can be kept', 'name': 'birdcage'}, {'frequency': 'c', 'id': 103, 'synset': 'birdhouse.n.01', 'synonyms': ['birdhouse'], 'def': 'a shelter for birds', 'name': 'birdhouse'}, {'frequency': 'f', 'id': 104, 'synset': 'birthday_cake.n.01', 'synonyms': ['birthday_cake'], 'def': 'decorated cake served at a birthday party', 'name': 'birthday_cake'}, {'frequency': 'r', 'id': 105, 'synset': 'birthday_card.n.01', 'synonyms': ['birthday_card'], 'def': 'a card expressing a birthday greeting', 'name': 'birthday_card'}, {'frequency': 'r', 'id': 106, 'synset': 'biscuit.n.01', 'synonyms': ['biscuit_(bread)'], 'def': 'small round bread leavened with baking-powder or soda', 'name': 'biscuit_(bread)'}, {'frequency': 'r', 'id': 107, 'synset': 'black_flag.n.01', 'synonyms': ['pirate_flag'], 'def': 'a flag usually bearing a white skull and crossbones on a black background', 'name': 'pirate_flag'}, {'frequency': 'c', 'id': 108, 'synset': 'black_sheep.n.02', 'synonyms': ['black_sheep'], 'def': 'sheep with a black coat', 'name': 'black_sheep'}, {'frequency': 'c', 'id': 109, 'synset': 'blackboard.n.01', 'synonyms': ['blackboard', 'chalkboard'], 'def': 'sheet of slate; for writing with chalk', 'name': 'blackboard'}, {'frequency': 'f', 'id': 110, 'synset': 'blanket.n.01', 'synonyms': ['blanket'], 'def': 'bedding that keeps a person warm in bed', 'name': 'blanket'}, {'frequency': 'c', 'id': 111, 'synset': 'blazer.n.01', 'synonyms': ['blazer', 'sport_jacket', 'sport_coat', 'sports_jacket', 'sports_coat'], 'def': 'lightweight jacket; often striped in the colors of a club or school', 'name': 'blazer'}, {'frequency': 'f', 'id': 112, 'synset': 'blender.n.01', 'synonyms': ['blender', 'liquidizer', 'liquidiser'], 'def': 'an electrically powered mixer that mix or chop or liquefy foods', 'name': 'blender'}, {'frequency': 'r', 'id': 113, 'synset': 'blimp.n.02', 'synonyms': ['blimp'], 'def': 'a small nonrigid airship used for observation or as a barrage balloon', 'name': 'blimp'}, {'frequency': 'c', 
'id': 114, 'synset': 'blinker.n.01', 'synonyms': ['blinker', 'flasher'], 'def': 'a light that flashes on and off; used as a signal or to send messages', 'name': 'blinker'}, {'frequency': 'c', 'id': 115, 'synset': 'blueberry.n.02', 'synonyms': ['blueberry'], 'def': 'sweet edible dark-blue berries of blueberry plants', 'name': 'blueberry'}, {'frequency': 'r', 'id': 116, 'synset': 'boar.n.02', 'synonyms': ['boar'], 'def': 'an uncastrated male hog', 'name': 'boar'}, {'frequency': 'r', 'id': 117, 'synset': 'board.n.09', 'synonyms': ['gameboard'], 'def': 'a flat portable surface (usually rectangular) designed for board games', 'name': 'gameboard'}, {'frequency': 'f', 'id': 118, 'synset': 'boat.n.01', 'synonyms': ['boat', 'ship_(boat)'], 'def': 'a vessel for travel on water', 'name': 'boat'}, {'frequency': 'c', 'id': 119, 'synset': 'bobbin.n.01', 'synonyms': ['bobbin', 'spool', 'reel'], 'def': 'a thing around which thread/tape/film or other flexible materials can be wound', 'name': 'bobbin'}, {'frequency': 'r', 'id': 120, 'synset': 'bobby_pin.n.01', 'synonyms': ['bobby_pin', 'hairgrip'], 'def': 'a flat wire hairpin used to hold bobbed hair in place', 'name': 'bobby_pin'}, {'frequency': 'c', 'id': 121, 'synset': 'boiled_egg.n.01', 'synonyms': ['boiled_egg', 'coddled_egg'], 'def': 'egg cooked briefly in the shell in gently boiling water', 'name': 'boiled_egg'}, {'frequency': 'r', 'id': 122, 'synset': 'bolo_tie.n.01', 'synonyms': ['bolo_tie', 'bolo', 'bola_tie', 'bola'], 'def': 'a cord fastened around the neck with an ornamental clasp and worn as a necktie', 'name': 'bolo_tie'}, {'frequency': 'c', 'id': 123, 'synset': 'bolt.n.03', 'synonyms': ['deadbolt'], 'def': 'the part of a lock that is engaged or withdrawn with a key', 'name': 'deadbolt'}, {'frequency': 'f', 'id': 124, 'synset': 'bolt.n.06', 'synonyms': ['bolt'], 'def': 'a screw that screws into a nut to form a fastener', 'name': 'bolt'}, {'frequency': 'r', 'id': 125, 'synset': 'bonnet.n.01', 'synonyms': ['bonnet'], 'def': 'a hat tied under the chin', 'name': 'bonnet'}, {'frequency': 'f', 'id': 126, 'synset': 'book.n.01', 'synonyms': ['book'], 'def': 'a written work or composition that has been published', 'name': 'book'}, {'frequency': 'r', 'id': 127, 'synset': 'book_bag.n.01', 'synonyms': ['book_bag'], 'def': 'a bag in which students carry their books', 'name': 'book_bag'}, {'frequency': 'c', 'id': 128, 'synset': 'bookcase.n.01', 'synonyms': ['bookcase'], 'def': 'a piece of furniture with shelves for storing books', 'name': 'bookcase'}, {'frequency': 'c', 'id': 129, 'synset': 'booklet.n.01', 'synonyms': ['booklet', 'brochure', 'leaflet', 'pamphlet'], 'def': 'a small book usually having a paper cover', 'name': 'booklet'}, {'frequency': 'r', 'id': 130, 'synset': 'bookmark.n.01', 'synonyms': ['bookmark', 'bookmarker'], 'def': 'a marker (a piece of paper or ribbon) placed between the pages of a book', 'name': 'bookmark'}, {'frequency': 'r', 'id': 131, 'synset': 'boom.n.04', 'synonyms': ['boom_microphone', 'microphone_boom'], 'def': 'a pole carrying an overhead microphone projected over a film or tv set', 'name': 'boom_microphone'}, {'frequency': 'f', 'id': 132, 'synset': 'boot.n.01', 'synonyms': ['boot'], 'def': 'footwear that covers the whole foot and lower leg', 'name': 'boot'}, {'frequency': 'f', 'id': 133, 'synset': 'bottle.n.01', 'synonyms': ['bottle'], 'def': 'a glass or plastic vessel used for storing drinks or other liquids', 'name': 'bottle'}, {'frequency': 'c', 'id': 134, 'synset': 'bottle_opener.n.01', 'synonyms': ['bottle_opener'], 
'def': 'an opener for removing caps or corks from bottles', 'name': 'bottle_opener'}, {'frequency': 'c', 'id': 135, 'synset': 'bouquet.n.01', 'synonyms': ['bouquet'], 'def': 'an arrangement of flowers that is usually given as a present', 'name': 'bouquet'}, {'frequency': 'r', 'id': 136, 'synset': 'bow.n.04', 'synonyms': ['bow_(weapon)'], 'def': 'a weapon for shooting arrows', 'name': 'bow_(weapon)'}, {'frequency': 'f', 'id': 137, 'synset': 'bow.n.08', 'synonyms': ['bow_(decorative_ribbons)'], 'def': 'a decorative interlacing of ribbons', 'name': 'bow_(decorative_ribbons)'}, {'frequency': 'f', 'id': 138, 'synset': 'bow_tie.n.01', 'synonyms': ['bow-tie', 'bowtie'], 'def': "a man's tie that ties in a bow", 'name': 'bow-tie'}, {'frequency': 'f', 'id': 139, 'synset': 'bowl.n.03', 'synonyms': ['bowl'], 'def': 'a dish that is round and open at the top for serving foods', 'name': 'bowl'}, {'frequency': 'r', 'id': 140, 'synset': 'bowl.n.08', 'synonyms': ['pipe_bowl'], 'def': 'a small round container that is open at the top for holding tobacco', 'name': 'pipe_bowl'}, {'frequency': 'c', 'id': 141, 'synset': 'bowler_hat.n.01', 'synonyms': ['bowler_hat', 'bowler', 'derby_hat', 'derby', 'plug_hat'], 'def': 'a felt hat that is round and hard with a narrow brim', 'name': 'bowler_hat'}, {'frequency': 'r', 'id': 142, 'synset': 'bowling_ball.n.01', 'synonyms': ['bowling_ball'], 'def': 'a large ball with finger holes used in the sport of bowling', 'name': 'bowling_ball'}, {'frequency': 'r', 'id': 143, 'synset': 'bowling_pin.n.01', 'synonyms': ['bowling_pin'], 'def': 'a club-shaped wooden object used in bowling', 'name': 'bowling_pin'}, {'frequency': 'r', 'id': 144, 'synset': 'boxing_glove.n.01', 'synonyms': ['boxing_glove'], 'def': 'large glove coverings the fists of a fighter worn for the sport of boxing', 'name': 'boxing_glove'}, {'frequency': 'c', 'id': 145, 'synset': 'brace.n.06', 'synonyms': ['suspenders'], 'def': 'elastic straps that hold trousers up (usually used in the plural)', 'name': 'suspenders'}, {'frequency': 'f', 'id': 146, 'synset': 'bracelet.n.02', 'synonyms': ['bracelet', 'bangle'], 'def': 'jewelry worn around the wrist for decoration', 'name': 'bracelet'}, {'frequency': 'r', 'id': 147, 'synset': 'brass.n.07', 'synonyms': ['brass_plaque'], 'def': 'a memorial made of brass', 'name': 'brass_plaque'}, {'frequency': 'c', 'id': 148, 'synset': 'brassiere.n.01', 'synonyms': ['brassiere', 'bra', 'bandeau'], 'def': 'an undergarment worn by women to support their breasts', 'name': 'brassiere'}, {'frequency': 'c', 'id': 149, 'synset': 'bread-bin.n.01', 'synonyms': ['bread-bin', 'breadbox'], 'def': 'a container used to keep bread or cake in', 'name': 'bread-bin'}, {'frequency': 'r', 'id': 150, 'synset': 'breechcloth.n.01', 'synonyms': ['breechcloth', 'breechclout', 'loincloth'], 'def': 'a garment that provides covering for the loins', 'name': 'breechcloth'}, {'frequency': 'c', 'id': 151, 'synset': 'bridal_gown.n.01', 'synonyms': ['bridal_gown', 'wedding_gown', 'wedding_dress'], 'def': 'a gown worn by the bride at a wedding', 'name': 'bridal_gown'}, {'frequency': 'c', 'id': 152, 'synset': 'briefcase.n.01', 'synonyms': ['briefcase'], 'def': 'a case with a handle; for carrying papers or files or books', 'name': 'briefcase'}, {'frequency': 'c', 'id': 153, 'synset': 'bristle_brush.n.01', 'synonyms': ['bristle_brush'], 'def': 'a brush that is made with the short stiff hairs of an animal or plant', 'name': 'bristle_brush'}, {'frequency': 'f', 'id': 154, 'synset': 'broccoli.n.01', 'synonyms': ['broccoli'], 
'def': 'plant with dense clusters of tight green flower buds', 'name': 'broccoli'}, {'frequency': 'r', 'id': 155, 'synset': 'brooch.n.01', 'synonyms': ['broach'], 'def': 'a decorative pin worn by women', 'name': 'broach'}, {'frequency': 'c', 'id': 156, 'synset': 'broom.n.01', 'synonyms': ['broom'], 'def': 'bundle of straws or twigs attached to a long handle; used for cleaning', 'name': 'broom'}, {'frequency': 'c', 'id': 157, 'synset': 'brownie.n.03', 'synonyms': ['brownie'], 'def': 'square or bar of very rich chocolate cake usually with nuts', 'name': 'brownie'}, {'frequency': 'c', 'id': 158, 'synset': 'brussels_sprouts.n.01', 'synonyms': ['brussels_sprouts'], 'def': 'the small edible cabbage-like buds growing along a stalk', 'name': 'brussels_sprouts'}, {'frequency': 'r', 'id': 159, 'synset': 'bubble_gum.n.01', 'synonyms': ['bubble_gum'], 'def': 'a kind of chewing gum that can be blown into bubbles', 'name': 'bubble_gum'}, {'frequency': 'f', 'id': 160, 'synset': 'bucket.n.01', 'synonyms': ['bucket', 'pail'], 'def': 'a roughly cylindrical vessel that is open at the top', 'name': 'bucket'}, {'frequency': 'r', 'id': 161, 'synset': 'buggy.n.01', 'synonyms': ['horse_buggy'], 'def': 'a small lightweight carriage; drawn by a single horse', 'name': 'horse_buggy'}, {'frequency': 'c', 'id': 162, 'synset': 'bull.n.11', 'synonyms': ['bull'], 'def': 'mature male cow', 'name': 'bull'}, {'frequency': 'r', 'id': 163, 'synset': 'bulldog.n.01', 'synonyms': ['bulldog'], 'def': 'a thickset short-haired dog with a large head and strong undershot lower jaw', 'name': 'bulldog'}, {'frequency': 'r', 'id': 164, 'synset': 'bulldozer.n.01', 'synonyms': ['bulldozer', 'dozer'], 'def': 'large powerful tractor; a large blade in front flattens areas of ground', 'name': 'bulldozer'}, {'frequency': 'c', 'id': 165, 'synset': 'bullet_train.n.01', 'synonyms': ['bullet_train'], 'def': 'a high-speed passenger train', 'name': 'bullet_train'}, {'frequency': 'c', 'id': 166, 'synset': 'bulletin_board.n.02', 'synonyms': ['bulletin_board', 'notice_board'], 'def': 'a board that hangs on a wall; displays announcements', 'name': 'bulletin_board'}, {'frequency': 'r', 'id': 167, 'synset': 'bulletproof_vest.n.01', 'synonyms': ['bulletproof_vest'], 'def': 'a vest capable of resisting the impact of a bullet', 'name': 'bulletproof_vest'}, {'frequency': 'c', 'id': 168, 'synset': 'bullhorn.n.01', 'synonyms': ['bullhorn', 'megaphone'], 'def': 'a portable loudspeaker with built-in microphone and amplifier', 'name': 'bullhorn'}, {'frequency': 'r', 'id': 169, 'synset': 'bully_beef.n.01', 'synonyms': ['corned_beef', 'corn_beef'], 'def': 'beef cured or pickled in brine', 'name': 'corned_beef'}, {'frequency': 'f', 'id': 170, 'synset': 'bun.n.01', 'synonyms': ['bun', 'roll'], 'def': 'small rounded bread either plain or sweet', 'name': 'bun'}, {'frequency': 'c', 'id': 171, 'synset': 'bunk_bed.n.01', 'synonyms': ['bunk_bed'], 'def': 'beds built one above the other', 'name': 'bunk_bed'}, {'frequency': 'f', 'id': 172, 'synset': 'buoy.n.01', 'synonyms': ['buoy'], 'def': 'a float attached by rope to the seabed to mark channels in a harbor or underwater hazards', 'name': 'buoy'}, {'frequency': 'r', 'id': 173, 'synset': 'burrito.n.01', 'synonyms': ['burrito'], 'def': 'a flour tortilla folded around a filling', 'name': 'burrito'}, {'frequency': 'f', 'id': 174, 'synset': 'bus.n.01', 'synonyms': ['bus_(vehicle)', 'autobus', 'charabanc', 'double-decker', 'motorbus', 'motorcoach'], 'def': 'a vehicle carrying many passengers; used for public transport', 'name': 
'bus_(vehicle)'}, {'frequency': 'c', 'id': 175, 'synset': 'business_card.n.01', 'synonyms': ['business_card'], 'def': "a card on which are printed the person's name and business affiliation", 'name': 'business_card'}, {'frequency': 'c', 'id': 176, 'synset': 'butcher_knife.n.01', 'synonyms': ['butcher_knife'], 'def': 'a large sharp knife for cutting or trimming meat', 'name': 'butcher_knife'}, {'frequency': 'c', 'id': 177, 'synset': 'butter.n.01', 'synonyms': ['butter'], 'def': 'an edible emulsion of fat globules made by churning milk or cream; for cooking and table use', 'name': 'butter'}, {'frequency': 'c', 'id': 178, 'synset': 'butterfly.n.01', 'synonyms': ['butterfly'], 'def': 'insect typically having a slender body with knobbed antennae and broad colorful wings', 'name': 'butterfly'}, {'frequency': 'f', 'id': 179, 'synset': 'button.n.01', 'synonyms': ['button'], 'def': 'a round fastener sewn to shirts and coats etc to fit through buttonholes', 'name': 'button'}, {'frequency': 'f', 'id': 180, 'synset': 'cab.n.03', 'synonyms': ['cab_(taxi)', 'taxi', 'taxicab'], 'def': 'a car that takes passengers where they want to go in exchange for money', 'name': 'cab_(taxi)'}, {'frequency': 'r', 'id': 181, 'synset': 'cabana.n.01', 'synonyms': ['cabana'], 'def': 'a small tent used as a dressing room beside the sea or a swimming pool', 'name': 'cabana'}, {'frequency': 'r', 'id': 182, 'synset': 'cabin_car.n.01', 'synonyms': ['cabin_car', 'caboose'], 'def': 'a car on a freight train for use of the train crew; usually the last car on the train', 'name': 'cabin_car'}, {'frequency': 'f', 'id': 183, 'synset': 'cabinet.n.01', 'synonyms': ['cabinet'], 'def': 'a piece of furniture resembling a cupboard with doors and shelves and drawers', 'name': 'cabinet'}, {'frequency': 'r', 'id': 184, 'synset': 'cabinet.n.03', 'synonyms': ['locker', 'storage_locker'], 'def': 'a storage compartment for clothes and valuables; usually it has a lock', 'name': 'locker'}, {'frequency': 'f', 'id': 185, 'synset': 'cake.n.03', 'synonyms': ['cake'], 'def': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat', 'name': 'cake'}, {'frequency': 'c', 'id': 186, 'synset': 'calculator.n.02', 'synonyms': ['calculator'], 'def': 'a small machine that is used for mathematical calculations', 'name': 'calculator'}, {'frequency': 'f', 'id': 187, 'synset': 'calendar.n.02', 'synonyms': ['calendar'], 'def': 'a list or register of events (appointments/social events/court cases, etc)', 'name': 'calendar'}, {'frequency': 'c', 'id': 188, 'synset': 'calf.n.01', 'synonyms': ['calf'], 'def': 'young of domestic cattle', 'name': 'calf'}, {'frequency': 'c', 'id': 189, 'synset': 'camcorder.n.01', 'synonyms': ['camcorder'], 'def': 'a portable television camera and videocassette recorder', 'name': 'camcorder'}, {'frequency': 'c', 'id': 190, 'synset': 'camel.n.01', 'synonyms': ['camel'], 'def': 'cud-chewing mammal used as a draft or saddle animal in desert regions', 'name': 'camel'}, {'frequency': 'f', 'id': 191, 'synset': 'camera.n.01', 'synonyms': ['camera'], 'def': 'equipment for taking photographs', 'name': 'camera'}, {'frequency': 'c', 'id': 192, 'synset': 'camera_lens.n.01', 'synonyms': ['camera_lens'], 'def': 'a lens that focuses the image in a camera', 'name': 'camera_lens'}, {'frequency': 'c', 'id': 193, 'synset': 'camper.n.02', 'synonyms': ['camper_(vehicle)', 'camping_bus', 'motor_home'], 'def': 'a recreational vehicle equipped for camping out while traveling', 'name': 'camper_(vehicle)'}, {'frequency': 'f', 'id': 194, 'synset': 
'can.n.01', 'synonyms': ['can', 'tin_can'], 'def': 'airtight sealed metal container for food or drink or paint etc.', 'name': 'can'}, {'frequency': 'c', 'id': 195, 'synset': 'can_opener.n.01', 'synonyms': ['can_opener', 'tin_opener'], 'def': 'a device for cutting cans open', 'name': 'can_opener'}, {'frequency': 'r', 'id': 196, 'synset': 'candelabrum.n.01', 'synonyms': ['candelabrum', 'candelabra'], 'def': 'branched candlestick; ornamental; has several lights', 'name': 'candelabrum'}, {'frequency': 'f', 'id': 197, 'synset': 'candle.n.01', 'synonyms': ['candle', 'candlestick'], 'def': 'stick of wax with a wick in the middle', 'name': 'candle'}, {'frequency': 'f', 'id': 198, 'synset': 'candlestick.n.01', 'synonyms': ['candle_holder'], 'def': 'a holder with sockets for candles', 'name': 'candle_holder'}, {'frequency': 'r', 'id': 199, 'synset': 'candy_bar.n.01', 'synonyms': ['candy_bar'], 'def': 'a candy shaped as a bar', 'name': 'candy_bar'}, {'frequency': 'c', 'id': 200, 'synset': 'candy_cane.n.01', 'synonyms': ['candy_cane'], 'def': 'a hard candy in the shape of a rod (usually with stripes)', 'name': 'candy_cane'}, {'frequency': 'c', 'id': 201, 'synset': 'cane.n.01', 'synonyms': ['walking_cane'], 'def': 'a stick that people can lean on to help them walk', 'name': 'walking_cane'}, {'frequency': 'c', 'id': 202, 'synset': 'canister.n.02', 'synonyms': ['canister', 'cannister'], 'def': 'metal container for storing dry foods such as tea or flour', 'name': 'canister'}, {'frequency': 'r', 'id': 203, 'synset': 'cannon.n.02', 'synonyms': ['cannon'], 'def': 'heavy gun fired from a tank', 'name': 'cannon'}, {'frequency': 'c', 'id': 204, 'synset': 'canoe.n.01', 'synonyms': ['canoe'], 'def': 'small and light boat; pointed at both ends; propelled with a paddle', 'name': 'canoe'}, {'frequency': 'r', 'id': 205, 'synset': 'cantaloup.n.02', 'synonyms': ['cantaloup', 'cantaloupe'], 'def': 'the fruit of a cantaloup vine; small to medium-sized melon with yellowish flesh', 'name': 'cantaloup'}, {'frequency': 'r', 'id': 206, 'synset': 'canteen.n.01', 'synonyms': ['canteen'], 'def': 'a flask for carrying water; used by soldiers or travelers', 'name': 'canteen'}, {'frequency': 'c', 'id': 207, 'synset': 'cap.n.01', 'synonyms': ['cap_(headwear)'], 'def': 'a tight-fitting headwear', 'name': 'cap_(headwear)'}, {'frequency': 'f', 'id': 208, 'synset': 'cap.n.02', 'synonyms': ['bottle_cap', 'cap_(container_lid)'], 'def': 'a top (as for a bottle)', 'name': 'bottle_cap'}, {'frequency': 'r', 'id': 209, 'synset': 'cape.n.02', 'synonyms': ['cape'], 'def': 'a sleeveless garment like a cloak but shorter', 'name': 'cape'}, {'frequency': 'c', 'id': 210, 'synset': 'cappuccino.n.01', 'synonyms': ['cappuccino', 'coffee_cappuccino'], 'def': 'equal parts of espresso and steamed milk', 'name': 'cappuccino'}, {'frequency': 'f', 'id': 211, 'synset': 'car.n.01', 'synonyms': ['car_(automobile)', 'auto_(automobile)', 'automobile'], 'def': 'a motor vehicle with four wheels', 'name': 'car_(automobile)'}, {'frequency': 'f', 'id': 212, 'synset': 'car.n.02', 'synonyms': ['railcar_(part_of_a_train)', 'railway_car_(part_of_a_train)', 'railroad_car_(part_of_a_train)'], 'def': 'a wheeled vehicle adapted to the rails of railroad', 'name': 'railcar_(part_of_a_train)'}, {'frequency': 'r', 'id': 213, 'synset': 'car.n.04', 'synonyms': ['elevator_car'], 'def': 'where passengers ride up and down', 'name': 'elevator_car'}, {'frequency': 'r', 'id': 214, 'synset': 'car_battery.n.01', 'synonyms': ['car_battery', 'automobile_battery'], 'def': 'a battery in a motor 
vehicle', 'name': 'car_battery'}, {'frequency': 'c', 'id': 215, 'synset': 'card.n.02', 'synonyms': ['identity_card'], 'def': 'a card certifying the identity of the bearer', 'name': 'identity_card'}, {'frequency': 'c', 'id': 216, 'synset': 'card.n.03', 'synonyms': ['card'], 'def': 'a rectangular piece of paper used to send messages (e.g. greetings or pictures)', 'name': 'card'}, {'frequency': 'r', 'id': 217, 'synset': 'cardigan.n.01', 'synonyms': ['cardigan'], 'def': 'knitted jacket that is fastened up the front with buttons or a zipper', 'name': 'cardigan'}, {'frequency': 'r', 'id': 218, 'synset': 'cargo_ship.n.01', 'synonyms': ['cargo_ship', 'cargo_vessel'], 'def': 'a ship designed to carry cargo', 'name': 'cargo_ship'}, {'frequency': 'r', 'id': 219, 'synset': 'carnation.n.01', 'synonyms': ['carnation'], 'def': 'plant with pink to purple-red spice-scented usually double flowers', 'name': 'carnation'}, {'frequency': 'c', 'id': 220, 'synset': 'carriage.n.02', 'synonyms': ['horse_carriage'], 'def': 'a vehicle with wheels drawn by one or more horses', 'name': 'horse_carriage'}, {'frequency': 'f', 'id': 221, 'synset': 'carrot.n.01', 'synonyms': ['carrot'], 'def': 'deep orange edible root of the cultivated carrot plant', 'name': 'carrot'}, {'frequency': 'c', 'id': 222, 'synset': 'carryall.n.01', 'synonyms': ['tote_bag'], 'def': 'a capacious bag or basket', 'name': 'tote_bag'}, {'frequency': 'c', 'id': 223, 'synset': 'cart.n.01', 'synonyms': ['cart'], 'def': 'a heavy open wagon usually having two wheels and drawn by an animal', 'name': 'cart'}, {'frequency': 'c', 'id': 224, 'synset': 'carton.n.02', 'synonyms': ['carton'], 'def': 'a box made of cardboard; opens by flaps on top', 'name': 'carton'}, {'frequency': 'c', 'id': 225, 'synset': 'cash_register.n.01', 'synonyms': ['cash_register', 'register_(for_cash_transactions)'], 'def': 'a cashbox with an adding machine to register transactions', 'name': 'cash_register'}, {'frequency': 'r', 'id': 226, 'synset': 'casserole.n.01', 'synonyms': ['casserole'], 'def': 'food cooked and served in a casserole', 'name': 'casserole'}, {'frequency': 'r', 'id': 227, 'synset': 'cassette.n.01', 'synonyms': ['cassette'], 'def': 'a container that holds a magnetic tape used for recording or playing sound or video', 'name': 'cassette'}, {'frequency': 'c', 'id': 228, 'synset': 'cast.n.05', 'synonyms': ['cast', 'plaster_cast', 'plaster_bandage'], 'def': 'bandage consisting of a firm covering that immobilizes broken bones while they heal', 'name': 'cast'}, {'frequency': 'f', 'id': 229, 'synset': 'cat.n.01', 'synonyms': ['cat'], 'def': 'a domestic house cat', 'name': 'cat'}, {'frequency': 'c', 'id': 230, 'synset': 'cauliflower.n.02', 'synonyms': ['cauliflower'], 'def': 'edible compact head of white undeveloped flowers', 'name': 'cauliflower'}, {'frequency': 'r', 'id': 231, 'synset': 'caviar.n.01', 'synonyms': ['caviar', 'caviare'], 'def': "salted roe of sturgeon or other large fish; usually served as an hors d'oeuvre", 'name': 'caviar'}, {'frequency': 'c', 'id': 232, 'synset': 'cayenne.n.02', 'synonyms': ['cayenne_(spice)', 'cayenne_pepper_(spice)', 'red_pepper_(spice)'], 'def': 'ground pods and seeds of pungent red peppers of the genus Capsicum', 'name': 'cayenne_(spice)'}, {'frequency': 'c', 'id': 233, 'synset': 'cd_player.n.01', 'synonyms': ['CD_player'], 'def': 'electronic equipment for playing compact discs (CDs)', 'name': 'CD_player'}, {'frequency': 'c', 'id': 234, 'synset': 'celery.n.01', 'synonyms': ['celery'], 'def': 'widely cultivated herb with aromatic leaf stalks 
that are eaten raw or cooked', 'name': 'celery'}, {'frequency': 'f', 'id': 235, 'synset': 'cellular_telephone.n.01', 'synonyms': ['cellular_telephone', 'cellular_phone', 'cellphone', 'mobile_phone', 'smart_phone'], 'def': 'a hand-held mobile telephone', 'name': 'cellular_telephone'}, {'frequency': 'r', 'id': 236, 'synset': 'chain_mail.n.01', 'synonyms': ['chain_mail', 'ring_mail', 'chain_armor', 'chain_armour', 'ring_armor', 'ring_armour'], 'def': '(Middle Ages) flexible armor made of interlinked metal rings', 'name': 'chain_mail'}, {'frequency': 'f', 'id': 237, 'synset': 'chair.n.01', 'synonyms': ['chair'], 'def': 'a seat for one person, with a support for the back', 'name': 'chair'}, {'frequency': 'r', 'id': 238, 'synset': 'chaise_longue.n.01', 'synonyms': ['chaise_longue', 'chaise', 'daybed'], 'def': 'a long chair; for reclining', 'name': 'chaise_longue'}, {'frequency': 'r', 'id': 239, 'synset': 'champagne.n.01', 'synonyms': ['champagne'], 'def': 'a white sparkling wine produced in Champagne or resembling that produced there', 'name': 'champagne'}, {'frequency': 'f', 'id': 240, 'synset': 'chandelier.n.01', 'synonyms': ['chandelier'], 'def': 'branched lighting fixture; often ornate; hangs from the ceiling', 'name': 'chandelier'}, {'frequency': 'r', 'id': 241, 'synset': 'chap.n.04', 'synonyms': ['chap'], 'def': 'leather leggings without a seat; worn over trousers by cowboys to protect their legs', 'name': 'chap'}, {'frequency': 'r', 'id': 242, 'synset': 'checkbook.n.01', 'synonyms': ['checkbook', 'chequebook'], 'def': 'a book issued to holders of checking accounts', 'name': 'checkbook'}, {'frequency': 'r', 'id': 243, 'synset': 'checkerboard.n.01', 'synonyms': ['checkerboard'], 'def': 'a board having 64 squares of two alternating colors', 'name': 'checkerboard'}, {'frequency': 'c', 'id': 244, 'synset': 'cherry.n.03', 'synonyms': ['cherry'], 'def': 'a red fruit with a single hard stone', 'name': 'cherry'}, {'frequency': 'r', 'id': 245, 'synset': 'chessboard.n.01', 'synonyms': ['chessboard'], 'def': 'a checkerboard used to play chess', 'name': 'chessboard'}, {'frequency': 'r', 'id': 246, 'synset': 'chest_of_drawers.n.01', 'synonyms': ['chest_of_drawers_(furniture)', 'bureau_(furniture)', 'chest_(furniture)'], 'def': 'furniture with drawers for keeping clothes', 'name': 'chest_of_drawers_(furniture)'}, {'frequency': 'c', 'id': 247, 'synset': 'chicken.n.02', 'synonyms': ['chicken_(animal)'], 'def': 'a domestic fowl bred for flesh or eggs', 'name': 'chicken_(animal)'}, {'frequency': 'c', 'id': 248, 'synset': 'chicken_wire.n.01', 'synonyms': ['chicken_wire'], 'def': 'a galvanized wire network with a hexagonal mesh; used to build fences', 'name': 'chicken_wire'}, {'frequency': 'r', 'id': 249, 'synset': 'chickpea.n.01', 'synonyms': ['chickpea', 'garbanzo'], 'def': 'the seed of the chickpea plant; usually dried', 'name': 'chickpea'}, {'frequency': 'r', 'id': 250, 'synset': 'chihuahua.n.03', 'synonyms': ['Chihuahua'], 'def': 'an old breed of tiny short-haired dog with protruding eyes from Mexico', 'name': 'Chihuahua'}, {'frequency': 'r', 'id': 251, 'synset': 'chili.n.02', 'synonyms': ['chili_(vegetable)', 'chili_pepper_(vegetable)', 'chilli_(vegetable)', 'chilly_(vegetable)', 'chile_(vegetable)'], 'def': 'very hot and finely tapering pepper of special pungency', 'name': 'chili_(vegetable)'}, {'frequency': 'r', 'id': 252, 'synset': 'chime.n.01', 'synonyms': ['chime', 'gong'], 'def': 'an instrument consisting of a set of bells that are struck with a hammer', 'name': 'chime'}, {'frequency': 'r', 'id': 
253, 'synset': 'chinaware.n.01', 'synonyms': ['chinaware'], 'def': 'dishware made of high quality porcelain', 'name': 'chinaware'}, {'frequency': 'c', 'id': 254, 'synset': 'chip.n.04', 'synonyms': ['crisp_(potato_chip)', 'potato_chip'], 'def': 'a thin crisp slice of potato fried in deep fat', 'name': 'crisp_(potato_chip)'}, {'frequency': 'r', 'id': 255, 'synset': 'chip.n.06', 'synonyms': ['poker_chip'], 'def': 'a small disk-shaped counter used to represent money when gambling', 'name': 'poker_chip'}, {'frequency': 'c', 'id': 256, 'synset': 'chocolate_bar.n.01', 'synonyms': ['chocolate_bar'], 'def': 'a bar of chocolate candy', 'name': 'chocolate_bar'}, {'frequency': 'c', 'id': 257, 'synset': 'chocolate_cake.n.01', 'synonyms': ['chocolate_cake'], 'def': 'cake containing chocolate', 'name': 'chocolate_cake'}, {'frequency': 'r', 'id': 258, 'synset': 'chocolate_milk.n.01', 'synonyms': ['chocolate_milk'], 'def': 'milk flavored with chocolate syrup', 'name': 'chocolate_milk'}, {'frequency': 'r', 'id': 259, 'synset': 'chocolate_mousse.n.01', 'synonyms': ['chocolate_mousse'], 'def': 'dessert mousse made with chocolate', 'name': 'chocolate_mousse'}, {'frequency': 'f', 'id': 260, 'synset': 'choker.n.03', 'synonyms': ['choker', 'collar', 'neckband'], 'def': 'necklace that fits tightly around the neck', 'name': 'choker'}, {'frequency': 'f', 'id': 261, 'synset': 'chopping_board.n.01', 'synonyms': ['chopping_board', 'cutting_board', 'chopping_block'], 'def': 'a wooden board where meats or vegetables can be cut', 'name': 'chopping_board'}, {'frequency': 'c', 'id': 262, 'synset': 'chopstick.n.01', 'synonyms': ['chopstick'], 'def': 'one of a pair of slender sticks used as oriental tableware to eat food with', 'name': 'chopstick'}, {'frequency': 'f', 'id': 263, 'synset': 'christmas_tree.n.05', 'synonyms': ['Christmas_tree'], 'def': 'an ornamented evergreen used as a Christmas decoration', 'name': 'Christmas_tree'}, {'frequency': 'c', 'id': 264, 'synset': 'chute.n.02', 'synonyms': ['slide'], 'def': 'sloping channel through which things can descend', 'name': 'slide'}, {'frequency': 'r', 'id': 265, 'synset': 'cider.n.01', 'synonyms': ['cider', 'cyder'], 'def': 'a beverage made from juice pressed from apples', 'name': 'cider'}, {'frequency': 'r', 'id': 266, 'synset': 'cigar_box.n.01', 'synonyms': ['cigar_box'], 'def': 'a box for holding cigars', 'name': 'cigar_box'}, {'frequency': 'c', 'id': 267, 'synset': 'cigarette.n.01', 'synonyms': ['cigarette'], 'def': 'finely ground tobacco wrapped in paper; for smoking', 'name': 'cigarette'}, {'frequency': 'c', 'id': 268, 'synset': 'cigarette_case.n.01', 'synonyms': ['cigarette_case', 'cigarette_pack'], 'def': 'a small flat case for holding cigarettes', 'name': 'cigarette_case'}, {'frequency': 'f', 'id': 269, 'synset': 'cistern.n.02', 'synonyms': ['cistern', 'water_tank'], 'def': 'a tank that holds the water used to flush a toilet', 'name': 'cistern'}, {'frequency': 'r', 'id': 270, 'synset': 'clarinet.n.01', 'synonyms': ['clarinet'], 'def': 'a single-reed instrument with a straight tube', 'name': 'clarinet'}, {'frequency': 'r', 'id': 271, 'synset': 'clasp.n.01', 'synonyms': ['clasp'], 'def': 'a fastener (as a buckle or hook) that is used to hold two things together', 'name': 'clasp'}, {'frequency': 'c', 'id': 272, 'synset': 'cleansing_agent.n.01', 'synonyms': ['cleansing_agent', 'cleanser', 'cleaner'], 'def': 'a preparation used in cleaning something', 'name': 'cleansing_agent'}, {'frequency': 'r', 'id': 273, 'synset': 'clementine.n.01', 'synonyms': ['clementine'], 'def': 
'a variety of mandarin orange', 'name': 'clementine'}, {'frequency': 'c', 'id': 274, 'synset': 'clip.n.03', 'synonyms': ['clip'], 'def': 'any of various small fasteners used to hold loose articles together', 'name': 'clip'}, {'frequency': 'c', 'id': 275, 'synset': 'clipboard.n.01', 'synonyms': ['clipboard'], 'def': 'a small writing board with a clip at the top for holding papers', 'name': 'clipboard'}, {'frequency': 'f', 'id': 276, 'synset': 'clock.n.01', 'synonyms': ['clock', 'timepiece', 'timekeeper'], 'def': 'a timepiece that shows the time of day', 'name': 'clock'}, {'frequency': 'f', 'id': 277, 'synset': 'clock_tower.n.01', 'synonyms': ['clock_tower'], 'def': 'a tower with a large clock visible high up on an outside face', 'name': 'clock_tower'}, {'frequency': 'c', 'id': 278, 'synset': 'clothes_hamper.n.01', 'synonyms': ['clothes_hamper', 'laundry_basket', 'clothes_basket'], 'def': 'a hamper that holds dirty clothes to be washed or wet clothes to be dried', 'name': 'clothes_hamper'}, {'frequency': 'c', 'id': 279, 'synset': 'clothespin.n.01', 'synonyms': ['clothespin', 'clothes_peg'], 'def': 'wood or plastic fastener; for holding clothes on a clothesline', 'name': 'clothespin'}, {'frequency': 'r', 'id': 280, 'synset': 'clutch_bag.n.01', 'synonyms': ['clutch_bag'], 'def': "a woman's strapless purse that is carried in the hand", 'name': 'clutch_bag'}, {'frequency': 'f', 'id': 281, 'synset': 'coaster.n.03', 'synonyms': ['coaster'], 'def': 'a covering (plate or mat) that protects the surface of a table', 'name': 'coaster'}, {'frequency': 'f', 'id': 282, 'synset': 'coat.n.01', 'synonyms': ['coat'], 'def': 'an outer garment that has sleeves and covers the body from shoulder down', 'name': 'coat'}, {'frequency': 'c', 'id': 283, 'synset': 'coat_hanger.n.01', 'synonyms': ['coat_hanger', 'clothes_hanger', 'dress_hanger'], 'def': "a hanger that is shaped like a person's shoulders", 'name': 'coat_hanger'}, {'frequency': 'r', 'id': 284, 'synset': 'coatrack.n.01', 'synonyms': ['coatrack', 'hatrack'], 'def': 'a rack with hooks for temporarily holding coats and hats', 'name': 'coatrack'}, {'frequency': 'c', 'id': 285, 'synset': 'cock.n.04', 'synonyms': ['cock', 'rooster'], 'def': 'adult male chicken', 'name': 'cock'}, {'frequency': 'c', 'id': 286, 'synset': 'coconut.n.02', 'synonyms': ['coconut', 'cocoanut'], 'def': 'large hard-shelled brown oval nut with a fibrous husk', 'name': 'coconut'}, {'frequency': 'r', 'id': 287, 'synset': 'coffee_filter.n.01', 'synonyms': ['coffee_filter'], 'def': 'filter (usually of paper) that passes the coffee and retains the coffee grounds', 'name': 'coffee_filter'}, {'frequency': 'f', 'id': 288, 'synset': 'coffee_maker.n.01', 'synonyms': ['coffee_maker', 'coffee_machine'], 'def': 'a kitchen appliance for brewing coffee automatically', 'name': 'coffee_maker'}, {'frequency': 'f', 'id': 289, 'synset': 'coffee_table.n.01', 'synonyms': ['coffee_table', 'cocktail_table'], 'def': 'low table where magazines can be placed and coffee or cocktails are served', 'name': 'coffee_table'}, {'frequency': 'c', 'id': 290, 'synset': 'coffeepot.n.01', 'synonyms': ['coffeepot'], 'def': 'tall pot in which coffee is brewed', 'name': 'coffeepot'}, {'frequency': 'r', 'id': 291, 'synset': 'coil.n.05', 'synonyms': ['coil'], 'def': 'tubing that is wound in a spiral', 'name': 'coil'}, {'frequency': 'c', 'id': 292, 'synset': 'coin.n.01', 'synonyms': ['coin'], 'def': 'a flat metal piece (usually a disc) used as money', 'name': 'coin'}, {'frequency': 'r', 'id': 293, 'synset': 'colander.n.01', 
'synonyms': ['colander', 'cullender'], 'def': 'bowl-shaped strainer; used to wash or drain foods', 'name': 'colander'}, {'frequency': 'c', 'id': 294, 'synset': 'coleslaw.n.01', 'synonyms': ['coleslaw', 'slaw'], 'def': 'basically shredded cabbage', 'name': 'coleslaw'}, {'frequency': 'r', 'id': 295, 'synset': 'coloring_material.n.01', 'synonyms': ['coloring_material', 'colouring_material'], 'def': 'any material used for its color', 'name': 'coloring_material'}, {'frequency': 'r', 'id': 296, 'synset': 'combination_lock.n.01', 'synonyms': ['combination_lock'], 'def': 'lock that can be opened only by turning dials in a special sequence', 'name': 'combination_lock'}, {'frequency': 'c', 'id': 297, 'synset': 'comforter.n.04', 'synonyms': ['pacifier', 'teething_ring'], 'def': 'device used for an infant to suck or bite on', 'name': 'pacifier'}, {'frequency': 'r', 'id': 298, 'synset': 'comic_book.n.01', 'synonyms': ['comic_book'], 'def': 'a magazine devoted to comic strips', 'name': 'comic_book'}, {'frequency': 'f', 'id': 299, 'synset': 'computer_keyboard.n.01', 'synonyms': ['computer_keyboard', 'keyboard_(computer)'], 'def': 'a keyboard that is a data input device for computers', 'name': 'computer_keyboard'}, {'frequency': 'r', 'id': 300, 'synset': 'concrete_mixer.n.01', 'synonyms': ['concrete_mixer', 'cement_mixer'], 'def': 'a machine with a large revolving drum in which cement/concrete is mixed', 'name': 'concrete_mixer'}, {'frequency': 'f', 'id': 301, 'synset': 'cone.n.01', 'synonyms': ['cone', 'traffic_cone'], 'def': 'a cone-shaped object used to direct traffic', 'name': 'cone'}, {'frequency': 'f', 'id': 302, 'synset': 'control.n.09', 'synonyms': ['control', 'controller'], 'def': 'a mechanism that controls the operation of a machine', 'name': 'control'}, {'frequency': 'r', 'id': 303, 'synset': 'convertible.n.01', 'synonyms': ['convertible_(automobile)'], 'def': 'a car that has top that can be folded or removed', 'name': 'convertible_(automobile)'}, {'frequency': 'r', 'id': 304, 'synset': 'convertible.n.03', 'synonyms': ['sofa_bed'], 'def': 'a sofa that can be converted into a bed', 'name': 'sofa_bed'}, {'frequency': 'c', 'id': 305, 'synset': 'cookie.n.01', 'synonyms': ['cookie', 'cooky', 'biscuit_(cookie)'], 'def': "any of various small flat sweet cakes (`biscuit' is the British term)", 'name': 'cookie'}, {'frequency': 'r', 'id': 306, 'synset': 'cookie_jar.n.01', 'synonyms': ['cookie_jar', 'cooky_jar'], 'def': 'a jar in which cookies are kept (and sometimes money is hidden)', 'name': 'cookie_jar'}, {'frequency': 'r', 'id': 307, 'synset': 'cooking_utensil.n.01', 'synonyms': ['cooking_utensil'], 'def': 'a kitchen utensil made of material that does not melt easily; used for cooking', 'name': 'cooking_utensil'}, {'frequency': 'f', 'id': 308, 'synset': 'cooler.n.01', 'synonyms': ['cooler_(for_food)', 'ice_chest'], 'def': 'an insulated box for storing food often with ice', 'name': 'cooler_(for_food)'}, {'frequency': 'c', 'id': 309, 'synset': 'cork.n.04', 'synonyms': ['cork_(bottle_plug)', 'bottle_cork'], 'def': 'the plug in the mouth of a bottle (especially a wine bottle)', 'name': 'cork_(bottle_plug)'}, {'frequency': 'r', 'id': 310, 'synset': 'corkboard.n.01', 'synonyms': ['corkboard'], 'def': 'a sheet consisting of cork granules', 'name': 'corkboard'}, {'frequency': 'r', 'id': 311, 'synset': 'corkscrew.n.01', 'synonyms': ['corkscrew', 'bottle_screw'], 'def': 'a bottle opener that pulls corks', 'name': 'corkscrew'}, {'frequency': 'c', 'id': 312, 'synset': 'corn.n.03', 'synonyms': ['edible_corn', 
'corn', 'maize'], 'def': 'ears of corn that can be prepared and served for human food', 'name': 'edible_corn'}, {'frequency': 'r', 'id': 313, 'synset': 'cornbread.n.01', 'synonyms': ['cornbread'], 'def': 'bread made primarily of cornmeal', 'name': 'cornbread'}, {'frequency': 'c', 'id': 314, 'synset': 'cornet.n.01', 'synonyms': ['cornet', 'horn', 'trumpet'], 'def': 'a brass musical instrument with a narrow tube and a flared bell and many valves', 'name': 'cornet'}, {'frequency': 'c', 'id': 315, 'synset': 'cornice.n.01', 'synonyms': ['cornice', 'valance', 'valance_board', 'pelmet'], 'def': 'a decorative framework to conceal curtain fixtures at the top of a window casing', 'name': 'cornice'}, {'frequency': 'r', 'id': 316, 'synset': 'cornmeal.n.01', 'synonyms': ['cornmeal'], 'def': 'coarsely ground corn', 'name': 'cornmeal'}, {'frequency': 'r', 'id': 317, 'synset': 'corset.n.01', 'synonyms': ['corset', 'girdle'], 'def': "a woman's close-fitting foundation garment", 'name': 'corset'}, {'frequency': 'r', 'id': 318, 'synset': 'cos.n.02', 'synonyms': ['romaine_lettuce'], 'def': 'lettuce with long dark-green leaves in a loosely packed elongated head', 'name': 'romaine_lettuce'}, {'frequency': 'c', 'id': 319, 'synset': 'costume.n.04', 'synonyms': ['costume'], 'def': 'the attire characteristic of a country or a time or a social class', 'name': 'costume'}, {'frequency': 'r', 'id': 320, 'synset': 'cougar.n.01', 'synonyms': ['cougar', 'puma', 'catamount', 'mountain_lion', 'panther'], 'def': 'large American feline resembling a lion', 'name': 'cougar'}, {'frequency': 'r', 'id': 321, 'synset': 'coverall.n.01', 'synonyms': ['coverall'], 'def': 'a loose-fitting protective garment that is worn over other clothing', 'name': 'coverall'}, {'frequency': 'r', 'id': 322, 'synset': 'cowbell.n.01', 'synonyms': ['cowbell'], 'def': 'a bell hung around the neck of cow so that the cow can be easily located', 'name': 'cowbell'}, {'frequency': 'f', 'id': 323, 'synset': 'cowboy_hat.n.01', 'synonyms': ['cowboy_hat', 'ten-gallon_hat'], 'def': 'a hat with a wide brim and a soft crown; worn by American ranch hands', 'name': 'cowboy_hat'}, {'frequency': 'r', 'id': 324, 'synset': 'crab.n.01', 'synonyms': ['crab_(animal)'], 'def': 'decapod having eyes on short stalks and a broad flattened shell and pincers', 'name': 'crab_(animal)'}, {'frequency': 'c', 'id': 325, 'synset': 'cracker.n.01', 'synonyms': ['cracker'], 'def': 'a thin crisp wafer', 'name': 'cracker'}, {'frequency': 'r', 'id': 326, 'synset': 'crape.n.01', 'synonyms': ['crape', 'crepe', 'French_pancake'], 'def': 'small very thin pancake', 'name': 'crape'}, {'frequency': 'f', 'id': 327, 'synset': 'crate.n.01', 'synonyms': ['crate'], 'def': 'a rugged box (usually made of wood); used for shipping', 'name': 'crate'}, {'frequency': 'r', 'id': 328, 'synset': 'crayon.n.01', 'synonyms': ['crayon', 'wax_crayon'], 'def': 'writing or drawing implement made of a colored stick of composition wax', 'name': 'crayon'}, {'frequency': 'r', 'id': 329, 'synset': 'cream_pitcher.n.01', 'synonyms': ['cream_pitcher'], 'def': 'a small pitcher for serving cream', 'name': 'cream_pitcher'}, {'frequency': 'r', 'id': 330, 'synset': 'credit_card.n.01', 'synonyms': ['credit_card', 'charge_card', 'debit_card'], 'def': 'a card, usually plastic, used to pay for goods and services', 'name': 'credit_card'}, {'frequency': 'c', 'id': 331, 'synset': 'crescent_roll.n.01', 'synonyms': ['crescent_roll', 'croissant'], 'def': 'very rich flaky crescent-shaped roll', 'name': 'crescent_roll'}, {'frequency': 'c', 'id': 
332, 'synset': 'crib.n.01', 'synonyms': ['crib', 'cot'], 'def': 'baby bed with high sides made of slats', 'name': 'crib'}, {'frequency': 'c', 'id': 333, 'synset': 'crock.n.03', 'synonyms': ['crock_pot', 'earthenware_jar'], 'def': 'an earthen jar (made of baked clay)', 'name': 'crock_pot'}, {'frequency': 'f', 'id': 334, 'synset': 'crossbar.n.01', 'synonyms': ['crossbar'], 'def': 'a horizontal bar that goes across something', 'name': 'crossbar'}, {'frequency': 'r', 'id': 335, 'synset': 'crouton.n.01', 'synonyms': ['crouton'], 'def': 'a small piece of toasted or fried bread; served in soup or salads', 'name': 'crouton'}, {'frequency': 'r', 'id': 336, 'synset': 'crow.n.01', 'synonyms': ['crow'], 'def': 'black birds having a raucous call', 'name': 'crow'}, {'frequency': 'c', 'id': 337, 'synset': 'crown.n.04', 'synonyms': ['crown'], 'def': 'an ornamental jeweled headdress signifying sovereignty', 'name': 'crown'}, {'frequency': 'c', 'id': 338, 'synset': 'crucifix.n.01', 'synonyms': ['crucifix'], 'def': 'representation of the cross on which Jesus died', 'name': 'crucifix'}, {'frequency': 'c', 'id': 339, 'synset': 'cruise_ship.n.01', 'synonyms': ['cruise_ship', 'cruise_liner'], 'def': 'a passenger ship used commercially for pleasure cruises', 'name': 'cruise_ship'}, {'frequency': 'c', 'id': 340, 'synset': 'cruiser.n.01', 'synonyms': ['police_cruiser', 'patrol_car', 'police_car', 'squad_car'], 'def': 'a car in which policemen cruise the streets', 'name': 'police_cruiser'}, {'frequency': 'c', 'id': 341, 'synset': 'crumb.n.03', 'synonyms': ['crumb'], 'def': 'small piece of e.g. bread or cake', 'name': 'crumb'}, {'frequency': 'r', 'id': 342, 'synset': 'crutch.n.01', 'synonyms': ['crutch'], 'def': 'a wooden or metal staff that fits under the armpit and reaches to the ground', 'name': 'crutch'}, {'frequency': 'c', 'id': 343, 'synset': 'cub.n.03', 'synonyms': ['cub_(animal)'], 'def': 'the young of certain carnivorous mammals such as the bear or wolf or lion', 'name': 'cub_(animal)'}, {'frequency': 'r', 'id': 344, 'synset': 'cube.n.05', 'synonyms': ['cube', 'square_block'], 'def': 'a block in the (approximate) shape of a cube', 'name': 'cube'}, {'frequency': 'f', 'id': 345, 'synset': 'cucumber.n.02', 'synonyms': ['cucumber', 'cuke'], 'def': 'cylindrical green fruit with thin green rind and white flesh eaten as a vegetable', 'name': 'cucumber'}, {'frequency': 'c', 'id': 346, 'synset': 'cufflink.n.01', 'synonyms': ['cufflink'], 'def': 'jewelry consisting of linked buttons used to fasten the cuffs of a shirt', 'name': 'cufflink'}, {'frequency': 'f', 'id': 347, 'synset': 'cup.n.01', 'synonyms': ['cup'], 'def': 'a small open container usually used for drinking; usually has a handle', 'name': 'cup'}, {'frequency': 'c', 'id': 348, 'synset': 'cup.n.08', 'synonyms': ['trophy_cup'], 'def': 'a metal vessel with handles that is awarded as a trophy to a competition winner', 'name': 'trophy_cup'}, {'frequency': 'c', 'id': 349, 'synset': 'cupcake.n.01', 'synonyms': ['cupcake'], 'def': 'small cake baked in a muffin tin', 'name': 'cupcake'}, {'frequency': 'r', 'id': 350, 'synset': 'curler.n.01', 'synonyms': ['hair_curler', 'hair_roller', 'hair_crimper'], 'def': 'a cylindrical tube around which the hair is wound to curl it', 'name': 'hair_curler'}, {'frequency': 'r', 'id': 351, 'synset': 'curling_iron.n.01', 'synonyms': ['curling_iron'], 'def': 'a cylindrical home appliance that heats hair that has been curled around it', 'name': 'curling_iron'}, {'frequency': 'f', 'id': 352, 'synset': 'curtain.n.01', 'synonyms': 
['curtain', 'drapery'], 'def': 'hanging cloth used as a blind (especially for a window)', 'name': 'curtain'}, {'frequency': 'f', 'id': 353, 'synset': 'cushion.n.03', 'synonyms': ['cushion'], 'def': 'a soft bag filled with air or padding such as feathers or foam rubber', 'name': 'cushion'}, {'frequency': 'r', 'id': 354, 'synset': 'custard.n.01', 'synonyms': ['custard'], 'def': 'sweetened mixture of milk and eggs baked or boiled or frozen', 'name': 'custard'}, {'frequency': 'c', 'id': 355, 'synset': 'cutter.n.06', 'synonyms': ['cutting_tool'], 'def': 'a cutting implement; a tool for cutting', 'name': 'cutting_tool'}, {'frequency': 'r', 'id': 356, 'synset': 'cylinder.n.04', 'synonyms': ['cylinder'], 'def': 'a cylindrical container', 'name': 'cylinder'}, {'frequency': 'r', 'id': 357, 'synset': 'cymbal.n.01', 'synonyms': ['cymbal'], 'def': 'a percussion instrument consisting of a concave brass disk', 'name': 'cymbal'}, {'frequency': 'r', 'id': 358, 'synset': 'dachshund.n.01', 'synonyms': ['dachshund', 'dachsie', 'badger_dog'], 'def': 'small long-bodied short-legged breed of dog having a short sleek coat and long drooping ears', 'name': 'dachshund'}, {'frequency': 'r', 'id': 359, 'synset': 'dagger.n.01', 'synonyms': ['dagger'], 'def': 'a short knife with a pointed blade used for piercing or stabbing', 'name': 'dagger'}, {'frequency': 'r', 'id': 360, 'synset': 'dartboard.n.01', 'synonyms': ['dartboard'], 'def': 'a circular board of wood or cork used as the target in the game of darts', 'name': 'dartboard'}, {'frequency': 'r', 'id': 361, 'synset': 'date.n.08', 'synonyms': ['date_(fruit)'], 'def': 'sweet edible fruit of the date palm with a single long woody seed', 'name': 'date_(fruit)'}, {'frequency': 'f', 'id': 362, 'synset': 'deck_chair.n.01', 'synonyms': ['deck_chair', 'beach_chair'], 'def': 'a folding chair for use outdoors; a wooden frame supports a length of canvas', 'name': 'deck_chair'}, {'frequency': 'c', 'id': 363, 'synset': 'deer.n.01', 'synonyms': ['deer', 'cervid'], 'def': "distinguished from Bovidae by the male's having solid deciduous antlers", 'name': 'deer'}, {'frequency': 'c', 'id': 364, 'synset': 'dental_floss.n.01', 'synonyms': ['dental_floss', 'floss'], 'def': 'a soft thread for cleaning the spaces between the teeth', 'name': 'dental_floss'}, {'frequency': 'f', 'id': 365, 'synset': 'desk.n.01', 'synonyms': ['desk'], 'def': 'a piece of furniture with a writing surface and usually drawers or other compartments', 'name': 'desk'}, {'frequency': 'r', 'id': 366, 'synset': 'detergent.n.01', 'synonyms': ['detergent'], 'def': 'a surface-active chemical widely used in industry and laundering', 'name': 'detergent'}, {'frequency': 'c', 'id': 367, 'synset': 'diaper.n.01', 'synonyms': ['diaper'], 'def': 'garment consisting of a folded cloth drawn up between the legs and fastened at the waist', 'name': 'diaper'}, {'frequency': 'r', 'id': 368, 'synset': 'diary.n.01', 'synonyms': ['diary', 'journal'], 'def': 'a daily written record of (usually personal) experiences and observations', 'name': 'diary'}, {'frequency': 'r', 'id': 369, 'synset': 'die.n.01', 'synonyms': ['die', 'dice'], 'def': 'a small cube with 1 to 6 spots on the six faces; used in gambling', 'name': 'die'}, {'frequency': 'r', 'id': 370, 'synset': 'dinghy.n.01', 'synonyms': ['dinghy', 'dory', 'rowboat'], 'def': 'a small boat of shallow draft with seats and oars with which it is propelled', 'name': 'dinghy'}, {'frequency': 'f', 'id': 371, 'synset': 'dining_table.n.01', 'synonyms': ['dining_table'], 'def': 'a table at which meals 
are served', 'name': 'dining_table'}, {'frequency': 'r', 'id': 372, 'synset': 'dinner_jacket.n.01', 'synonyms': ['tux', 'tuxedo'], 'def': 'semiformal evening dress for men', 'name': 'tux'}, {'frequency': 'c', 'id': 373, 'synset': 'dish.n.01', 'synonyms': ['dish'], 'def': 'a piece of dishware normally used as a container for holding or serving food', 'name': 'dish'}, {'frequency': 'c', 'id': 374, 'synset': 'dish.n.05', 'synonyms': ['dish_antenna'], 'def': 'directional antenna consisting of a parabolic reflector', 'name': 'dish_antenna'}, {'frequency': 'c', 'id': 375, 'synset': 'dishrag.n.01', 'synonyms': ['dishrag', 'dishcloth'], 'def': 'a cloth for washing dishes', 'name': 'dishrag'}, {'frequency': 'c', 'id': 376, 'synset': 'dishtowel.n.01', 'synonyms': ['dishtowel', 'tea_towel'], 'def': 'a towel for drying dishes', 'name': 'dishtowel'}, {'frequency': 'f', 'id': 377, 'synset': 'dishwasher.n.01', 'synonyms': ['dishwasher', 'dishwashing_machine'], 'def': 'a machine for washing dishes', 'name': 'dishwasher'}, {'frequency': 'r', 'id': 378, 'synset': 'dishwasher_detergent.n.01', 'synonyms': ['dishwasher_detergent', 'dishwashing_detergent', 'dishwashing_liquid'], 'def': 'a low-sudsing detergent designed for use in dishwashers', 'name': 'dishwasher_detergent'}, {'frequency': 'r', 'id': 379, 'synset': 'diskette.n.01', 'synonyms': ['diskette', 'floppy', 'floppy_disk'], 'def': 'a small plastic magnetic disk enclosed in a stiff envelope used to store data', 'name': 'diskette'}, {'frequency': 'c', 'id': 380, 'synset': 'dispenser.n.01', 'synonyms': ['dispenser'], 'def': 'a container so designed that the contents can be used in prescribed amounts', 'name': 'dispenser'}, {'frequency': 'c', 'id': 381, 'synset': 'dixie_cup.n.01', 'synonyms': ['Dixie_cup', 'paper_cup'], 'def': 'a disposable cup made of paper; for holding drinks', 'name': 'Dixie_cup'}, {'frequency': 'f', 'id': 382, 'synset': 'dog.n.01', 'synonyms': ['dog'], 'def': 'a common domesticated dog', 'name': 'dog'}, {'frequency': 'f', 'id': 383, 'synset': 'dog_collar.n.01', 'synonyms': ['dog_collar'], 'def': 'a collar for a dog', 'name': 'dog_collar'}, {'frequency': 'c', 'id': 384, 'synset': 'doll.n.01', 'synonyms': ['doll'], 'def': 'a toy replica of a HUMAN (NOT AN ANIMAL)', 'name': 'doll'}, {'frequency': 'r', 'id': 385, 'synset': 'dollar.n.02', 'synonyms': ['dollar', 'dollar_bill', 'one_dollar_bill'], 'def': 'a piece of paper money worth one dollar', 'name': 'dollar'}, {'frequency': 'r', 'id': 386, 'synset': 'dolphin.n.02', 'synonyms': ['dolphin'], 'def': 'any of various small toothed whales with a beaklike snout; larger than porpoises', 'name': 'dolphin'}, {'frequency': 'c', 'id': 387, 'synset': 'domestic_ass.n.01', 'synonyms': ['domestic_ass', 'donkey'], 'def': 'domestic beast of burden descended from the African wild ass; patient but stubborn', 'name': 'domestic_ass'}, {'frequency': 'r', 'id': 388, 'synset': 'domino.n.03', 'synonyms': ['eye_mask'], 'def': 'a mask covering the upper part of the face but with holes for the eyes', 'name': 'eye_mask'}, {'frequency': 'r', 'id': 389, 'synset': 'doorbell.n.01', 'synonyms': ['doorbell', 'buzzer'], 'def': 'a button at an outer door that gives a ringing or buzzing signal when pushed', 'name': 'doorbell'}, {'frequency': 'f', 'id': 390, 'synset': 'doorknob.n.01', 'synonyms': ['doorknob', 'doorhandle'], 'def': "a knob used to open a door (often called `doorhandle' in Great Britain)", 'name': 'doorknob'}, {'frequency': 'c', 'id': 391, 'synset': 'doormat.n.02', 'synonyms': ['doormat', 'welcome_mat'], 'def': 
'a mat placed outside an exterior door for wiping the shoes before entering', 'name': 'doormat'}, {'frequency': 'f', 'id': 392, 'synset': 'doughnut.n.02', 'synonyms': ['doughnut', 'donut'], 'def': 'a small ring-shaped friedcake', 'name': 'doughnut'}, {'frequency': 'r', 'id': 393, 'synset': 'dove.n.01', 'synonyms': ['dove'], 'def': 'any of numerous small pigeons', 'name': 'dove'}, {'frequency': 'r', 'id': 394, 'synset': 'dragonfly.n.01', 'synonyms': ['dragonfly'], 'def': 'slender-bodied non-stinging insect having iridescent wings that are outspread at rest', 'name': 'dragonfly'}, {'frequency': 'f', 'id': 395, 'synset': 'drawer.n.01', 'synonyms': ['drawer'], 'def': 'a boxlike container in a piece of furniture; made so as to slide in and out', 'name': 'drawer'}, {'frequency': 'c', 'id': 396, 'synset': 'drawers.n.01', 'synonyms': ['underdrawers', 'boxers', 'boxershorts'], 'def': 'underpants worn by men', 'name': 'underdrawers'}, {'frequency': 'f', 'id': 397, 'synset': 'dress.n.01', 'synonyms': ['dress', 'frock'], 'def': 'a one-piece garment for a woman; has skirt and bodice', 'name': 'dress'}, {'frequency': 'c', 'id': 398, 'synset': 'dress_hat.n.01', 'synonyms': ['dress_hat', 'high_hat', 'opera_hat', 'silk_hat', 'top_hat'], 'def': "a man's hat with a tall crown; usually covered with silk or with beaver fur", 'name': 'dress_hat'}, {'frequency': 'c', 'id': 399, 'synset': 'dress_suit.n.01', 'synonyms': ['dress_suit'], 'def': 'formalwear consisting of full evening dress for men', 'name': 'dress_suit'}, {'frequency': 'c', 'id': 400, 'synset': 'dresser.n.05', 'synonyms': ['dresser'], 'def': 'a cabinet with shelves', 'name': 'dresser'}, {'frequency': 'c', 'id': 401, 'synset': 'drill.n.01', 'synonyms': ['drill'], 'def': 'a tool with a sharp rotating point for making holes in hard materials', 'name': 'drill'}, {'frequency': 'r', 'id': 402, 'synset': 'drinking_fountain.n.01', 'synonyms': ['drinking_fountain'], 'def': 'a public fountain to provide a jet of drinking water', 'name': 'drinking_fountain'}, {'frequency': 'r', 'id': 403, 'synset': 'drone.n.04', 'synonyms': ['drone'], 'def': 'an aircraft without a pilot that is operated by remote control', 'name': 'drone'}, {'frequency': 'r', 'id': 404, 'synset': 'dropper.n.01', 'synonyms': ['dropper', 'eye_dropper'], 'def': 'pipet consisting of a small tube with a vacuum bulb at one end for drawing liquid in and releasing it a drop at a time', 'name': 'dropper'}, {'frequency': 'c', 'id': 405, 'synset': 'drum.n.01', 'synonyms': ['drum_(musical_instrument)'], 'def': 'a musical percussion instrument; usually consists of a hollow cylinder with a membrane stretched across each end', 'name': 'drum_(musical_instrument)'}, {'frequency': 'r', 'id': 406, 'synset': 'drumstick.n.02', 'synonyms': ['drumstick'], 'def': 'a stick used for playing a drum', 'name': 'drumstick'}, {'frequency': 'f', 'id': 407, 'synset': 'duck.n.01', 'synonyms': ['duck'], 'def': 'small web-footed broad-billed swimming bird', 'name': 'duck'}, {'frequency': 'r', 'id': 408, 'synset': 'duckling.n.02', 'synonyms': ['duckling'], 'def': 'young duck', 'name': 'duckling'}, {'frequency': 'c', 'id': 409, 'synset': 'duct_tape.n.01', 'synonyms': ['duct_tape'], 'def': 'a wide silvery adhesive tape', 'name': 'duct_tape'}, {'frequency': 'f', 'id': 410, 'synset': 'duffel_bag.n.01', 'synonyms': ['duffel_bag', 'duffle_bag', 'duffel', 'duffle'], 'def': 'a large cylindrical bag of heavy cloth', 'name': 'duffel_bag'}, {'frequency': 'r', 'id': 411, 'synset': 'dumbbell.n.01', 'synonyms': ['dumbbell'], 'def': 'an 
exercising weight with two ball-like ends connected by a short handle', 'name': 'dumbbell'}, {'frequency': 'c', 'id': 412, 'synset': 'dumpster.n.01', 'synonyms': ['dumpster'], 'def': 'a container designed to receive and transport and dump waste', 'name': 'dumpster'}, {'frequency': 'r', 'id': 413, 'synset': 'dustpan.n.02', 'synonyms': ['dustpan'], 'def': 'a short-handled receptacle into which dust can be swept', 'name': 'dustpan'}, {'frequency': 'r', 'id': 414, 'synset': 'dutch_oven.n.02', 'synonyms': ['Dutch_oven'], 'def': 'iron or earthenware cooking pot; used for stews', 'name': 'Dutch_oven'}, {'frequency': 'c', 'id': 415, 'synset': 'eagle.n.01', 'synonyms': ['eagle'], 'def': 'large birds of prey noted for their broad wings and strong soaring flight', 'name': 'eagle'}, {'frequency': 'f', 'id': 416, 'synset': 'earphone.n.01', 'synonyms': ['earphone', 'earpiece', 'headphone'], 'def': 'device for listening to audio that is held over or inserted into the ear', 'name': 'earphone'}, {'frequency': 'r', 'id': 417, 'synset': 'earplug.n.01', 'synonyms': ['earplug'], 'def': 'a soft plug that is inserted into the ear canal to block sound', 'name': 'earplug'}, {'frequency': 'f', 'id': 418, 'synset': 'earring.n.01', 'synonyms': ['earring'], 'def': 'jewelry to ornament the ear', 'name': 'earring'}, {'frequency': 'c', 'id': 419, 'synset': 'easel.n.01', 'synonyms': ['easel'], 'def': "an upright tripod for displaying something (usually an artist's canvas)", 'name': 'easel'}, {'frequency': 'r', 'id': 420, 'synset': 'eclair.n.01', 'synonyms': ['eclair'], 'def': 'oblong cream puff', 'name': 'eclair'}, {'frequency': 'r', 'id': 421, 'synset': 'eel.n.01', 'synonyms': ['eel'], 'def': 'an elongate fish with fatty flesh', 'name': 'eel'}, {'frequency': 'f', 'id': 422, 'synset': 'egg.n.02', 'synonyms': ['egg', 'eggs'], 'def': 'oval reproductive body of a fowl (especially a hen) used as food', 'name': 'egg'}, {'frequency': 'r', 'id': 423, 'synset': 'egg_roll.n.01', 'synonyms': ['egg_roll', 'spring_roll'], 'def': 'minced vegetables and meat wrapped in a pancake and fried', 'name': 'egg_roll'}, {'frequency': 'c', 'id': 424, 'synset': 'egg_yolk.n.01', 'synonyms': ['egg_yolk', 'yolk_(egg)'], 'def': 'the yellow spherical part of an egg', 'name': 'egg_yolk'}, {'frequency': 'c', 'id': 425, 'synset': 'eggbeater.n.02', 'synonyms': ['eggbeater', 'eggwhisk'], 'def': 'a mixer for beating eggs or whipping cream', 'name': 'eggbeater'}, {'frequency': 'c', 'id': 426, 'synset': 'eggplant.n.01', 'synonyms': ['eggplant', 'aubergine'], 'def': 'egg-shaped vegetable having a shiny skin typically dark purple', 'name': 'eggplant'}, {'frequency': 'r', 'id': 427, 'synset': 'electric_chair.n.01', 'synonyms': ['electric_chair'], 'def': 'a chair-shaped instrument of execution by electrocution', 'name': 'electric_chair'}, {'frequency': 'f', 'id': 428, 'synset': 'electric_refrigerator.n.01', 'synonyms': ['refrigerator'], 'def': 'a refrigerator in which the coolant is pumped around by an electric motor', 'name': 'refrigerator'}, {'frequency': 'f', 'id': 429, 'synset': 'elephant.n.01', 'synonyms': ['elephant'], 'def': 'a common elephant', 'name': 'elephant'}, {'frequency': 'r', 'id': 430, 'synset': 'elk.n.01', 'synonyms': ['elk', 'moose'], 'def': 'large northern deer with enormous flattened antlers in the male', 'name': 'elk'}, {'frequency': 'c', 'id': 431, 'synset': 'envelope.n.01', 'synonyms': ['envelope'], 'def': 'a flat (usually rectangular) container for a letter, thin package, etc.', 'name': 'envelope'}, {'frequency': 'c', 'id': 432, 'synset': 
'eraser.n.01', 'synonyms': ['eraser'], 'def': 'an implement used to erase something', 'name': 'eraser'}, {'frequency': 'r', 'id': 433, 'synset': 'escargot.n.01', 'synonyms': ['escargot'], 'def': 'edible snail usually served in the shell with a sauce of melted butter and garlic', 'name': 'escargot'}, {'frequency': 'r', 'id': 434, 'synset': 'eyepatch.n.01', 'synonyms': ['eyepatch'], 'def': 'a protective cloth covering for an injured eye', 'name': 'eyepatch'}, {'frequency': 'r', 'id': 435, 'synset': 'falcon.n.01', 'synonyms': ['falcon'], 'def': 'birds of prey having long pointed powerful wings adapted for swift flight', 'name': 'falcon'}, {'frequency': 'f', 'id': 436, 'synset': 'fan.n.01', 'synonyms': ['fan'], 'def': 'a device for creating a current of air by movement of a surface or surfaces', 'name': 'fan'}, {'frequency': 'f', 'id': 437, 'synset': 'faucet.n.01', 'synonyms': ['faucet', 'spigot', 'tap'], 'def': 'a regulator for controlling the flow of a liquid from a reservoir', 'name': 'faucet'}, {'frequency': 'r', 'id': 438, 'synset': 'fedora.n.01', 'synonyms': ['fedora'], 'def': 'a hat made of felt with a creased crown', 'name': 'fedora'}, {'frequency': 'r', 'id': 439, 'synset': 'ferret.n.02', 'synonyms': ['ferret'], 'def': 'domesticated albino variety of the European polecat bred for hunting rats and rabbits', 'name': 'ferret'}, {'frequency': 'c', 'id': 440, 'synset': 'ferris_wheel.n.01', 'synonyms': ['Ferris_wheel'], 'def': 'a large wheel with suspended seats that remain upright as the wheel rotates', 'name': 'Ferris_wheel'}, {'frequency': 'r', 'id': 441, 'synset': 'ferry.n.01', 'synonyms': ['ferry', 'ferryboat'], 'def': 'a boat that transports people or vehicles across a body of water and operates on a regular schedule', 'name': 'ferry'}, {'frequency': 'r', 'id': 442, 'synset': 'fig.n.04', 'synonyms': ['fig_(fruit)'], 'def': 'fleshy sweet pear-shaped yellowish or purple fruit eaten fresh or preserved or dried', 'name': 'fig_(fruit)'}, {'frequency': 'c', 'id': 443, 'synset': 'fighter.n.02', 'synonyms': ['fighter_jet', 'fighter_aircraft', 'attack_aircraft'], 'def': 'a high-speed military or naval airplane designed to destroy enemy targets', 'name': 'fighter_jet'}, {'frequency': 'f', 'id': 444, 'synset': 'figurine.n.01', 'synonyms': ['figurine'], 'def': 'a small carved or molded figure', 'name': 'figurine'}, {'frequency': 'c', 'id': 445, 'synset': 'file.n.03', 'synonyms': ['file_cabinet', 'filing_cabinet'], 'def': 'office furniture consisting of a container for keeping papers in order', 'name': 'file_cabinet'}, {'frequency': 'r', 'id': 446, 'synset': 'file.n.04', 'synonyms': ['file_(tool)'], 'def': 'a steel hand tool with small sharp teeth on some or all of its surfaces; used for smoothing wood or metal', 'name': 'file_(tool)'}, {'frequency': 'f', 'id': 447, 'synset': 'fire_alarm.n.02', 'synonyms': ['fire_alarm', 'smoke_alarm'], 'def': 'an alarm that is tripped off by fire or smoke', 'name': 'fire_alarm'}, {'frequency': 'c', 'id': 448, 'synset': 'fire_engine.n.01', 'synonyms': ['fire_engine', 'fire_truck'], 'def': 'large trucks that carry firefighters and equipment to the site of a fire', 'name': 'fire_engine'}, {'frequency': 'c', 'id': 449, 'synset': 'fire_extinguisher.n.01', 'synonyms': ['fire_extinguisher', 'extinguisher'], 'def': 'a manually operated device for extinguishing small fires', 'name': 'fire_extinguisher'}, {'frequency': 'c', 'id': 450, 'synset': 'fire_hose.n.01', 'synonyms': ['fire_hose'], 'def': 'a large hose that carries water from a fire hydrant to the site of the fire', 
'name': 'fire_hose'}, {'frequency': 'f', 'id': 451, 'synset': 'fireplace.n.01', 'synonyms': ['fireplace'], 'def': 'an open recess in a wall at the base of a chimney where a fire can be built', 'name': 'fireplace'}, {'frequency': 'f', 'id': 452, 'synset': 'fireplug.n.01', 'synonyms': ['fireplug', 'fire_hydrant', 'hydrant'], 'def': 'an upright hydrant for drawing water to use in fighting a fire', 'name': 'fireplug'}, {'frequency': 'c', 'id': 453, 'synset': 'fish.n.01', 'synonyms': ['fish'], 'def': 'any of various mostly cold-blooded aquatic vertebrates usually having scales and breathing through gills', 'name': 'fish'}, {'frequency': 'r', 'id': 454, 'synset': 'fish.n.02', 'synonyms': ['fish_(food)'], 'def': 'the flesh of fish used as food', 'name': 'fish_(food)'}, {'frequency': 'r', 'id': 455, 'synset': 'fishbowl.n.02', 'synonyms': ['fishbowl', 'goldfish_bowl'], 'def': 'a transparent bowl in which small fish are kept', 'name': 'fishbowl'}, {'frequency': 'r', 'id': 456, 'synset': 'fishing_boat.n.01', 'synonyms': ['fishing_boat', 'fishing_vessel'], 'def': 'a vessel for fishing', 'name': 'fishing_boat'}, {'frequency': 'c', 'id': 457, 'synset': 'fishing_rod.n.01', 'synonyms': ['fishing_rod', 'fishing_pole'], 'def': 'a rod that is used in fishing to extend the fishing line', 'name': 'fishing_rod'}, {'frequency': 'f', 'id': 458, 'synset': 'flag.n.01', 'synonyms': ['flag'], 'def': 'emblem usually consisting of a rectangular piece of cloth of distinctive design (do not include pole)', 'name': 'flag'}, {'frequency': 'f', 'id': 459, 'synset': 'flagpole.n.02', 'synonyms': ['flagpole', 'flagstaff'], 'def': 'a tall staff or pole on which a flag is raised', 'name': 'flagpole'}, {'frequency': 'c', 'id': 460, 'synset': 'flamingo.n.01', 'synonyms': ['flamingo'], 'def': 'large pink web-footed bird with down-bent bill', 'name': 'flamingo'}, {'frequency': 'c', 'id': 461, 'synset': 'flannel.n.01', 'synonyms': ['flannel'], 'def': 'a soft light woolen fabric; used for clothing', 'name': 'flannel'}, {'frequency': 'r', 'id': 462, 'synset': 'flash.n.10', 'synonyms': ['flash', 'flashbulb'], 'def': 'a lamp for providing momentary light to take a photograph', 'name': 'flash'}, {'frequency': 'c', 'id': 463, 'synset': 'flashlight.n.01', 'synonyms': ['flashlight', 'torch'], 'def': 'a small portable battery-powered electric lamp', 'name': 'flashlight'}, {'frequency': 'r', 'id': 464, 'synset': 'fleece.n.03', 'synonyms': ['fleece'], 'def': 'a soft bulky fabric with deep pile; used chiefly for clothing', 'name': 'fleece'}, {'frequency': 'f', 'id': 465, 'synset': 'flip-flop.n.02', 'synonyms': ['flip-flop_(sandal)'], 'def': 'a backless sandal held to the foot by a thong between two toes', 'name': 'flip-flop_(sandal)'}, {'frequency': 'c', 'id': 466, 'synset': 'flipper.n.01', 'synonyms': ['flipper_(footwear)', 'fin_(footwear)'], 'def': 'a shoe to aid a person in swimming', 'name': 'flipper_(footwear)'}, {'frequency': 'f', 'id': 467, 'synset': 'flower_arrangement.n.01', 'synonyms': ['flower_arrangement', 'floral_arrangement'], 'def': 'a decorative arrangement of flowers', 'name': 'flower_arrangement'}, {'frequency': 'c', 'id': 468, 'synset': 'flute.n.02', 'synonyms': ['flute_glass', 'champagne_flute'], 'def': 'a tall narrow wineglass', 'name': 'flute_glass'}, {'frequency': 'r', 'id': 469, 'synset': 'foal.n.01', 'synonyms': ['foal'], 'def': 'a young horse', 'name': 'foal'}, {'frequency': 'c', 'id': 470, 'synset': 'folding_chair.n.01', 'synonyms': ['folding_chair'], 'def': 'a chair that can be folded flat for storage', 'name': 
'folding_chair'}, {'frequency': 'c', 'id': 471, 'synset': 'food_processor.n.01', 'synonyms': ['food_processor'], 'def': 'a kitchen appliance for shredding, blending, chopping, or slicing food', 'name': 'food_processor'}, {'frequency': 'c', 'id': 472, 'synset': 'football.n.02', 'synonyms': ['football_(American)'], 'def': 'the inflated oblong ball used in playing American football', 'name': 'football_(American)'}, {'frequency': 'r', 'id': 473, 'synset': 'football_helmet.n.01', 'synonyms': ['football_helmet'], 'def': 'a padded helmet with a face mask to protect the head of football players', 'name': 'football_helmet'}, {'frequency': 'c', 'id': 474, 'synset': 'footstool.n.01', 'synonyms': ['footstool', 'footrest'], 'def': 'a low seat or a stool to rest the feet of a seated person', 'name': 'footstool'}, {'frequency': 'f', 'id': 475, 'synset': 'fork.n.01', 'synonyms': ['fork'], 'def': 'cutlery used for serving and eating food', 'name': 'fork'}, {'frequency': 'r', 'id': 476, 'synset': 'forklift.n.01', 'synonyms': ['forklift'], 'def': 'an industrial vehicle with a power operated fork in front that can be inserted under loads to lift and move them', 'name': 'forklift'}, {'frequency': 'r', 'id': 477, 'synset': 'freight_car.n.01', 'synonyms': ['freight_car'], 'def': 'a railway car that carries freight', 'name': 'freight_car'}, {'frequency': 'r', 'id': 478, 'synset': 'french_toast.n.01', 'synonyms': ['French_toast'], 'def': 'bread slice dipped in egg and milk and fried', 'name': 'French_toast'}, {'frequency': 'c', 'id': 479, 'synset': 'freshener.n.01', 'synonyms': ['freshener', 'air_freshener'], 'def': 'anything that freshens', 'name': 'freshener'}, {'frequency': 'f', 'id': 480, 'synset': 'frisbee.n.01', 'synonyms': ['frisbee'], 'def': 'a light, plastic disk propelled with a flip of the wrist for recreation or competition', 'name': 'frisbee'}, {'frequency': 'c', 'id': 481, 'synset': 'frog.n.01', 'synonyms': ['frog', 'toad', 'toad_frog'], 'def': 'a tailless stout-bodied amphibians with long hind limbs for leaping', 'name': 'frog'}, {'frequency': 'c', 'id': 482, 'synset': 'fruit_juice.n.01', 'synonyms': ['fruit_juice'], 'def': 'drink produced by squeezing or crushing fruit', 'name': 'fruit_juice'}, {'frequency': 'r', 'id': 483, 'synset': 'fruit_salad.n.01', 'synonyms': ['fruit_salad'], 'def': 'salad composed of fruits', 'name': 'fruit_salad'}, {'frequency': 'c', 'id': 484, 'synset': 'frying_pan.n.01', 'synonyms': ['frying_pan', 'frypan', 'skillet'], 'def': 'a pan used for frying foods', 'name': 'frying_pan'}, {'frequency': 'r', 'id': 485, 'synset': 'fudge.n.01', 'synonyms': ['fudge'], 'def': 'soft creamy candy', 'name': 'fudge'}, {'frequency': 'r', 'id': 486, 'synset': 'funnel.n.02', 'synonyms': ['funnel'], 'def': 'a cone-shaped utensil used to channel a substance into a container with a small mouth', 'name': 'funnel'}, {'frequency': 'c', 'id': 487, 'synset': 'futon.n.01', 'synonyms': ['futon'], 'def': 'a pad that is used for sleeping on the floor or on a raised frame', 'name': 'futon'}, {'frequency': 'r', 'id': 488, 'synset': 'gag.n.02', 'synonyms': ['gag', 'muzzle'], 'def': "restraint put into a person's mouth to prevent speaking or shouting", 'name': 'gag'}, {'frequency': 'r', 'id': 489, 'synset': 'garbage.n.03', 'synonyms': ['garbage'], 'def': 'a receptacle where waste can be discarded', 'name': 'garbage'}, {'frequency': 'c', 'id': 490, 'synset': 'garbage_truck.n.01', 'synonyms': ['garbage_truck'], 'def': 'a truck for collecting domestic refuse', 'name': 'garbage_truck'}, {'frequency': 'c', 'id': 
491, 'synset': 'garden_hose.n.01', 'synonyms': ['garden_hose'], 'def': 'a hose used for watering a lawn or garden', 'name': 'garden_hose'}, {'frequency': 'c', 'id': 492, 'synset': 'gargle.n.01', 'synonyms': ['gargle', 'mouthwash'], 'def': 'a medicated solution used for gargling and rinsing the mouth', 'name': 'gargle'}, {'frequency': 'r', 'id': 493, 'synset': 'gargoyle.n.02', 'synonyms': ['gargoyle'], 'def': 'an ornament consisting of a grotesquely carved figure of a person or animal', 'name': 'gargoyle'}, {'frequency': 'c', 'id': 494, 'synset': 'garlic.n.02', 'synonyms': ['garlic', 'ail'], 'def': 'aromatic bulb used as seasoning', 'name': 'garlic'}, {'frequency': 'r', 'id': 495, 'synset': 'gasmask.n.01', 'synonyms': ['gasmask', 'respirator', 'gas_helmet'], 'def': 'a protective face mask with a filter', 'name': 'gasmask'}, {'frequency': 'r', 'id': 496, 'synset': 'gazelle.n.01', 'synonyms': ['gazelle'], 'def': 'small swift graceful antelope of Africa and Asia having lustrous eyes', 'name': 'gazelle'}, {'frequency': 'c', 'id': 497, 'synset': 'gelatin.n.02', 'synonyms': ['gelatin', 'jelly'], 'def': 'an edible jelly made with gelatin and used as a dessert or salad base or a coating for foods', 'name': 'gelatin'}, {'frequency': 'r', 'id': 498, 'synset': 'gem.n.02', 'synonyms': ['gemstone'], 'def': 'a crystalline rock that can be cut and polished for jewelry', 'name': 'gemstone'}, {'frequency': 'c', 'id': 499, 'synset': 'giant_panda.n.01', 'synonyms': ['giant_panda', 'panda', 'panda_bear'], 'def': 'large black-and-white herbivorous mammal of bamboo forests of China and Tibet', 'name': 'giant_panda'}, {'frequency': 'c', 'id': 500, 'synset': 'gift_wrap.n.01', 'synonyms': ['gift_wrap'], 'def': 'attractive wrapping paper suitable for wrapping gifts', 'name': 'gift_wrap'}, {'frequency': 'c', 'id': 501, 'synset': 'ginger.n.03', 'synonyms': ['ginger', 'gingerroot'], 'def': 'the root of the common ginger plant; used fresh as a seasoning', 'name': 'ginger'}, {'frequency': 'f', 'id': 502, 'synset': 'giraffe.n.01', 'synonyms': ['giraffe'], 'def': 'tall animal having a spotted coat and small horns and very long neck and legs', 'name': 'giraffe'}, {'frequency': 'c', 'id': 503, 'synset': 'girdle.n.02', 'synonyms': ['cincture', 'sash', 'waistband', 'waistcloth'], 'def': 'a band of material around the waist that strengthens a skirt or trousers', 'name': 'cincture'}, {'frequency': 'f', 'id': 504, 'synset': 'glass.n.02', 'synonyms': ['glass_(drink_container)', 'drinking_glass'], 'def': 'a container for holding liquids while drinking', 'name': 'glass_(drink_container)'}, {'frequency': 'c', 'id': 505, 'synset': 'globe.n.03', 'synonyms': ['globe'], 'def': 'a sphere on which a map (especially of the earth) is represented', 'name': 'globe'}, {'frequency': 'f', 'id': 506, 'synset': 'glove.n.02', 'synonyms': ['glove'], 'def': 'handwear covering the hand', 'name': 'glove'}, {'frequency': 'c', 'id': 507, 'synset': 'goat.n.01', 'synonyms': ['goat'], 'def': 'a common goat', 'name': 'goat'}, {'frequency': 'f', 'id': 508, 'synset': 'goggles.n.01', 'synonyms': ['goggles'], 'def': 'tight-fitting spectacles worn to protect the eyes', 'name': 'goggles'}, {'frequency': 'r', 'id': 509, 'synset': 'goldfish.n.01', 'synonyms': ['goldfish'], 'def': 'small golden or orange-red freshwater fishes used as pond or aquarium pets', 'name': 'goldfish'}, {'frequency': 'r', 'id': 510, 'synset': 'golf_club.n.02', 'synonyms': ['golf_club', 'golf-club'], 'def': 'golf equipment used by a golfer to hit a golf ball', 'name': 'golf_club'}, 
{'frequency': 'c', 'id': 511, 'synset': 'golfcart.n.01', 'synonyms': ['golfcart'], 'def': 'a small motor vehicle in which golfers can ride between shots', 'name': 'golfcart'}, {'frequency': 'r', 'id': 512, 'synset': 'gondola.n.02', 'synonyms': ['gondola_(boat)'], 'def': 'long narrow flat-bottomed boat propelled by sculling; traditionally used on canals of Venice', 'name': 'gondola_(boat)'}, {'frequency': 'c', 'id': 513, 'synset': 'goose.n.01', 'synonyms': ['goose'], 'def': 'loud, web-footed long-necked aquatic birds usually larger than ducks', 'name': 'goose'}, {'frequency': 'r', 'id': 514, 'synset': 'gorilla.n.01', 'synonyms': ['gorilla'], 'def': 'largest ape', 'name': 'gorilla'}, {'frequency': 'r', 'id': 515, 'synset': 'gourd.n.02', 'synonyms': ['gourd'], 'def': 'any of numerous inedible fruits with hard rinds', 'name': 'gourd'}, {'frequency': 'r', 'id': 516, 'synset': 'gown.n.04', 'synonyms': ['surgical_gown', 'scrubs_(surgical_clothing)'], 'def': 'protective garment worn by surgeons during operations', 'name': 'surgical_gown'}, {'frequency': 'f', 'id': 517, 'synset': 'grape.n.01', 'synonyms': ['grape'], 'def': 'any of various juicy fruit with green or purple skins; grow in clusters', 'name': 'grape'}, {'frequency': 'r', 'id': 518, 'synset': 'grasshopper.n.01', 'synonyms': ['grasshopper'], 'def': 'plant-eating insect with hind legs adapted for leaping', 'name': 'grasshopper'}, {'frequency': 'c', 'id': 519, 'synset': 'grater.n.01', 'synonyms': ['grater'], 'def': 'utensil with sharp perforations for shredding foods (as vegetables or cheese)', 'name': 'grater'}, {'frequency': 'c', 'id': 520, 'synset': 'gravestone.n.01', 'synonyms': ['gravestone', 'headstone', 'tombstone'], 'def': 'a stone that is used to mark a grave', 'name': 'gravestone'}, {'frequency': 'r', 'id': 521, 'synset': 'gravy_boat.n.01', 'synonyms': ['gravy_boat', 'gravy_holder'], 'def': 'a dish (often boat-shaped) for serving gravy or sauce', 'name': 'gravy_boat'}, {'frequency': 'c', 'id': 522, 'synset': 'green_bean.n.02', 'synonyms': ['green_bean'], 'def': 'a common bean plant cultivated for its slender green edible pods', 'name': 'green_bean'}, {'frequency': 'c', 'id': 523, 'synset': 'green_onion.n.01', 'synonyms': ['green_onion', 'spring_onion', 'scallion'], 'def': 'a young onion before the bulb has enlarged', 'name': 'green_onion'}, {'frequency': 'r', 'id': 524, 'synset': 'griddle.n.01', 'synonyms': ['griddle'], 'def': 'cooking utensil consisting of a flat heated surface on which food is cooked', 'name': 'griddle'}, {'frequency': 'r', 'id': 525, 'synset': 'grillroom.n.01', 'synonyms': ['grillroom', 'grill_(restaurant)'], 'def': 'a restaurant where food is cooked on a grill', 'name': 'grillroom'}, {'frequency': 'r', 'id': 526, 'synset': 'grinder.n.04', 'synonyms': ['grinder_(tool)'], 'def': 'a machine tool that polishes metal', 'name': 'grinder_(tool)'}, {'frequency': 'r', 'id': 527, 'synset': 'grits.n.01', 'synonyms': ['grits', 'hominy_grits'], 'def': 'coarsely ground corn boiled as a breakfast dish', 'name': 'grits'}, {'frequency': 'c', 'id': 528, 'synset': 'grizzly.n.01', 'synonyms': ['grizzly', 'grizzly_bear'], 'def': 'powerful brownish-yellow bear of the uplands of western North America', 'name': 'grizzly'}, {'frequency': 'c', 'id': 529, 'synset': 'grocery_bag.n.01', 'synonyms': ['grocery_bag'], 'def': "a sack for holding customer's groceries", 'name': 'grocery_bag'}, {'frequency': 'r', 'id': 530, 'synset': 'guacamole.n.01', 'synonyms': ['guacamole'], 'def': 'a dip made of mashed avocado mixed with chopped onions and 
other seasonings', 'name': 'guacamole'}, {'frequency': 'f', 'id': 531, 'synset': 'guitar.n.01', 'synonyms': ['guitar'], 'def': 'a stringed instrument usually having six strings; played by strumming or plucking', 'name': 'guitar'}, {'frequency': 'c', 'id': 532, 'synset': 'gull.n.02', 'synonyms': ['gull', 'seagull'], 'def': 'mostly white aquatic bird having long pointed wings and short legs', 'name': 'gull'}, {'frequency': 'c', 'id': 533, 'synset': 'gun.n.01', 'synonyms': ['gun'], 'def': 'a weapon that discharges a bullet at high velocity from a metal tube', 'name': 'gun'}, {'frequency': 'r', 'id': 534, 'synset': 'hair_spray.n.01', 'synonyms': ['hair_spray'], 'def': 'substance sprayed on the hair to hold it in place', 'name': 'hair_spray'}, {'frequency': 'c', 'id': 535, 'synset': 'hairbrush.n.01', 'synonyms': ['hairbrush'], 'def': "a brush used to groom a person's hair", 'name': 'hairbrush'}, {'frequency': 'c', 'id': 536, 'synset': 'hairnet.n.01', 'synonyms': ['hairnet'], 'def': 'a small net that someone wears over their hair to keep it in place', 'name': 'hairnet'}, {'frequency': 'c', 'id': 537, 'synset': 'hairpin.n.01', 'synonyms': ['hairpin'], 'def': "a double pronged pin used to hold women's hair in place", 'name': 'hairpin'}, {'frequency': 'f', 'id': 538, 'synset': 'ham.n.01', 'synonyms': ['ham', 'jambon', 'gammon'], 'def': 'meat cut from the thigh of a hog (usually smoked)', 'name': 'ham'}, {'frequency': 'c', 'id': 539, 'synset': 'hamburger.n.01', 'synonyms': ['hamburger', 'beefburger', 'burger'], 'def': 'a sandwich consisting of a patty of minced beef served on a bun', 'name': 'hamburger'}, {'frequency': 'c', 'id': 540, 'synset': 'hammer.n.02', 'synonyms': ['hammer'], 'def': 'a hand tool with a heavy head and a handle; used to deliver an impulsive force by striking', 'name': 'hammer'}, {'frequency': 'r', 'id': 541, 'synset': 'hammock.n.02', 'synonyms': ['hammock'], 'def': 'a hanging bed of canvas or rope netting (usually suspended between two trees)', 'name': 'hammock'}, {'frequency': 'r', 'id': 542, 'synset': 'hamper.n.02', 'synonyms': ['hamper'], 'def': 'a basket usually with a cover', 'name': 'hamper'}, {'frequency': 'r', 'id': 543, 'synset': 'hamster.n.01', 'synonyms': ['hamster'], 'def': 'short-tailed burrowing rodent with large cheek pouches', 'name': 'hamster'}, {'frequency': 'c', 'id': 544, 'synset': 'hand_blower.n.01', 'synonyms': ['hair_dryer'], 'def': 'a hand-held electric blower that can blow warm air onto the hair', 'name': 'hair_dryer'}, {'frequency': 'r', 'id': 545, 'synset': 'hand_glass.n.01', 'synonyms': ['hand_glass', 'hand_mirror'], 'def': 'a mirror intended to be held in the hand', 'name': 'hand_glass'}, {'frequency': 'f', 'id': 546, 'synset': 'hand_towel.n.01', 'synonyms': ['hand_towel', 'face_towel'], 'def': 'a small towel used to dry the hands or face', 'name': 'hand_towel'}, {'frequency': 'c', 'id': 547, 'synset': 'handcart.n.01', 'synonyms': ['handcart', 'pushcart', 'hand_truck'], 'def': 'wheeled vehicle that can be pushed by a person', 'name': 'handcart'}, {'frequency': 'r', 'id': 548, 'synset': 'handcuff.n.01', 'synonyms': ['handcuff'], 'def': 'shackle that consists of a metal loop that can be locked around the wrist', 'name': 'handcuff'}, {'frequency': 'c', 'id': 549, 'synset': 'handkerchief.n.01', 'synonyms': ['handkerchief'], 'def': 'a square piece of cloth used for wiping the eyes or nose or as a costume accessory', 'name': 'handkerchief'}, {'frequency': 'f', 'id': 550, 'synset': 'handle.n.01', 'synonyms': ['handle', 'grip', 'handgrip'], 'def': 'the 
appendage to an object that is designed to be held in order to use or move it', 'name': 'handle'}, {'frequency': 'r', 'id': 551, 'synset': 'handsaw.n.01', 'synonyms': ['handsaw', "carpenter's_saw"], 'def': 'a saw used with one hand for cutting wood', 'name': 'handsaw'}, {'frequency': 'r', 'id': 552, 'synset': 'hardback.n.01', 'synonyms': ['hardback_book', 'hardcover_book'], 'def': 'a book with cardboard or cloth or leather covers', 'name': 'hardback_book'}, {'frequency': 'r', 'id': 553, 'synset': 'harmonium.n.01', 'synonyms': ['harmonium', 'organ_(musical_instrument)', 'reed_organ_(musical_instrument)'], 'def': 'a free-reed instrument in which air is forced through the reeds by bellows', 'name': 'harmonium'}, {'frequency': 'f', 'id': 554, 'synset': 'hat.n.01', 'synonyms': ['hat'], 'def': 'headwear that protects the head from bad weather, sun, or worn for fashion', 'name': 'hat'}, {'frequency': 'r', 'id': 555, 'synset': 'hatbox.n.01', 'synonyms': ['hatbox'], 'def': 'a round piece of luggage for carrying hats', 'name': 'hatbox'}, {'frequency': 'r', 'id': 556, 'synset': 'hatch.n.03', 'synonyms': ['hatch'], 'def': 'a movable barrier covering a hatchway', 'name': 'hatch'}, {'frequency': 'c', 'id': 557, 'synset': 'head_covering.n.01', 'synonyms': ['veil'], 'def': 'a garment that covers the head and face', 'name': 'veil'}, {'frequency': 'f', 'id': 558, 'synset': 'headband.n.01', 'synonyms': ['headband'], 'def': 'a band worn around or over the head', 'name': 'headband'}, {'frequency': 'f', 'id': 559, 'synset': 'headboard.n.01', 'synonyms': ['headboard'], 'def': 'a vertical board or panel forming the head of a bedstead', 'name': 'headboard'}, {'frequency': 'f', 'id': 560, 'synset': 'headlight.n.01', 'synonyms': ['headlight', 'headlamp'], 'def': 'a powerful light with reflector; attached to the front of an automobile or locomotive', 'name': 'headlight'}, {'frequency': 'c', 'id': 561, 'synset': 'headscarf.n.01', 'synonyms': ['headscarf'], 'def': 'a kerchief worn over the head and tied under the chin', 'name': 'headscarf'}, {'frequency': 'r', 'id': 562, 'synset': 'headset.n.01', 'synonyms': ['headset'], 'def': 'receiver consisting of a pair of headphones', 'name': 'headset'}, {'frequency': 'c', 'id': 563, 'synset': 'headstall.n.01', 'synonyms': ['headstall_(for_horses)', 'headpiece_(for_horses)'], 'def': "the band that is the part of a bridle that fits around a horse's head", 'name': 'headstall_(for_horses)'}, {'frequency': 'r', 'id': 564, 'synset': 'hearing_aid.n.02', 'synonyms': ['hearing_aid'], 'def': 'an acoustic device used to direct sound to the ear of a hearing-impaired person', 'name': 'hearing_aid'}, {'frequency': 'c', 'id': 565, 'synset': 'heart.n.02', 'synonyms': ['heart'], 'def': 'a muscular organ; its contractions move the blood through the body', 'name': 'heart'}, {'frequency': 'c', 'id': 566, 'synset': 'heater.n.01', 'synonyms': ['heater', 'warmer'], 'def': 'device that heats water or supplies warmth to a room', 'name': 'heater'}, {'frequency': 'c', 'id': 567, 'synset': 'helicopter.n.01', 'synonyms': ['helicopter'], 'def': 'an aircraft without wings that obtains its lift from the rotation of overhead blades', 'name': 'helicopter'}, {'frequency': 'f', 'id': 568, 'synset': 'helmet.n.02', 'synonyms': ['helmet'], 'def': 'a protective headgear made of hard material to resist blows', 'name': 'helmet'}, {'frequency': 'r', 'id': 569, 'synset': 'heron.n.02', 'synonyms': ['heron'], 'def': 'grey or white wading bird with long neck and long legs and (usually) long bill', 'name': 'heron'}, 
{'frequency': 'c', 'id': 570, 'synset': 'highchair.n.01', 'synonyms': ['highchair', 'feeding_chair'], 'def': 'a chair for feeding a very young child', 'name': 'highchair'}, {'frequency': 'f', 'id': 571, 'synset': 'hinge.n.01', 'synonyms': ['hinge'], 'def': 'a joint that holds two parts together so that one can swing relative to the other', 'name': 'hinge'}, {'frequency': 'r', 'id': 572, 'synset': 'hippopotamus.n.01', 'synonyms': ['hippopotamus'], 'def': 'massive thick-skinned animal living in or around rivers of tropical Africa', 'name': 'hippopotamus'}, {'frequency': 'r', 'id': 573, 'synset': 'hockey_stick.n.01', 'synonyms': ['hockey_stick'], 'def': 'sports implement consisting of a stick used by hockey players to move the puck', 'name': 'hockey_stick'}, {'frequency': 'c', 'id': 574, 'synset': 'hog.n.03', 'synonyms': ['hog', 'pig'], 'def': 'domestic swine', 'name': 'hog'}, {'frequency': 'f', 'id': 575, 'synset': 'home_plate.n.01', 'synonyms': ['home_plate_(baseball)', 'home_base_(baseball)'], 'def': '(baseball) a rubber slab where the batter stands; it must be touched by a base runner in order to score', 'name': 'home_plate_(baseball)'}, {'frequency': 'c', 'id': 576, 'synset': 'honey.n.01', 'synonyms': ['honey'], 'def': 'a sweet yellow liquid produced by bees', 'name': 'honey'}, {'frequency': 'f', 'id': 577, 'synset': 'hood.n.06', 'synonyms': ['fume_hood', 'exhaust_hood'], 'def': 'metal covering leading to a vent that exhausts smoke or fumes', 'name': 'fume_hood'}, {'frequency': 'f', 'id': 578, 'synset': 'hook.n.05', 'synonyms': ['hook'], 'def': 'a curved or bent implement for suspending or pulling something', 'name': 'hook'}, {'frequency': 'f', 'id': 579, 'synset': 'horse.n.01', 'synonyms': ['horse'], 'def': 'a common horse', 'name': 'horse'}, {'frequency': 'f', 'id': 580, 'synset': 'hose.n.03', 'synonyms': ['hose', 'hosepipe'], 'def': 'a flexible pipe for conveying a liquid or gas', 'name': 'hose'}, {'frequency': 'r', 'id': 581, 'synset': 'hot-air_balloon.n.01', 'synonyms': ['hot-air_balloon'], 'def': 'balloon for travel through the air in a basket suspended below a large bag of heated air', 'name': 'hot-air_balloon'}, {'frequency': 'r', 'id': 582, 'synset': 'hot_plate.n.01', 'synonyms': ['hotplate'], 'def': 'a portable electric appliance for heating or cooking or keeping food warm', 'name': 'hotplate'}, {'frequency': 'c', 'id': 583, 'synset': 'hot_sauce.n.01', 'synonyms': ['hot_sauce'], 'def': 'a pungent peppery sauce', 'name': 'hot_sauce'}, {'frequency': 'r', 'id': 584, 'synset': 'hourglass.n.01', 'synonyms': ['hourglass'], 'def': 'a sandglass timer that runs for sixty minutes', 'name': 'hourglass'}, {'frequency': 'r', 'id': 585, 'synset': 'houseboat.n.01', 'synonyms': ['houseboat'], 'def': 'a barge that is designed and equipped for use as a dwelling', 'name': 'houseboat'}, {'frequency': 'r', 'id': 586, 'synset': 'hummingbird.n.01', 'synonyms': ['hummingbird'], 'def': 'tiny American bird having brilliant iridescent plumage and long slender bills', 'name': 'hummingbird'}, {'frequency': 'r', 'id': 587, 'synset': 'hummus.n.01', 'synonyms': ['hummus', 'humus', 'hommos', 'hoummos', 'humous'], 'def': 'a thick spread made from mashed chickpeas', 'name': 'hummus'}, {'frequency': 'c', 'id': 588, 'synset': 'ice_bear.n.01', 'synonyms': ['polar_bear'], 'def': 'white bear of Arctic regions', 'name': 'polar_bear'}, {'frequency': 'c', 'id': 589, 'synset': 'ice_cream.n.01', 'synonyms': ['icecream'], 'def': 'frozen dessert containing cream and sugar and flavoring', 'name': 'icecream'}, {'frequency': 
'r', 'id': 590, 'synset': 'ice_lolly.n.01', 'synonyms': ['popsicle'], 'def': 'ice cream or water ice on a small wooden stick', 'name': 'popsicle'}, {'frequency': 'c', 'id': 591, 'synset': 'ice_maker.n.01', 'synonyms': ['ice_maker'], 'def': 'an appliance included in some electric refrigerators for making ice cubes', 'name': 'ice_maker'}, {'frequency': 'r', 'id': 592, 'synset': 'ice_pack.n.01', 'synonyms': ['ice_pack', 'ice_bag'], 'def': 'a waterproof bag filled with ice: applied to the body (especially the head) to cool or reduce swelling', 'name': 'ice_pack'}, {'frequency': 'r', 'id': 593, 'synset': 'ice_skate.n.01', 'synonyms': ['ice_skate'], 'def': 'skate consisting of a boot with a steel blade fitted to the sole', 'name': 'ice_skate'}, {'frequency': 'r', 'id': 594, 'synset': 'ice_tea.n.01', 'synonyms': ['ice_tea', 'iced_tea'], 'def': 'strong tea served over ice', 'name': 'ice_tea'}, {'frequency': 'c', 'id': 595, 'synset': 'igniter.n.01', 'synonyms': ['igniter', 'ignitor', 'lighter'], 'def': 'a substance or device used to start a fire', 'name': 'igniter'}, {'frequency': 'r', 'id': 596, 'synset': 'incense.n.01', 'synonyms': ['incense'], 'def': 'a substance that produces a fragrant odor when burned', 'name': 'incense'}, {'frequency': 'r', 'id': 597, 'synset': 'inhaler.n.01', 'synonyms': ['inhaler', 'inhalator'], 'def': 'a dispenser that produces a chemical vapor to be inhaled through mouth or nose', 'name': 'inhaler'}, {'frequency': 'c', 'id': 598, 'synset': 'ipod.n.01', 'synonyms': ['iPod'], 'def': 'a pocket-sized device used to play music files', 'name': 'iPod'}, {'frequency': 'c', 'id': 599, 'synset': 'iron.n.04', 'synonyms': ['iron_(for_clothing)', 'smoothing_iron_(for_clothing)'], 'def': 'home appliance consisting of a flat metal base that is heated and used to smooth cloth', 'name': 'iron_(for_clothing)'}, {'frequency': 'r', 'id': 600, 'synset': 'ironing_board.n.01', 'synonyms': ['ironing_board'], 'def': 'narrow padded board on collapsible supports; used for ironing clothes', 'name': 'ironing_board'}, {'frequency': 'f', 'id': 601, 'synset': 'jacket.n.01', 'synonyms': ['jacket'], 'def': 'a waist-length coat', 'name': 'jacket'}, {'frequency': 'r', 'id': 602, 'synset': 'jam.n.01', 'synonyms': ['jam'], 'def': 'preserve of crushed fruit', 'name': 'jam'}, {'frequency': 'f', 'id': 603, 'synset': 'jean.n.01', 'synonyms': ['jean', 'blue_jean', 'denim'], 'def': '(usually plural) close-fitting trousers of heavy denim for manual work or casual wear', 'name': 'jean'}, {'frequency': 'c', 'id': 604, 'synset': 'jeep.n.01', 'synonyms': ['jeep', 'landrover'], 'def': 'a car suitable for traveling over rough terrain', 'name': 'jeep'}, {'frequency': 'r', 'id': 605, 'synset': 'jelly_bean.n.01', 'synonyms': ['jelly_bean', 'jelly_egg'], 'def': 'sugar-glazed jellied candy', 'name': 'jelly_bean'}, {'frequency': 'f', 'id': 606, 'synset': 'jersey.n.03', 'synonyms': ['jersey', 'T-shirt', 'tee_shirt'], 'def': 'a close-fitting pullover shirt', 'name': 'jersey'}, {'frequency': 'c', 'id': 607, 'synset': 'jet.n.01', 'synonyms': ['jet_plane', 'jet-propelled_plane'], 'def': 'an airplane powered by one or more jet engines', 'name': 'jet_plane'}, {'frequency': 'c', 'id': 608, 'synset': 'jewelry.n.01', 'synonyms': ['jewelry', 'jewellery'], 'def': 'an adornment (as a bracelet or ring or necklace) made of precious metals and set with gems (or imitation gems)', 'name': 'jewelry'}, {'frequency': 'r', 'id': 609, 'synset': 'joystick.n.02', 'synonyms': ['joystick'], 'def': 'a control device for computers consisting of a vertical 
handle that can move freely in two directions', 'name': 'joystick'}, {'frequency': 'r', 'id': 610, 'synset': 'jump_suit.n.01', 'synonyms': ['jumpsuit'], 'def': "one-piece garment fashioned after a parachutist's uniform", 'name': 'jumpsuit'}, {'frequency': 'c', 'id': 611, 'synset': 'kayak.n.01', 'synonyms': ['kayak'], 'def': 'a small canoe consisting of a light frame made watertight with animal skins', 'name': 'kayak'}, {'frequency': 'r', 'id': 612, 'synset': 'keg.n.02', 'synonyms': ['keg'], 'def': 'small cask or barrel', 'name': 'keg'}, {'frequency': 'r', 'id': 613, 'synset': 'kennel.n.01', 'synonyms': ['kennel', 'doghouse'], 'def': 'outbuilding that serves as a shelter for a dog', 'name': 'kennel'}, {'frequency': 'c', 'id': 614, 'synset': 'kettle.n.01', 'synonyms': ['kettle', 'boiler'], 'def': 'a metal pot for stewing or boiling; usually has a lid', 'name': 'kettle'}, {'frequency': 'f', 'id': 615, 'synset': 'key.n.01', 'synonyms': ['key'], 'def': 'metal instrument used to unlock a lock', 'name': 'key'}, {'frequency': 'r', 'id': 616, 'synset': 'keycard.n.01', 'synonyms': ['keycard'], 'def': 'a plastic card used to gain access typically to a door', 'name': 'keycard'}, {'frequency': 'r', 'id': 617, 'synset': 'kilt.n.01', 'synonyms': ['kilt'], 'def': 'a knee-length pleated tartan skirt worn by men as part of the traditional dress in the Highlands of northern Scotland', 'name': 'kilt'}, {'frequency': 'c', 'id': 618, 'synset': 'kimono.n.01', 'synonyms': ['kimono'], 'def': 'a loose robe; imitated from robes originally worn by Japanese', 'name': 'kimono'}, {'frequency': 'f', 'id': 619, 'synset': 'kitchen_sink.n.01', 'synonyms': ['kitchen_sink'], 'def': 'a sink in a kitchen', 'name': 'kitchen_sink'}, {'frequency': 'c', 'id': 620, 'synset': 'kitchen_table.n.01', 'synonyms': ['kitchen_table'], 'def': 'a table in the kitchen', 'name': 'kitchen_table'}, {'frequency': 'f', 'id': 621, 'synset': 'kite.n.03', 'synonyms': ['kite'], 'def': 'plaything consisting of a light frame covered with tissue paper; flown in wind at end of a string', 'name': 'kite'}, {'frequency': 'c', 'id': 622, 'synset': 'kitten.n.01', 'synonyms': ['kitten', 'kitty'], 'def': 'young domestic cat', 'name': 'kitten'}, {'frequency': 'c', 'id': 623, 'synset': 'kiwi.n.03', 'synonyms': ['kiwi_fruit'], 'def': 'fuzzy brown egg-shaped fruit with slightly tart green flesh', 'name': 'kiwi_fruit'}, {'frequency': 'f', 'id': 624, 'synset': 'knee_pad.n.01', 'synonyms': ['knee_pad'], 'def': 'protective garment consisting of a pad worn by football or baseball or hockey players', 'name': 'knee_pad'}, {'frequency': 'f', 'id': 625, 'synset': 'knife.n.01', 'synonyms': ['knife'], 'def': 'tool with a blade and point used as a cutting instrument', 'name': 'knife'}, {'frequency': 'r', 'id': 626, 'synset': 'knight.n.02', 'synonyms': ['knight_(chess_piece)', 'horse_(chess_piece)'], 'def': 'a chess game piece shaped to resemble the head of a horse', 'name': 'knight_(chess_piece)'}, {'frequency': 'r', 'id': 627, 'synset': 'knitting_needle.n.01', 'synonyms': ['knitting_needle'], 'def': 'needle consisting of a slender rod with pointed ends; usually used in pairs', 'name': 'knitting_needle'}, {'frequency': 'f', 'id': 628, 'synset': 'knob.n.02', 'synonyms': ['knob'], 'def': 'a round handle often found on a door', 'name': 'knob'}, {'frequency': 'r', 'id': 629, 'synset': 'knocker.n.05', 'synonyms': ['knocker_(on_a_door)', 'doorknocker'], 'def': 'a device (usually metal and ornamental) attached by a hinge to a door', 'name': 'knocker_(on_a_door)'}, {'frequency': 'r', 
'id': 630, 'synset': 'koala.n.01', 'synonyms': ['koala', 'koala_bear'], 'def': 'sluggish tailless Australian marsupial with grey furry ears and coat', 'name': 'koala'}, {'frequency': 'r', 'id': 631, 'synset': 'lab_coat.n.01', 'synonyms': ['lab_coat', 'laboratory_coat'], 'def': 'a light coat worn to protect clothing from substances used while working in a laboratory', 'name': 'lab_coat'}, {'frequency': 'f', 'id': 632, 'synset': 'ladder.n.01', 'synonyms': ['ladder'], 'def': 'steps consisting of two parallel members connected by rungs', 'name': 'ladder'}, {'frequency': 'c', 'id': 633, 'synset': 'ladle.n.01', 'synonyms': ['ladle'], 'def': 'a spoon-shaped vessel with a long handle frequently used to transfer liquids', 'name': 'ladle'}, {'frequency': 'r', 'id': 634, 'synset': 'ladybug.n.01', 'synonyms': ['ladybug', 'ladybeetle', 'ladybird_beetle'], 'def': 'small round bright-colored and spotted beetle, typically red and black', 'name': 'ladybug'}, {'frequency': 'c', 'id': 635, 'synset': 'lamb.n.01', 'synonyms': ['lamb_(animal)'], 'def': 'young sheep', 'name': 'lamb_(animal)'}, {'frequency': 'r', 'id': 636, 'synset': 'lamb_chop.n.01', 'synonyms': ['lamb-chop', 'lambchop'], 'def': 'chop cut from a lamb', 'name': 'lamb-chop'}, {'frequency': 'f', 'id': 637, 'synset': 'lamp.n.02', 'synonyms': ['lamp'], 'def': 'a piece of furniture holding one or more electric light bulbs', 'name': 'lamp'}, {'frequency': 'f', 'id': 638, 'synset': 'lamppost.n.01', 'synonyms': ['lamppost'], 'def': 'a metal post supporting an outdoor lamp (such as a streetlight)', 'name': 'lamppost'}, {'frequency': 'f', 'id': 639, 'synset': 'lampshade.n.01', 'synonyms': ['lampshade'], 'def': 'a protective ornamental shade used to screen a light bulb from direct view', 'name': 'lampshade'}, {'frequency': 'c', 'id': 640, 'synset': 'lantern.n.01', 'synonyms': ['lantern'], 'def': 'light in a transparent protective case', 'name': 'lantern'}, {'frequency': 'f', 'id': 641, 'synset': 'lanyard.n.02', 'synonyms': ['lanyard', 'laniard'], 'def': 'a cord worn around the neck to hold a knife or whistle, etc.', 'name': 'lanyard'}, {'frequency': 'f', 'id': 642, 'synset': 'laptop.n.01', 'synonyms': ['laptop_computer', 'notebook_computer'], 'def': 'a portable computer small enough to use in your lap', 'name': 'laptop_computer'}, {'frequency': 'r', 'id': 643, 'synset': 'lasagna.n.01', 'synonyms': ['lasagna', 'lasagne'], 'def': 'baked dish of layers of lasagna pasta with sauce and cheese and meat or vegetables', 'name': 'lasagna'}, {'frequency': 'c', 'id': 644, 'synset': 'latch.n.02', 'synonyms': ['latch'], 'def': 'a bar that can be lowered or slid into a groove to fasten a door or gate', 'name': 'latch'}, {'frequency': 'r', 'id': 645, 'synset': 'lawn_mower.n.01', 'synonyms': ['lawn_mower'], 'def': 'garden tool for mowing grass on lawns', 'name': 'lawn_mower'}, {'frequency': 'r', 'id': 646, 'synset': 'leather.n.01', 'synonyms': ['leather'], 'def': 'an animal skin made smooth and flexible by removing the hair and then tanning', 'name': 'leather'}, {'frequency': 'c', 'id': 647, 'synset': 'legging.n.01', 'synonyms': ['legging_(clothing)', 'leging_(clothing)', 'leg_covering'], 'def': 'a garment covering the leg (usually extending from the knee to the ankle)', 'name': 'legging_(clothing)'}, {'frequency': 'c', 'id': 648, 'synset': 'lego.n.01', 'synonyms': ['Lego', 'Lego_set'], 'def': "a child's plastic construction set for making models from blocks", 'name': 'Lego'}, {'frequency': 'f', 'id': 649, 'synset': 'lemon.n.01', 'synonyms': ['lemon'], 'def': 'yellow oval 
fruit with juicy acidic flesh', 'name': 'lemon'}, {'frequency': 'r', 'id': 650, 'synset': 'lemonade.n.01', 'synonyms': ['lemonade'], 'def': 'sweetened beverage of diluted lemon juice', 'name': 'lemonade'}, {'frequency': 'f', 'id': 651, 'synset': 'lettuce.n.02', 'synonyms': ['lettuce'], 'def': 'leafy plant commonly eaten in salad or on sandwiches', 'name': 'lettuce'}, {'frequency': 'f', 'id': 652, 'synset': 'license_plate.n.01', 'synonyms': ['license_plate', 'numberplate'], 'def': "a plate mounted on the front and back of car and bearing the car's registration number", 'name': 'license_plate'}, {'frequency': 'f', 'id': 653, 'synset': 'life_buoy.n.01', 'synonyms': ['life_buoy', 'lifesaver', 'life_belt', 'life_ring'], 'def': 'a ring-shaped life preserver used to prevent drowning (NOT a life-jacket or vest)', 'name': 'life_buoy'}, {'frequency': 'f', 'id': 654, 'synset': 'life_jacket.n.01', 'synonyms': ['life_jacket', 'life_vest'], 'def': 'life preserver consisting of a sleeveless jacket of buoyant or inflatable design', 'name': 'life_jacket'}, {'frequency': 'f', 'id': 655, 'synset': 'light_bulb.n.01', 'synonyms': ['lightbulb'], 'def': 'glass bulb or tube shaped electric device that emits light (DO NOT MARK LAMPS AS A WHOLE)', 'name': 'lightbulb'}, {'frequency': 'r', 'id': 656, 'synset': 'lightning_rod.n.02', 'synonyms': ['lightning_rod', 'lightning_conductor'], 'def': 'a metallic conductor that is attached to a high point and leads to the ground', 'name': 'lightning_rod'}, {'frequency': 'c', 'id': 657, 'synset': 'lime.n.06', 'synonyms': ['lime'], 'def': 'the green acidic fruit of any of various lime trees', 'name': 'lime'}, {'frequency': 'r', 'id': 658, 'synset': 'limousine.n.01', 'synonyms': ['limousine'], 'def': 'long luxurious car; usually driven by a chauffeur', 'name': 'limousine'}, {'frequency': 'r', 'id': 659, 'synset': 'linen.n.02', 'synonyms': ['linen_paper'], 'def': 'a high-quality paper made of linen fibers or with a linen finish', 'name': 'linen_paper'}, {'frequency': 'c', 'id': 660, 'synset': 'lion.n.01', 'synonyms': ['lion'], 'def': 'large gregarious predatory cat of Africa and India', 'name': 'lion'}, {'frequency': 'c', 'id': 661, 'synset': 'lip_balm.n.01', 'synonyms': ['lip_balm'], 'def': 'a balm applied to the lips', 'name': 'lip_balm'}, {'frequency': 'c', 'id': 662, 'synset': 'lipstick.n.01', 'synonyms': ['lipstick', 'lip_rouge'], 'def': 'makeup that is used to color the lips', 'name': 'lipstick'}, {'frequency': 'r', 'id': 663, 'synset': 'liquor.n.01', 'synonyms': ['liquor', 'spirits', 'hard_liquor', 'liqueur', 'cordial'], 'def': 'an alcoholic beverage that is distilled rather than fermented', 'name': 'liquor'}, {'frequency': 'r', 'id': 664, 'synset': 'lizard.n.01', 'synonyms': ['lizard'], 'def': 'a reptile with usually two pairs of legs and a tapering tail', 'name': 'lizard'}, {'frequency': 'r', 'id': 665, 'synset': 'loafer.n.02', 'synonyms': ['Loafer_(type_of_shoe)'], 'def': 'a low leather step-in shoe', 'name': 'Loafer_(type_of_shoe)'}, {'frequency': 'f', 'id': 666, 'synset': 'log.n.01', 'synonyms': ['log'], 'def': 'a segment of the trunk of a tree when stripped of branches', 'name': 'log'}, {'frequency': 'c', 'id': 667, 'synset': 'lollipop.n.02', 'synonyms': ['lollipop'], 'def': 'hard candy on a stick', 'name': 'lollipop'}, {'frequency': 'c', 'id': 668, 'synset': 'lotion.n.01', 'synonyms': ['lotion'], 'def': 'any of various cosmetic preparations that are applied to the skin', 'name': 'lotion'}, {'frequency': 'f', 'id': 669, 'synset': 'loudspeaker.n.01', 'synonyms': 
['speaker_(stero_equipment)'], 'def': 'electronic device that produces sound often as part of a stereo system', 'name': 'speaker_(stero_equipment)'}, {'frequency': 'c', 'id': 670, 'synset': 'love_seat.n.01', 'synonyms': ['loveseat'], 'def': 'small sofa that seats two people', 'name': 'loveseat'}, {'frequency': 'r', 'id': 671, 'synset': 'machine_gun.n.01', 'synonyms': ['machine_gun'], 'def': 'a rapidly firing automatic gun', 'name': 'machine_gun'}, {'frequency': 'f', 'id': 672, 'synset': 'magazine.n.02', 'synonyms': ['magazine'], 'def': 'a paperback periodic publication', 'name': 'magazine'}, {'frequency': 'f', 'id': 673, 'synset': 'magnet.n.01', 'synonyms': ['magnet'], 'def': 'a device that attracts iron and produces a magnetic field', 'name': 'magnet'}, {'frequency': 'r', 'id': 674, 'synset': 'mail_slot.n.01', 'synonyms': ['mail_slot'], 'def': 'a slot (usually in a door) through which mail can be delivered', 'name': 'mail_slot'}, {'frequency': 'c', 'id': 675, 'synset': 'mailbox.n.01', 'synonyms': ['mailbox_(at_home)', 'letter_box_(at_home)'], 'def': 'a private box for delivery of mail', 'name': 'mailbox_(at_home)'}, {'frequency': 'r', 'id': 676, 'synset': 'mallet.n.01', 'synonyms': ['mallet'], 'def': 'a sports implement with a long handle and a hammer-like head used to hit a ball', 'name': 'mallet'}, {'frequency': 'r', 'id': 677, 'synset': 'mammoth.n.01', 'synonyms': ['mammoth'], 'def': 'any of numerous extinct elephants widely distributed in the Pleistocene', 'name': 'mammoth'}, {'frequency': 'c', 'id': 678, 'synset': 'mandarin.n.05', 'synonyms': ['mandarin_orange'], 'def': 'a somewhat flat reddish-orange loose skinned citrus of China', 'name': 'mandarin_orange'}, {'frequency': 'c', 'id': 679, 'synset': 'manger.n.01', 'synonyms': ['manger', 'trough'], 'def': 'a container (usually in a barn or stable) from which cattle or horses feed', 'name': 'manger'}, {'frequency': 'f', 'id': 680, 'synset': 'manhole.n.01', 'synonyms': ['manhole'], 'def': 'a hole (usually with a flush cover) through which a person can gain access to an underground structure', 'name': 'manhole'}, {'frequency': 'c', 'id': 681, 'synset': 'map.n.01', 'synonyms': ['map'], 'def': "a diagrammatic representation of the earth's surface (or part of it)", 'name': 'map'}, {'frequency': 'c', 'id': 682, 'synset': 'marker.n.03', 'synonyms': ['marker'], 'def': 'a writing implement for making a mark', 'name': 'marker'}, {'frequency': 'r', 'id': 683, 'synset': 'martini.n.01', 'synonyms': ['martini'], 'def': 'a cocktail made of gin (or vodka) with dry vermouth', 'name': 'martini'}, {'frequency': 'r', 'id': 684, 'synset': 'mascot.n.01', 'synonyms': ['mascot'], 'def': 'a person or animal that is adopted by a team or other group as a symbolic figure', 'name': 'mascot'}, {'frequency': 'c', 'id': 685, 'synset': 'mashed_potato.n.01', 'synonyms': ['mashed_potato'], 'def': 'potato that has been peeled and boiled and then mashed', 'name': 'mashed_potato'}, {'frequency': 'r', 'id': 686, 'synset': 'masher.n.02', 'synonyms': ['masher'], 'def': 'a kitchen utensil used for mashing (e.g. 
potatoes)', 'name': 'masher'}, {'frequency': 'f', 'id': 687, 'synset': 'mask.n.04', 'synonyms': ['mask', 'facemask'], 'def': 'a protective covering worn over the face', 'name': 'mask'}, {'frequency': 'f', 'id': 688, 'synset': 'mast.n.01', 'synonyms': ['mast'], 'def': 'a vertical spar for supporting sails', 'name': 'mast'}, {'frequency': 'c', 'id': 689, 'synset': 'mat.n.03', 'synonyms': ['mat_(gym_equipment)', 'gym_mat'], 'def': 'sports equipment consisting of a piece of thick padding on the floor for gymnastics', 'name': 'mat_(gym_equipment)'}, {'frequency': 'r', 'id': 690, 'synset': 'matchbox.n.01', 'synonyms': ['matchbox'], 'def': 'a box for holding matches', 'name': 'matchbox'}, {'frequency': 'f', 'id': 691, 'synset': 'mattress.n.01', 'synonyms': ['mattress'], 'def': 'a thick pad filled with resilient material used as a bed or part of a bed', 'name': 'mattress'}, {'frequency': 'c', 'id': 692, 'synset': 'measuring_cup.n.01', 'synonyms': ['measuring_cup'], 'def': 'graduated cup used to measure liquid or granular ingredients', 'name': 'measuring_cup'}, {'frequency': 'c', 'id': 693, 'synset': 'measuring_stick.n.01', 'synonyms': ['measuring_stick', 'ruler_(measuring_stick)', 'measuring_rod'], 'def': 'measuring instrument having a sequence of marks at regular intervals', 'name': 'measuring_stick'}, {'frequency': 'c', 'id': 694, 'synset': 'meatball.n.01', 'synonyms': ['meatball'], 'def': 'ground meat formed into a ball and fried or simmered in broth', 'name': 'meatball'}, {'frequency': 'c', 'id': 695, 'synset': 'medicine.n.02', 'synonyms': ['medicine'], 'def': 'something that treats or prevents or alleviates the symptoms of disease', 'name': 'medicine'}, {'frequency': 'r', 'id': 696, 'synset': 'melon.n.01', 'synonyms': ['melon'], 'def': 'fruit of the gourd family having a hard rind and sweet juicy flesh', 'name': 'melon'}, {'frequency': 'f', 'id': 697, 'synset': 'microphone.n.01', 'synonyms': ['microphone'], 'def': 'device for converting sound waves into electrical energy', 'name': 'microphone'}, {'frequency': 'r', 'id': 698, 'synset': 'microscope.n.01', 'synonyms': ['microscope'], 'def': 'magnifier of the image of small objects', 'name': 'microscope'}, {'frequency': 'f', 'id': 699, 'synset': 'microwave.n.02', 'synonyms': ['microwave_oven'], 'def': 'kitchen appliance that cooks food by passing an electromagnetic wave through it', 'name': 'microwave_oven'}, {'frequency': 'r', 'id': 700, 'synset': 'milestone.n.01', 'synonyms': ['milestone', 'milepost'], 'def': 'stone post at side of a road to show distances', 'name': 'milestone'}, {'frequency': 'c', 'id': 701, 'synset': 'milk.n.01', 'synonyms': ['milk'], 'def': 'a white nutritious liquid secreted by mammals and used as food by human beings', 'name': 'milk'}, {'frequency': 'f', 'id': 702, 'synset': 'minivan.n.01', 'synonyms': ['minivan'], 'def': 'a small box-shaped passenger van', 'name': 'minivan'}, {'frequency': 'r', 'id': 703, 'synset': 'mint.n.05', 'synonyms': ['mint_candy'], 'def': 'a candy that is flavored with a mint oil', 'name': 'mint_candy'}, {'frequency': 'f', 'id': 704, 'synset': 'mirror.n.01', 'synonyms': ['mirror'], 'def': 'polished surface that forms images by reflecting light', 'name': 'mirror'}, {'frequency': 'c', 'id': 705, 'synset': 'mitten.n.01', 'synonyms': ['mitten'], 'def': 'glove that encases the thumb separately and the other four fingers together', 'name': 'mitten'}, {'frequency': 'c', 'id': 706, 'synset': 'mixer.n.04', 'synonyms': ['mixer_(kitchen_tool)', 'stand_mixer'], 'def': 'a kitchen utensil that is used for mixing 
foods', 'name': 'mixer_(kitchen_tool)'}, {'frequency': 'c', 'id': 707, 'synset': 'money.n.03', 'synonyms': ['money'], 'def': 'the official currency issued by a government or national bank', 'name': 'money'}, {'frequency': 'f', 'id': 708, 'synset': 'monitor.n.04', 'synonyms': ['monitor_(computer_equipment) computer_monitor'], 'def': 'a computer monitor', 'name': 'monitor_(computer_equipment) computer_monitor'}, {'frequency': 'c', 'id': 709, 'synset': 'monkey.n.01', 'synonyms': ['monkey'], 'def': 'any of various long-tailed primates', 'name': 'monkey'}, {'frequency': 'f', 'id': 710, 'synset': 'motor.n.01', 'synonyms': ['motor'], 'def': 'machine that converts other forms of energy into mechanical energy and so imparts motion', 'name': 'motor'}, {'frequency': 'f', 'id': 711, 'synset': 'motor_scooter.n.01', 'synonyms': ['motor_scooter', 'scooter'], 'def': 'a wheeled vehicle with small wheels and a low-powered engine', 'name': 'motor_scooter'}, {'frequency': 'r', 'id': 712, 'synset': 'motor_vehicle.n.01', 'synonyms': ['motor_vehicle', 'automotive_vehicle'], 'def': 'a self-propelled wheeled vehicle that does not run on rails', 'name': 'motor_vehicle'}, {'frequency': 'r', 'id': 713, 'synset': 'motorboat.n.01', 'synonyms': ['motorboat', 'powerboat'], 'def': 'a boat propelled by an internal-combustion engine', 'name': 'motorboat'}, {'frequency': 'f', 'id': 714, 'synset': 'motorcycle.n.01', 'synonyms': ['motorcycle'], 'def': 'a motor vehicle with two wheels and a strong frame', 'name': 'motorcycle'}, {'frequency': 'f', 'id': 715, 'synset': 'mound.n.01', 'synonyms': ['mound_(baseball)', "pitcher's_mound"], 'def': '(baseball) the slight elevation on which the pitcher stands', 'name': 'mound_(baseball)'}, {'frequency': 'r', 'id': 716, 'synset': 'mouse.n.01', 'synonyms': ['mouse_(animal_rodent)'], 'def': 'a small rodent with pointed snouts and small ears on elongated bodies with slender usually hairless tails', 'name': 'mouse_(animal_rodent)'}, {'frequency': 'f', 'id': 717, 'synset': 'mouse.n.04', 'synonyms': ['mouse_(computer_equipment)', 'computer_mouse'], 'def': 'a computer input device that controls an on-screen pointer', 'name': 'mouse_(computer_equipment)'}, {'frequency': 'f', 'id': 718, 'synset': 'mousepad.n.01', 'synonyms': ['mousepad'], 'def': 'a small portable pad that provides an operating surface for a computer mouse', 'name': 'mousepad'}, {'frequency': 'c', 'id': 719, 'synset': 'muffin.n.01', 'synonyms': ['muffin'], 'def': 'a sweet quick bread baked in a cup-shaped pan', 'name': 'muffin'}, {'frequency': 'f', 'id': 720, 'synset': 'mug.n.04', 'synonyms': ['mug'], 'def': 'with handle and usually cylindrical', 'name': 'mug'}, {'frequency': 'f', 'id': 721, 'synset': 'mushroom.n.02', 'synonyms': ['mushroom'], 'def': 'a common mushroom', 'name': 'mushroom'}, {'frequency': 'r', 'id': 722, 'synset': 'music_stool.n.01', 'synonyms': ['music_stool', 'piano_stool'], 'def': 'a stool for piano players; usually adjustable in height', 'name': 'music_stool'}, {'frequency': 'r', 'id': 723, 'synset': 'musical_instrument.n.01', 'synonyms': ['musical_instrument', 'instrument_(musical)'], 'def': 'any of various devices or contrivances that can be used to produce musical tones or sounds', 'name': 'musical_instrument'}, {'frequency': 'r', 'id': 724, 'synset': 'nailfile.n.01', 'synonyms': ['nailfile'], 'def': 'a small flat file for shaping the nails', 'name': 'nailfile'}, {'frequency': 'r', 'id': 725, 'synset': 'nameplate.n.01', 'synonyms': ['nameplate'], 'def': 'a plate bearing a name', 'name': 'nameplate'}, 
{'frequency': 'f', 'id': 726, 'synset': 'napkin.n.01', 'synonyms': ['napkin', 'table_napkin', 'serviette'], 'def': 'a small piece of table linen or paper that is used to wipe the mouth and to cover the lap in order to protect clothing', 'name': 'napkin'}, {'frequency': 'r', 'id': 727, 'synset': 'neckerchief.n.01', 'synonyms': ['neckerchief'], 'def': 'a kerchief worn around the neck', 'name': 'neckerchief'}, {'frequency': 'f', 'id': 728, 'synset': 'necklace.n.01', 'synonyms': ['necklace'], 'def': 'jewelry consisting of a cord or chain (often bearing gems) worn about the neck as an ornament', 'name': 'necklace'}, {'frequency': 'f', 'id': 729, 'synset': 'necktie.n.01', 'synonyms': ['necktie', 'tie_(necktie)'], 'def': 'neckwear consisting of a long narrow piece of material worn under a collar and tied in knot at the front', 'name': 'necktie'}, {'frequency': 'r', 'id': 730, 'synset': 'needle.n.03', 'synonyms': ['needle'], 'def': 'a sharp pointed implement (usually metal)', 'name': 'needle'}, {'frequency': 'c', 'id': 731, 'synset': 'nest.n.01', 'synonyms': ['nest'], 'def': 'a structure in which animals lay eggs or give birth to their young', 'name': 'nest'}, {'frequency': 'r', 'id': 732, 'synset': 'newsstand.n.01', 'synonyms': ['newsstand'], 'def': 'a stall where newspapers and other periodicals are sold', 'name': 'newsstand'}, {'frequency': 'c', 'id': 733, 'synset': 'nightwear.n.01', 'synonyms': ['nightshirt', 'nightwear', 'sleepwear', 'nightclothes'], 'def': 'garments designed to be worn in bed', 'name': 'nightshirt'}, {'frequency': 'r', 'id': 734, 'synset': 'nosebag.n.01', 'synonyms': ['nosebag_(for_animals)', 'feedbag'], 'def': 'a canvas bag that is used to feed an animal (such as a horse); covers the muzzle and fastens at the top of the head', 'name': 'nosebag_(for_animals)'}, {'frequency': 'r', 'id': 735, 'synset': 'noseband.n.01', 'synonyms': ['noseband_(for_animals)', 'nosepiece_(for_animals)'], 'def': "a strap that is the part of a bridle that goes over the animal's nose", 'name': 'noseband_(for_animals)'}, {'frequency': 'f', 'id': 736, 'synset': 'notebook.n.01', 'synonyms': ['notebook'], 'def': 'a book with blank pages for recording notes or memoranda', 'name': 'notebook'}, {'frequency': 'c', 'id': 737, 'synset': 'notepad.n.01', 'synonyms': ['notepad'], 'def': 'a pad of paper for keeping notes', 'name': 'notepad'}, {'frequency': 'c', 'id': 738, 'synset': 'nut.n.03', 'synonyms': ['nut'], 'def': 'a small metal block (usually square or hexagonal) with internal screw thread to be fitted onto a bolt', 'name': 'nut'}, {'frequency': 'r', 'id': 739, 'synset': 'nutcracker.n.01', 'synonyms': ['nutcracker'], 'def': 'a hand tool used to crack nuts open', 'name': 'nutcracker'}, {'frequency': 'c', 'id': 740, 'synset': 'oar.n.01', 'synonyms': ['oar'], 'def': 'an implement used to propel or steer a boat', 'name': 'oar'}, {'frequency': 'r', 'id': 741, 'synset': 'octopus.n.01', 'synonyms': ['octopus_(food)'], 'def': 'tentacles of octopus prepared as food', 'name': 'octopus_(food)'}, {'frequency': 'r', 'id': 742, 'synset': 'octopus.n.02', 'synonyms': ['octopus_(animal)'], 'def': 'bottom-living cephalopod having a soft oval body with eight long tentacles', 'name': 'octopus_(animal)'}, {'frequency': 'c', 'id': 743, 'synset': 'oil_lamp.n.01', 'synonyms': ['oil_lamp', 'kerosene_lamp', 'kerosine_lamp'], 'def': 'a lamp that burns oil (as kerosine) for light', 'name': 'oil_lamp'}, {'frequency': 'c', 'id': 744, 'synset': 'olive_oil.n.01', 'synonyms': ['olive_oil'], 'def': 'oil from olives', 'name': 'olive_oil'}, 
{'frequency': 'r', 'id': 745, 'synset': 'omelet.n.01', 'synonyms': ['omelet', 'omelette'], 'def': 'beaten eggs cooked until just set; may be folded around e.g. ham or cheese or jelly', 'name': 'omelet'}, {'frequency': 'f', 'id': 746, 'synset': 'onion.n.01', 'synonyms': ['onion'], 'def': 'the bulb of an onion plant', 'name': 'onion'}, {'frequency': 'f', 'id': 747, 'synset': 'orange.n.01', 'synonyms': ['orange_(fruit)'], 'def': 'orange (FRUIT of an orange tree)', 'name': 'orange_(fruit)'}, {'frequency': 'c', 'id': 748, 'synset': 'orange_juice.n.01', 'synonyms': ['orange_juice'], 'def': 'bottled or freshly squeezed juice of oranges', 'name': 'orange_juice'}, {'frequency': 'r', 'id': 749, 'synset': 'oregano.n.01', 'synonyms': ['oregano', 'marjoram'], 'def': 'aromatic Eurasian perennial herb used in cooking and baking', 'name': 'oregano'}, {'frequency': 'c', 'id': 750, 'synset': 'ostrich.n.02', 'synonyms': ['ostrich'], 'def': 'fast-running African flightless bird with two-toed feet; largest living bird', 'name': 'ostrich'}, {'frequency': 'c', 'id': 751, 'synset': 'ottoman.n.03', 'synonyms': ['ottoman', 'pouf', 'pouffe', 'hassock'], 'def': 'thick cushion used as a seat', 'name': 'ottoman'}, {'frequency': 'c', 'id': 752, 'synset': 'overall.n.01', 'synonyms': ['overalls_(clothing)'], 'def': 'work clothing consisting of denim trousers usually with a bib and shoulder straps', 'name': 'overalls_(clothing)'}, {'frequency': 'c', 'id': 753, 'synset': 'owl.n.01', 'synonyms': ['owl'], 'def': 'nocturnal bird of prey with hawk-like beak and claws and large head with front-facing eyes', 'name': 'owl'}, {'frequency': 'c', 'id': 754, 'synset': 'packet.n.03', 'synonyms': ['packet'], 'def': 'a small package or bundle', 'name': 'packet'}, {'frequency': 'r', 'id': 755, 'synset': 'pad.n.03', 'synonyms': ['inkpad', 'inking_pad', 'stamp_pad'], 'def': 'absorbent material saturated with ink used to transfer ink evenly to a rubber stamp', 'name': 'inkpad'}, {'frequency': 'c', 'id': 756, 'synset': 'pad.n.04', 'synonyms': ['pad'], 'def': 'a flat mass of soft material used for protection, stuffing, or comfort', 'name': 'pad'}, {'frequency': 'c', 'id': 757, 'synset': 'paddle.n.04', 'synonyms': ['paddle', 'boat_paddle'], 'def': 'a short light oar used without an oarlock to propel a canoe or small boat', 'name': 'paddle'}, {'frequency': 'c', 'id': 758, 'synset': 'padlock.n.01', 'synonyms': ['padlock'], 'def': 'a detachable, portable lock', 'name': 'padlock'}, {'frequency': 'r', 'id': 759, 'synset': 'paintbox.n.01', 'synonyms': ['paintbox'], 'def': "a box containing a collection of cubes or tubes of artists' paint", 'name': 'paintbox'}, {'frequency': 'c', 'id': 760, 'synset': 'paintbrush.n.01', 'synonyms': ['paintbrush'], 'def': 'a brush used as an applicator to apply paint', 'name': 'paintbrush'}, {'frequency': 'f', 'id': 761, 'synset': 'painting.n.01', 'synonyms': ['painting'], 'def': 'graphic art consisting of an artistic composition made by applying paints to a surface', 'name': 'painting'}, {'frequency': 'c', 'id': 762, 'synset': 'pajama.n.02', 'synonyms': ['pajamas', 'pyjamas'], 'def': 'loose-fitting nightclothes worn for sleeping or lounging', 'name': 'pajamas'}, {'frequency': 'c', 'id': 763, 'synset': 'palette.n.02', 'synonyms': ['palette', 'pallet'], 'def': 'board that provides a flat surface on which artists mix paints and the range of colors used', 'name': 'palette'}, {'frequency': 'f', 'id': 764, 'synset': 'pan.n.01', 'synonyms': ['pan_(for_cooking)', 'cooking_pan'], 'def': 'cooking utensil consisting of a wide 
metal vessel', 'name': 'pan_(for_cooking)'}, {'frequency': 'r', 'id': 765, 'synset': 'pan.n.03', 'synonyms': ['pan_(metal_container)'], 'def': 'shallow container made of metal', 'name': 'pan_(metal_container)'}, {'frequency': 'c', 'id': 766, 'synset': 'pancake.n.01', 'synonyms': ['pancake'], 'def': 'a flat cake of thin batter fried on both sides on a griddle', 'name': 'pancake'}, {'frequency': 'r', 'id': 767, 'synset': 'pantyhose.n.01', 'synonyms': ['pantyhose'], 'def': "a woman's tights consisting of underpants and stockings", 'name': 'pantyhose'}, {'frequency': 'r', 'id': 768, 'synset': 'papaya.n.02', 'synonyms': ['papaya'], 'def': 'large oval melon-like tropical fruit with yellowish flesh', 'name': 'papaya'}, {'frequency': 'r', 'id': 769, 'synset': 'paper_clip.n.01', 'synonyms': ['paperclip'], 'def': 'a wire or plastic clip for holding sheets of paper together', 'name': 'paperclip'}, {'frequency': 'f', 'id': 770, 'synset': 'paper_plate.n.01', 'synonyms': ['paper_plate'], 'def': 'a disposable plate made of cardboard', 'name': 'paper_plate'}, {'frequency': 'f', 'id': 771, 'synset': 'paper_towel.n.01', 'synonyms': ['paper_towel'], 'def': 'a disposable towel made of absorbent paper', 'name': 'paper_towel'}, {'frequency': 'r', 'id': 772, 'synset': 'paperback_book.n.01', 'synonyms': ['paperback_book', 'paper-back_book', 'softback_book', 'soft-cover_book'], 'def': 'a book with paper covers', 'name': 'paperback_book'}, {'frequency': 'r', 'id': 773, 'synset': 'paperweight.n.01', 'synonyms': ['paperweight'], 'def': 'a weight used to hold down a stack of papers', 'name': 'paperweight'}, {'frequency': 'c', 'id': 774, 'synset': 'parachute.n.01', 'synonyms': ['parachute'], 'def': 'rescue equipment consisting of a device that fills with air and retards your fall', 'name': 'parachute'}, {'frequency': 'r', 'id': 775, 'synset': 'parakeet.n.01', 'synonyms': ['parakeet', 'parrakeet', 'parroket', 'paraquet', 'paroquet', 'parroquet'], 'def': 'any of numerous small slender long-tailed parrots', 'name': 'parakeet'}, {'frequency': 'c', 'id': 776, 'synset': 'parasail.n.01', 'synonyms': ['parasail_(sports)'], 'def': 'parachute that will lift a person up into the air when it is towed by a motorboat or a car', 'name': 'parasail_(sports)'}, {'frequency': 'r', 'id': 777, 'synset': 'parchment.n.01', 'synonyms': ['parchment'], 'def': 'a superior paper resembling sheepskin', 'name': 'parchment'}, {'frequency': 'r', 'id': 778, 'synset': 'parka.n.01', 'synonyms': ['parka', 'anorak'], 'def': "a kind of heavy jacket (`windcheater' is a British term)", 'name': 'parka'}, {'frequency': 'f', 'id': 779, 'synset': 'parking_meter.n.01', 'synonyms': ['parking_meter'], 'def': 'a coin-operated timer located next to a parking space', 'name': 'parking_meter'}, {'frequency': 'c', 'id': 780, 'synset': 'parrot.n.01', 'synonyms': ['parrot'], 'def': 'usually brightly colored tropical birds with short hooked beaks and the ability to mimic sounds', 'name': 'parrot'}, {'frequency': 'c', 'id': 781, 'synset': 'passenger_car.n.01', 'synonyms': ['passenger_car_(part_of_a_train)', 'coach_(part_of_a_train)'], 'def': 'a railcar where passengers ride', 'name': 'passenger_car_(part_of_a_train)'}, {'frequency': 'r', 'id': 782, 'synset': 'passenger_ship.n.01', 'synonyms': ['passenger_ship'], 'def': 'a ship built to carry passengers', 'name': 'passenger_ship'}, {'frequency': 'r', 'id': 783, 'synset': 'passport.n.02', 'synonyms': ['passport'], 'def': 'a document issued by a country to a citizen allowing that person to travel abroad and re-enter the home 
country', 'name': 'passport'}, {'frequency': 'f', 'id': 784, 'synset': 'pastry.n.02', 'synonyms': ['pastry'], 'def': 'any of various baked foods made of dough or batter', 'name': 'pastry'}, {'frequency': 'r', 'id': 785, 'synset': 'patty.n.01', 'synonyms': ['patty_(food)'], 'def': 'small flat mass of chopped food', 'name': 'patty_(food)'}, {'frequency': 'c', 'id': 786, 'synset': 'pea.n.01', 'synonyms': ['pea_(food)'], 'def': 'seed of a pea plant used for food', 'name': 'pea_(food)'}, {'frequency': 'c', 'id': 787, 'synset': 'peach.n.03', 'synonyms': ['peach'], 'def': 'downy juicy fruit with sweet yellowish or whitish flesh', 'name': 'peach'}, {'frequency': 'c', 'id': 788, 'synset': 'peanut_butter.n.01', 'synonyms': ['peanut_butter'], 'def': 'a spread made from ground peanuts', 'name': 'peanut_butter'}, {'frequency': 'c', 'id': 789, 'synset': 'pear.n.01', 'synonyms': ['pear'], 'def': 'sweet juicy gritty-textured fruit available in many varieties', 'name': 'pear'}, {'frequency': 'r', 'id': 790, 'synset': 'peeler.n.03', 'synonyms': ['peeler_(tool_for_fruit_and_vegetables)'], 'def': 'a device for peeling vegetables or fruits', 'name': 'peeler_(tool_for_fruit_and_vegetables)'}, {'frequency': 'r', 'id': 791, 'synset': 'pegboard.n.01', 'synonyms': ['pegboard'], 'def': 'a board perforated with regularly spaced holes into which pegs can be fitted', 'name': 'pegboard'}, {'frequency': 'c', 'id': 792, 'synset': 'pelican.n.01', 'synonyms': ['pelican'], 'def': 'large long-winged warm-water seabird having a large bill with a distensible pouch for fish', 'name': 'pelican'}, {'frequency': 'f', 'id': 793, 'synset': 'pen.n.01', 'synonyms': ['pen'], 'def': 'a writing implement with a point from which ink flows', 'name': 'pen'}, {'frequency': 'c', 'id': 794, 'synset': 'pencil.n.01', 'synonyms': ['pencil'], 'def': 'a thin cylindrical pointed writing implement made of wood and graphite', 'name': 'pencil'}, {'frequency': 'r', 'id': 795, 'synset': 'pencil_box.n.01', 'synonyms': ['pencil_box', 'pencil_case'], 'def': 'a box for holding pencils', 'name': 'pencil_box'}, {'frequency': 'r', 'id': 796, 'synset': 'pencil_sharpener.n.01', 'synonyms': ['pencil_sharpener'], 'def': 'a rotary implement for sharpening the point on pencils', 'name': 'pencil_sharpener'}, {'frequency': 'r', 'id': 797, 'synset': 'pendulum.n.01', 'synonyms': ['pendulum'], 'def': 'an apparatus consisting of an object mounted so that it swings freely under the influence of gravity', 'name': 'pendulum'}, {'frequency': 'c', 'id': 798, 'synset': 'penguin.n.01', 'synonyms': ['penguin'], 'def': 'short-legged flightless birds of cold southern regions having webbed feet and wings modified as flippers', 'name': 'penguin'}, {'frequency': 'r', 'id': 799, 'synset': 'pennant.n.02', 'synonyms': ['pennant'], 'def': 'a flag longer than it is wide (and often tapering)', 'name': 'pennant'}, {'frequency': 'r', 'id': 800, 'synset': 'penny.n.02', 'synonyms': ['penny_(coin)'], 'def': 'a coin worth one-hundredth of the value of the basic unit', 'name': 'penny_(coin)'}, {'frequency': 'c', 'id': 801, 'synset': 'pepper.n.03', 'synonyms': ['pepper', 'peppercorn'], 'def': 'pungent seasoning from the berry of the common pepper plant; whole or ground', 'name': 'pepper'}, {'frequency': 'c', 'id': 802, 'synset': 'pepper_mill.n.01', 'synonyms': ['pepper_mill', 'pepper_grinder'], 'def': 'a mill for grinding pepper', 'name': 'pepper_mill'}, {'frequency': 'c', 'id': 803, 'synset': 'perfume.n.02', 'synonyms': ['perfume'], 'def': 'a toiletry that emits and diffuses a fragrant odor', 
'name': 'perfume'}, {'frequency': 'r', 'id': 804, 'synset': 'persimmon.n.02', 'synonyms': ['persimmon'], 'def': 'orange fruit resembling a plum; edible when fully ripe', 'name': 'persimmon'}, {'frequency': 'f', 'id': 805, 'synset': 'person.n.01', 'synonyms': ['baby', 'child', 'boy', 'girl', 'man', 'woman', 'person', 'human'], 'def': 'a human being', 'name': 'baby'}, {'frequency': 'r', 'id': 806, 'synset': 'pet.n.01', 'synonyms': ['pet'], 'def': 'a domesticated animal kept for companionship or amusement', 'name': 'pet'}, {'frequency': 'r', 'id': 807, 'synset': 'petfood.n.01', 'synonyms': ['petfood', 'pet-food'], 'def': 'food prepared for animal pets', 'name': 'petfood'}, {'frequency': 'r', 'id': 808, 'synset': 'pew.n.01', 'synonyms': ['pew_(church_bench)', 'church_bench'], 'def': 'long bench with backs; used in church by the congregation', 'name': 'pew_(church_bench)'}, {'frequency': 'r', 'id': 809, 'synset': 'phonebook.n.01', 'synonyms': ['phonebook', 'telephone_book', 'telephone_directory'], 'def': 'a directory containing an alphabetical list of telephone subscribers and their telephone numbers', 'name': 'phonebook'}, {'frequency': 'c', 'id': 810, 'synset': 'phonograph_record.n.01', 'synonyms': ['phonograph_record', 'phonograph_recording', 'record_(phonograph_recording)'], 'def': 'sound recording consisting of a typically black disk with a continuous groove', 'name': 'phonograph_record'}, {'frequency': 'c', 'id': 811, 'synset': 'piano.n.01', 'synonyms': ['piano'], 'def': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds', 'name': 'piano'}, {'frequency': 'f', 'id': 812, 'synset': 'pickle.n.01', 'synonyms': ['pickle'], 'def': 'vegetables (especially cucumbers) preserved in brine or vinegar', 'name': 'pickle'}, {'frequency': 'f', 'id': 813, 'synset': 'pickup.n.01', 'synonyms': ['pickup_truck'], 'def': 'a light truck with an open body and low sides and a tailboard', 'name': 'pickup_truck'}, {'frequency': 'c', 'id': 814, 'synset': 'pie.n.01', 'synonyms': ['pie'], 'def': 'dish baked in pastry-lined pan often with a pastry top', 'name': 'pie'}, {'frequency': 'c', 'id': 815, 'synset': 'pigeon.n.01', 'synonyms': ['pigeon'], 'def': 'wild and domesticated birds having a heavy body and short legs', 'name': 'pigeon'}, {'frequency': 'r', 'id': 816, 'synset': 'piggy_bank.n.01', 'synonyms': ['piggy_bank', 'penny_bank'], 'def': "a child's coin bank (often shaped like a pig)", 'name': 'piggy_bank'}, {'frequency': 'f', 'id': 817, 'synset': 'pillow.n.01', 'synonyms': ['pillow'], 'def': 'a cushion to support the head of a sleeping person', 'name': 'pillow'}, {'frequency': 'r', 'id': 818, 'synset': 'pin.n.09', 'synonyms': ['pin_(non_jewelry)'], 'def': 'a small slender (often pointed) piece of wood or metal used to support or fasten or attach things', 'name': 'pin_(non_jewelry)'}, {'frequency': 'f', 'id': 819, 'synset': 'pineapple.n.02', 'synonyms': ['pineapple'], 'def': 'large sweet fleshy tropical fruit with a tuft of stiff leaves', 'name': 'pineapple'}, {'frequency': 'c', 'id': 820, 'synset': 'pinecone.n.01', 'synonyms': ['pinecone'], 'def': 'the seed-producing cone of a pine tree', 'name': 'pinecone'}, {'frequency': 'r', 'id': 821, 'synset': 'ping-pong_ball.n.01', 'synonyms': ['ping-pong_ball'], 'def': 'light hollow ball used in playing table tennis', 'name': 'ping-pong_ball'}, {'frequency': 'r', 'id': 822, 'synset': 'pinwheel.n.03', 'synonyms': ['pinwheel'], 'def': 'a toy consisting of vanes of colored paper or plastic that is pinned to a 
stick and spins when it is pointed into the wind', 'name': 'pinwheel'}, {'frequency': 'r', 'id': 823, 'synset': 'pipe.n.01', 'synonyms': ['tobacco_pipe'], 'def': 'a tube with a small bowl at one end; used for smoking tobacco', 'name': 'tobacco_pipe'}, {'frequency': 'f', 'id': 824, 'synset': 'pipe.n.02', 'synonyms': ['pipe', 'piping'], 'def': 'a long tube made of metal or plastic that is used to carry water or oil or gas etc.', 'name': 'pipe'}, {'frequency': 'r', 'id': 825, 'synset': 'pistol.n.01', 'synonyms': ['pistol', 'handgun'], 'def': 'a firearm that is held and fired with one hand', 'name': 'pistol'}, {'frequency': 'r', 'id': 826, 'synset': 'pita.n.01', 'synonyms': ['pita_(bread)', 'pocket_bread'], 'def': 'usually small round bread that can open into a pocket for filling', 'name': 'pita_(bread)'}, {'frequency': 'f', 'id': 827, 'synset': 'pitcher.n.02', 'synonyms': ['pitcher_(vessel_for_liquid)', 'ewer'], 'def': 'an open vessel with a handle and a spout for pouring', 'name': 'pitcher_(vessel_for_liquid)'}, {'frequency': 'r', 'id': 828, 'synset': 'pitchfork.n.01', 'synonyms': ['pitchfork'], 'def': 'a long-handled hand tool with sharp widely spaced prongs for lifting and pitching hay', 'name': 'pitchfork'}, {'frequency': 'f', 'id': 829, 'synset': 'pizza.n.01', 'synonyms': ['pizza'], 'def': 'Italian open pie made of thin bread dough spread with a spiced mixture of e.g. tomato sauce and cheese', 'name': 'pizza'}, {'frequency': 'f', 'id': 830, 'synset': 'place_mat.n.01', 'synonyms': ['place_mat'], 'def': 'a mat placed on a table for an individual place setting', 'name': 'place_mat'}, {'frequency': 'f', 'id': 831, 'synset': 'plate.n.04', 'synonyms': ['plate'], 'def': 'dish on which food is served or from which food is eaten', 'name': 'plate'}, {'frequency': 'c', 'id': 832, 'synset': 'platter.n.01', 'synonyms': ['platter'], 'def': 'a large shallow dish used for serving food', 'name': 'platter'}, {'frequency': 'r', 'id': 833, 'synset': 'playing_card.n.01', 'synonyms': ['playing_card'], 'def': 'one of a pack of cards that are used to play card games', 'name': 'playing_card'}, {'frequency': 'r', 'id': 834, 'synset': 'playpen.n.01', 'synonyms': ['playpen'], 'def': 'a portable enclosure in which babies may be left to play', 'name': 'playpen'}, {'frequency': 'c', 'id': 835, 'synset': 'pliers.n.01', 'synonyms': ['pliers', 'plyers'], 'def': 'a gripping hand tool with two hinged arms and (usually) serrated jaws', 'name': 'pliers'}, {'frequency': 'r', 'id': 836, 'synset': 'plow.n.01', 'synonyms': ['plow_(farm_equipment)', 'plough_(farm_equipment)'], 'def': 'a farm tool having one or more heavy blades to break the soil and cut a furrow prior to sowing', 'name': 'plow_(farm_equipment)'}, {'frequency': 'r', 'id': 837, 'synset': 'pocket_watch.n.01', 'synonyms': ['pocket_watch'], 'def': 'a watch that is carried in a small watch pocket', 'name': 'pocket_watch'}, {'frequency': 'c', 'id': 838, 'synset': 'pocketknife.n.01', 'synonyms': ['pocketknife'], 'def': 'a knife with a blade that folds into the handle; suitable for carrying in the pocket', 'name': 'pocketknife'}, {'frequency': 'c', 'id': 839, 'synset': 'poker.n.01', 'synonyms': ['poker_(fire_stirring_tool)', 'stove_poker', 'fire_hook'], 'def': 'fire iron consisting of a metal rod with a handle; used to stir a fire', 'name': 'poker_(fire_stirring_tool)'}, {'frequency': 'f', 'id': 840, 'synset': 'pole.n.01', 'synonyms': ['pole', 'post'], 'def': 'a long (usually round) rod of wood or metal or plastic', 'name': 'pole'}, {'frequency': 'r', 'id': 841, 'synset': 
'police_van.n.01', 'synonyms': ['police_van', 'police_wagon', 'paddy_wagon', 'patrol_wagon'], 'def': 'van used by police to transport prisoners', 'name': 'police_van'}, {'frequency': 'f', 'id': 842, 'synset': 'polo_shirt.n.01', 'synonyms': ['polo_shirt', 'sport_shirt'], 'def': 'a shirt with short sleeves designed for comfort and casual wear', 'name': 'polo_shirt'}, {'frequency': 'r', 'id': 843, 'synset': 'poncho.n.01', 'synonyms': ['poncho'], 'def': 'a blanket-like cloak with a hole in the center for the head', 'name': 'poncho'}, {'frequency': 'c', 'id': 844, 'synset': 'pony.n.05', 'synonyms': ['pony'], 'def': 'any of various breeds of small gentle horses usually less than five feet high at the shoulder', 'name': 'pony'}, {'frequency': 'r', 'id': 845, 'synset': 'pool_table.n.01', 'synonyms': ['pool_table', 'billiard_table', 'snooker_table'], 'def': 'game equipment consisting of a heavy table on which pool is played', 'name': 'pool_table'}, {'frequency': 'f', 'id': 846, 'synset': 'pop.n.02', 'synonyms': ['pop_(soda)', 'soda_(pop)', 'tonic', 'soft_drink'], 'def': 'a sweet drink containing carbonated water and flavoring', 'name': 'pop_(soda)'}, {'frequency': 'r', 'id': 847, 'synset': 'portrait.n.02', 'synonyms': ['portrait', 'portrayal'], 'def': 'any likeness of a person, in any medium', 'name': 'portrait'}, {'frequency': 'c', 'id': 848, 'synset': 'postbox.n.01', 'synonyms': ['postbox_(public)', 'mailbox_(public)'], 'def': 'public box for deposit of mail', 'name': 'postbox_(public)'}, {'frequency': 'c', 'id': 849, 'synset': 'postcard.n.01', 'synonyms': ['postcard', 'postal_card', 'mailing-card'], 'def': 'a card for sending messages by post without an envelope', 'name': 'postcard'}, {'frequency': 'f', 'id': 850, 'synset': 'poster.n.01', 'synonyms': ['poster', 'placard'], 'def': 'a sign posted in a public place as an advertisement', 'name': 'poster'}, {'frequency': 'f', 'id': 851, 'synset': 'pot.n.01', 'synonyms': ['pot'], 'def': 'metal or earthenware cooking vessel that is usually round and deep; often has a handle and lid', 'name': 'pot'}, {'frequency': 'f', 'id': 852, 'synset': 'pot.n.04', 'synonyms': ['flowerpot'], 'def': 'a container in which plants are cultivated', 'name': 'flowerpot'}, {'frequency': 'f', 'id': 853, 'synset': 'potato.n.01', 'synonyms': ['potato'], 'def': 'an edible tuber native to South America', 'name': 'potato'}, {'frequency': 'c', 'id': 854, 'synset': 'potholder.n.01', 'synonyms': ['potholder'], 'def': 'an insulated pad for holding hot pots', 'name': 'potholder'}, {'frequency': 'c', 'id': 855, 'synset': 'pottery.n.01', 'synonyms': ['pottery', 'clayware'], 'def': 'ceramic ware made from clay and baked in a kiln', 'name': 'pottery'}, {'frequency': 'c', 'id': 856, 'synset': 'pouch.n.01', 'synonyms': ['pouch'], 'def': 'a small or medium size container for holding or carrying things', 'name': 'pouch'}, {'frequency': 'r', 'id': 857, 'synset': 'power_shovel.n.01', 'synonyms': ['power_shovel', 'excavator', 'digger'], 'def': 'a machine for excavating', 'name': 'power_shovel'}, {'frequency': 'c', 'id': 858, 'synset': 'prawn.n.01', 'synonyms': ['prawn', 'shrimp'], 'def': 'any of various edible decapod crustaceans', 'name': 'prawn'}, {'frequency': 'f', 'id': 859, 'synset': 'printer.n.03', 'synonyms': ['printer', 'printing_machine'], 'def': 'a machine that prints', 'name': 'printer'}, {'frequency': 'c', 'id': 860, 'synset': 'projectile.n.01', 'synonyms': ['projectile_(weapon)', 'missile'], 'def': 'a weapon that is forcibly thrown or projected at a target', 'name': 
'projectile_(weapon)'}, {'frequency': 'c', 'id': 861, 'synset': 'projector.n.02', 'synonyms': ['projector'], 'def': 'an optical instrument that projects an enlarged image onto a screen', 'name': 'projector'}, {'frequency': 'f', 'id': 862, 'synset': 'propeller.n.01', 'synonyms': ['propeller', 'propellor'], 'def': 'a mechanical device that rotates to push against air or water', 'name': 'propeller'}, {'frequency': 'r', 'id': 863, 'synset': 'prune.n.01', 'synonyms': ['prune'], 'def': 'dried plum', 'name': 'prune'}, {'frequency': 'r', 'id': 864, 'synset': 'pudding.n.01', 'synonyms': ['pudding'], 'def': 'any of various soft thick unsweetened baked dishes', 'name': 'pudding'}, {'frequency': 'r', 'id': 865, 'synset': 'puffer.n.02', 'synonyms': ['puffer_(fish)', 'pufferfish', 'blowfish', 'globefish'], 'def': 'fishes whose elongated spiny body can inflate itself with water or air to form a globe', 'name': 'puffer_(fish)'}, {'frequency': 'r', 'id': 866, 'synset': 'puffin.n.01', 'synonyms': ['puffin'], 'def': 'seabirds having short necks and brightly colored compressed bills', 'name': 'puffin'}, {'frequency': 'r', 'id': 867, 'synset': 'pug.n.01', 'synonyms': ['pug-dog'], 'def': 'small compact smooth-coated breed of Asiatic origin having a tightly curled tail and broad flat wrinkled muzzle', 'name': 'pug-dog'}, {'frequency': 'c', 'id': 868, 'synset': 'pumpkin.n.02', 'synonyms': ['pumpkin'], 'def': 'usually large pulpy deep-yellow round fruit of the squash family maturing in late summer or early autumn', 'name': 'pumpkin'}, {'frequency': 'r', 'id': 869, 'synset': 'punch.n.03', 'synonyms': ['puncher'], 'def': 'a tool for making holes or indentations', 'name': 'puncher'}, {'frequency': 'r', 'id': 870, 'synset': 'puppet.n.01', 'synonyms': ['puppet', 'marionette'], 'def': 'a small figure of a person operated from above with strings by a puppeteer', 'name': 'puppet'}, {'frequency': 'r', 'id': 871, 'synset': 'puppy.n.01', 'synonyms': ['puppy'], 'def': 'a young dog', 'name': 'puppy'}, {'frequency': 'r', 'id': 872, 'synset': 'quesadilla.n.01', 'synonyms': ['quesadilla'], 'def': 'a tortilla that is filled with cheese and heated', 'name': 'quesadilla'}, {'frequency': 'r', 'id': 873, 'synset': 'quiche.n.02', 'synonyms': ['quiche'], 'def': 'a tart filled with rich unsweetened custard; often contains other ingredients (as cheese or ham or seafood or vegetables)', 'name': 'quiche'}, {'frequency': 'f', 'id': 874, 'synset': 'quilt.n.01', 'synonyms': ['quilt', 'comforter'], 'def': 'bedding made of two layers of cloth filled with stuffing and stitched together', 'name': 'quilt'}, {'frequency': 'c', 'id': 875, 'synset': 'rabbit.n.01', 'synonyms': ['rabbit'], 'def': 'any of various burrowing animals of the family Leporidae having long ears and short tails', 'name': 'rabbit'}, {'frequency': 'r', 'id': 876, 'synset': 'racer.n.02', 'synonyms': ['race_car', 'racing_car'], 'def': 'a fast car that competes in races', 'name': 'race_car'}, {'frequency': 'c', 'id': 877, 'synset': 'racket.n.04', 'synonyms': ['racket', 'racquet'], 'def': 'a sports implement used to strike a ball in various games', 'name': 'racket'}, {'frequency': 'r', 'id': 878, 'synset': 'radar.n.01', 'synonyms': ['radar'], 'def': 'measuring instrument in which the echo of a pulse of microwave radiation is used to detect and locate distant objects', 'name': 'radar'}, {'frequency': 'c', 'id': 879, 'synset': 'radiator.n.03', 'synonyms': ['radiator'], 'def': 'a mechanism consisting of a metal honeycomb through which hot fluids circulate', 'name': 'radiator'}, 
{'frequency': 'c', 'id': 880, 'synset': 'radio_receiver.n.01', 'synonyms': ['radio_receiver', 'radio_set', 'radio', 'tuner_(radio)'], 'def': 'an electronic receiver that detects and demodulates and amplifies transmitted radio signals', 'name': 'radio_receiver'}, {'frequency': 'c', 'id': 881, 'synset': 'radish.n.03', 'synonyms': ['radish', 'daikon'], 'def': 'pungent edible root of any of various cultivated radish plants', 'name': 'radish'}, {'frequency': 'c', 'id': 882, 'synset': 'raft.n.01', 'synonyms': ['raft'], 'def': 'a flat float (usually made of logs or planks) that can be used for transport or as a platform for swimmers', 'name': 'raft'}, {'frequency': 'r', 'id': 883, 'synset': 'rag_doll.n.01', 'synonyms': ['rag_doll'], 'def': 'a cloth doll that is stuffed and (usually) painted', 'name': 'rag_doll'}, {'frequency': 'c', 'id': 884, 'synset': 'raincoat.n.01', 'synonyms': ['raincoat', 'waterproof_jacket'], 'def': 'a water-resistant coat', 'name': 'raincoat'}, {'frequency': 'c', 'id': 885, 'synset': 'ram.n.05', 'synonyms': ['ram_(animal)'], 'def': 'uncastrated adult male sheep', 'name': 'ram_(animal)'}, {'frequency': 'c', 'id': 886, 'synset': 'raspberry.n.02', 'synonyms': ['raspberry'], 'def': 'red or black edible aggregate berries usually smaller than the related blackberries', 'name': 'raspberry'}, {'frequency': 'r', 'id': 887, 'synset': 'rat.n.01', 'synonyms': ['rat'], 'def': 'any of various long-tailed rodents similar to but larger than a mouse', 'name': 'rat'}, {'frequency': 'c', 'id': 888, 'synset': 'razorblade.n.01', 'synonyms': ['razorblade'], 'def': 'a blade that has very sharp edge', 'name': 'razorblade'}, {'frequency': 'c', 'id': 889, 'synset': 'reamer.n.01', 'synonyms': ['reamer_(juicer)', 'juicer', 'juice_reamer'], 'def': 'a squeezer with a conical ridged center that is used for squeezing juice from citrus fruit', 'name': 'reamer_(juicer)'}, {'frequency': 'f', 'id': 890, 'synset': 'rearview_mirror.n.01', 'synonyms': ['rearview_mirror'], 'def': 'car mirror that reflects the view out of the rear window', 'name': 'rearview_mirror'}, {'frequency': 'c', 'id': 891, 'synset': 'receipt.n.02', 'synonyms': ['receipt'], 'def': 'an acknowledgment (usually tangible) that payment has been made', 'name': 'receipt'}, {'frequency': 'c', 'id': 892, 'synset': 'recliner.n.01', 'synonyms': ['recliner', 'reclining_chair', 'lounger_(chair)'], 'def': 'an armchair whose back can be lowered and foot can be raised to allow the sitter to recline in it', 'name': 'recliner'}, {'frequency': 'r', 'id': 893, 'synset': 'record_player.n.01', 'synonyms': ['record_player', 'phonograph_(record_player)', 'turntable'], 'def': 'machine in which rotating records cause a stylus to vibrate and the vibrations are amplified acoustically or electronically', 'name': 'record_player'}, {'frequency': 'r', 'id': 894, 'synset': 'red_cabbage.n.02', 'synonyms': ['red_cabbage'], 'def': 'compact head of purplish-red leaves', 'name': 'red_cabbage'}, {'frequency': 'f', 'id': 895, 'synset': 'reflector.n.01', 'synonyms': ['reflector'], 'def': 'device that reflects light, radiation, etc.', 'name': 'reflector'}, {'frequency': 'f', 'id': 896, 'synset': 'remote_control.n.01', 'synonyms': ['remote_control'], 'def': 'a device that can be used to control a machine or apparatus from a distance', 'name': 'remote_control'}, {'frequency': 'c', 'id': 897, 'synset': 'rhinoceros.n.01', 'synonyms': ['rhinoceros'], 'def': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the 
snout', 'name': 'rhinoceros'}, {'frequency': 'r', 'id': 898, 'synset': 'rib.n.03', 'synonyms': ['rib_(food)'], 'def': 'cut of meat including one or more ribs', 'name': 'rib_(food)'}, {'frequency': 'r', 'id': 899, 'synset': 'rifle.n.01', 'synonyms': ['rifle'], 'def': 'a shoulder firearm with a long barrel', 'name': 'rifle'}, {'frequency': 'f', 'id': 900, 'synset': 'ring.n.08', 'synonyms': ['ring'], 'def': 'jewelry consisting of a circlet of precious metal (often set with jewels) worn on the finger', 'name': 'ring'}, {'frequency': 'r', 'id': 901, 'synset': 'river_boat.n.01', 'synonyms': ['river_boat'], 'def': 'a boat used on rivers or to ply a river', 'name': 'river_boat'}, {'frequency': 'r', 'id': 902, 'synset': 'road_map.n.02', 'synonyms': ['road_map'], 'def': '(NOT A ROAD) a MAP showing roads (for automobile travel)', 'name': 'road_map'}, {'frequency': 'c', 'id': 903, 'synset': 'robe.n.01', 'synonyms': ['robe'], 'def': 'any loose flowing garment', 'name': 'robe'}, {'frequency': 'c', 'id': 904, 'synset': 'rocking_chair.n.01', 'synonyms': ['rocking_chair'], 'def': 'a chair mounted on rockers', 'name': 'rocking_chair'}, {'frequency': 'r', 'id': 905, 'synset': 'roller_skate.n.01', 'synonyms': ['roller_skate'], 'def': 'a shoe with pairs of rollers (small hard wheels) fixed to the sole', 'name': 'roller_skate'}, {'frequency': 'r', 'id': 906, 'synset': 'rollerblade.n.01', 'synonyms': ['Rollerblade'], 'def': 'an in-line variant of a roller skate', 'name': 'Rollerblade'}, {'frequency': 'c', 'id': 907, 'synset': 'rolling_pin.n.01', 'synonyms': ['rolling_pin'], 'def': 'utensil consisting of a cylinder (usually of wood) with a handle at each end; used to roll out dough', 'name': 'rolling_pin'}, {'frequency': 'r', 'id': 908, 'synset': 'root_beer.n.01', 'synonyms': ['root_beer'], 'def': 'carbonated drink containing extracts of roots and herbs', 'name': 'root_beer'}, {'frequency': 'c', 'id': 909, 'synset': 'router.n.02', 'synonyms': ['router_(computer_equipment)'], 'def': 'a device that forwards data packets between computer networks', 'name': 'router_(computer_equipment)'}, {'frequency': 'f', 'id': 910, 'synset': 'rubber_band.n.01', 'synonyms': ['rubber_band', 'elastic_band'], 'def': 'a narrow band of elastic rubber used to hold things (such as papers) together', 'name': 'rubber_band'}, {'frequency': 'c', 'id': 911, 'synset': 'runner.n.08', 'synonyms': ['runner_(carpet)'], 'def': 'a long narrow carpet', 'name': 'runner_(carpet)'}, {'frequency': 'f', 'id': 912, 'synset': 'sack.n.01', 'synonyms': ['plastic_bag', 'paper_bag'], 'def': "a bag made of paper or plastic for holding customer's purchases", 'name': 'plastic_bag'}, {'frequency': 'f', 'id': 913, 'synset': 'saddle.n.01', 'synonyms': ['saddle_(on_an_animal)'], 'def': 'a seat for the rider of a horse or camel', 'name': 'saddle_(on_an_animal)'}, {'frequency': 'f', 'id': 914, 'synset': 'saddle_blanket.n.01', 'synonyms': ['saddle_blanket', 'saddlecloth', 'horse_blanket'], 'def': 'stable gear consisting of a blanket placed under the saddle', 'name': 'saddle_blanket'}, {'frequency': 'c', 'id': 915, 'synset': 'saddlebag.n.01', 'synonyms': ['saddlebag'], 'def': 'a large bag (or pair of bags) hung over a saddle', 'name': 'saddlebag'}, {'frequency': 'r', 'id': 916, 'synset': 'safety_pin.n.01', 'synonyms': ['safety_pin'], 'def': 'a pin in the form of a clasp; has a guard so the point of the pin will not stick the user', 'name': 'safety_pin'}, {'frequency': 'c', 'id': 917, 'synset': 'sail.n.01', 'synonyms': ['sail'], 'def': 'a large piece of fabric by means of 
which wind is used to propel a sailing vessel', 'name': 'sail'}, {'frequency': 'c', 'id': 918, 'synset': 'salad.n.01', 'synonyms': ['salad'], 'def': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens', 'name': 'salad'}, {'frequency': 'r', 'id': 919, 'synset': 'salad_plate.n.01', 'synonyms': ['salad_plate', 'salad_bowl'], 'def': 'a plate or bowl for individual servings of salad', 'name': 'salad_plate'}, {'frequency': 'r', 'id': 920, 'synset': 'salami.n.01', 'synonyms': ['salami'], 'def': 'highly seasoned fatty sausage of pork and beef usually dried', 'name': 'salami'}, {'frequency': 'r', 'id': 921, 'synset': 'salmon.n.01', 'synonyms': ['salmon_(fish)'], 'def': 'any of various large food and game fishes of northern waters', 'name': 'salmon_(fish)'}, {'frequency': 'r', 'id': 922, 'synset': 'salmon.n.03', 'synonyms': ['salmon_(food)'], 'def': 'flesh of any of various marine or freshwater fish of the family Salmonidae', 'name': 'salmon_(food)'}, {'frequency': 'r', 'id': 923, 'synset': 'salsa.n.01', 'synonyms': ['salsa'], 'def': 'spicy sauce of tomatoes and onions and chili peppers to accompany Mexican foods', 'name': 'salsa'}, {'frequency': 'f', 'id': 924, 'synset': 'saltshaker.n.01', 'synonyms': ['saltshaker'], 'def': 'a shaker with a perforated top for sprinkling salt', 'name': 'saltshaker'}, {'frequency': 'f', 'id': 925, 'synset': 'sandal.n.01', 'synonyms': ['sandal_(type_of_shoe)'], 'def': 'a shoe consisting of a sole fastened by straps to the foot', 'name': 'sandal_(type_of_shoe)'}, {'frequency': 'f', 'id': 926, 'synset': 'sandwich.n.01', 'synonyms': ['sandwich'], 'def': 'two (or more) slices of bread with a filling between them', 'name': 'sandwich'}, {'frequency': 'r', 'id': 927, 'synset': 'satchel.n.01', 'synonyms': ['satchel'], 'def': 'luggage consisting of a small case with a flat bottom and (usually) a shoulder strap', 'name': 'satchel'}, {'frequency': 'r', 'id': 928, 'synset': 'saucepan.n.01', 'synonyms': ['saucepan'], 'def': 'a deep pan with a handle; used for stewing or boiling', 'name': 'saucepan'}, {'frequency': 'f', 'id': 929, 'synset': 'saucer.n.02', 'synonyms': ['saucer'], 'def': 'a small shallow dish for holding a cup at the table', 'name': 'saucer'}, {'frequency': 'f', 'id': 930, 'synset': 'sausage.n.01', 'synonyms': ['sausage'], 'def': 'highly seasoned minced meat stuffed in casings', 'name': 'sausage'}, {'frequency': 'r', 'id': 931, 'synset': 'sawhorse.n.01', 'synonyms': ['sawhorse', 'sawbuck'], 'def': 'a framework for holding wood that is being sawed', 'name': 'sawhorse'}, {'frequency': 'r', 'id': 932, 'synset': 'sax.n.02', 'synonyms': ['saxophone'], 'def': "a wind instrument with a `J'-shaped form typically made of brass", 'name': 'saxophone'}, {'frequency': 'f', 'id': 933, 'synset': 'scale.n.07', 'synonyms': ['scale_(measuring_instrument)'], 'def': 'a measuring instrument for weighing; shows amount of mass', 'name': 'scale_(measuring_instrument)'}, {'frequency': 'r', 'id': 934, 'synset': 'scarecrow.n.01', 'synonyms': ['scarecrow', 'strawman'], 'def': 'an effigy in the shape of a man to frighten birds away from seeds', 'name': 'scarecrow'}, {'frequency': 'f', 'id': 935, 'synset': 'scarf.n.01', 'synonyms': ['scarf'], 'def': 'a garment worn around the head or neck or shoulders for warmth or decoration', 'name': 'scarf'}, {'frequency': 'c', 'id': 936, 'synset': 'school_bus.n.01', 'synonyms': ['school_bus'], 'def': 'a bus used to transport children to or from school', 'name': 'school_bus'}, 
{'frequency': 'f', 'id': 937, 'synset': 'scissors.n.01', 'synonyms': ['scissors'], 'def': 'a tool having two crossed pivoting blades with looped handles', 'name': 'scissors'}, {'frequency': 'c', 'id': 938, 'synset': 'scoreboard.n.01', 'synonyms': ['scoreboard'], 'def': 'a large board for displaying the score of a contest (and some other information)', 'name': 'scoreboard'}, {'frequency': 'c', 'id': 939, 'synset': 'scrambled_eggs.n.01', 'synonyms': ['scrambled_eggs'], 'def': 'eggs beaten and cooked to a soft firm consistency while stirring', 'name': 'scrambled_eggs'}, {'frequency': 'r', 'id': 940, 'synset': 'scraper.n.01', 'synonyms': ['scraper'], 'def': 'any of various hand tools for scraping', 'name': 'scraper'}, {'frequency': 'r', 'id': 941, 'synset': 'scratcher.n.03', 'synonyms': ['scratcher'], 'def': 'a device used for scratching', 'name': 'scratcher'}, {'frequency': 'c', 'id': 942, 'synset': 'screwdriver.n.01', 'synonyms': ['screwdriver'], 'def': 'a hand tool for driving screws; has a tip that fits into the head of a screw', 'name': 'screwdriver'}, {'frequency': 'c', 'id': 943, 'synset': 'scrub_brush.n.01', 'synonyms': ['scrubbing_brush'], 'def': 'a brush with short stiff bristles for heavy cleaning', 'name': 'scrubbing_brush'}, {'frequency': 'c', 'id': 944, 'synset': 'sculpture.n.01', 'synonyms': ['sculpture'], 'def': 'a three-dimensional work of art', 'name': 'sculpture'}, {'frequency': 'r', 'id': 945, 'synset': 'seabird.n.01', 'synonyms': ['seabird', 'seafowl'], 'def': 'a bird that frequents coastal waters and the open ocean: gulls; pelicans; gannets; cormorants; albatrosses; petrels; etc.', 'name': 'seabird'}, {'frequency': 'r', 'id': 946, 'synset': 'seahorse.n.02', 'synonyms': ['seahorse'], 'def': 'small fish with horse-like heads bent sharply downward and curled tails', 'name': 'seahorse'}, {'frequency': 'r', 'id': 947, 'synset': 'seaplane.n.01', 'synonyms': ['seaplane', 'hydroplane'], 'def': 'an airplane that can land on or take off from water', 'name': 'seaplane'}, {'frequency': 'c', 'id': 948, 'synset': 'seashell.n.01', 'synonyms': ['seashell'], 'def': 'the shell of a marine organism', 'name': 'seashell'}, {'frequency': 'r', 'id': 949, 'synset': 'seedling.n.01', 'synonyms': ['seedling'], 'def': 'young plant or tree grown from a seed', 'name': 'seedling'}, {'frequency': 'c', 'id': 950, 'synset': 'serving_dish.n.01', 'synonyms': ['serving_dish'], 'def': 'a dish used for serving food', 'name': 'serving_dish'}, {'frequency': 'r', 'id': 951, 'synset': 'sewing_machine.n.01', 'synonyms': ['sewing_machine'], 'def': 'a textile machine used as a home appliance for sewing', 'name': 'sewing_machine'}, {'frequency': 'r', 'id': 952, 'synset': 'shaker.n.03', 'synonyms': ['shaker'], 'def': 'a container in which something can be shaken', 'name': 'shaker'}, {'frequency': 'c', 'id': 953, 'synset': 'shampoo.n.01', 'synonyms': ['shampoo'], 'def': 'cleansing agent consisting of soaps or detergents used for washing the hair', 'name': 'shampoo'}, {'frequency': 'r', 'id': 954, 'synset': 'shark.n.01', 'synonyms': ['shark'], 'def': 'typically large carnivorous fishes with sharp teeth', 'name': 'shark'}, {'frequency': 'r', 'id': 955, 'synset': 'sharpener.n.01', 'synonyms': ['sharpener'], 'def': 'any implement that is used to make something (an edge or a point) sharper', 'name': 'sharpener'}, {'frequency': 'r', 'id': 956, 'synset': 'sharpie.n.03', 'synonyms': ['Sharpie'], 'def': 'a pen with indelible ink that will write on any surface', 'name': 'Sharpie'}, {'frequency': 'r', 'id': 957, 'synset': 
'shaver.n.03', 'synonyms': ['shaver_(electric)', 'electric_shaver', 'electric_razor'], 'def': 'a razor powered by an electric motor', 'name': 'shaver_(electric)'}, {'frequency': 'c', 'id': 958, 'synset': 'shaving_cream.n.01', 'synonyms': ['shaving_cream', 'shaving_soap'], 'def': 'toiletry that forms a rich lather for softening the beard before shaving', 'name': 'shaving_cream'}, {'frequency': 'r', 'id': 959, 'synset': 'shawl.n.01', 'synonyms': ['shawl'], 'def': 'cloak consisting of an oblong piece of cloth used to cover the head and shoulders', 'name': 'shawl'}, {'frequency': 'r', 'id': 960, 'synset': 'shears.n.01', 'synonyms': ['shears'], 'def': 'large scissors with strong blades', 'name': 'shears'}, {'frequency': 'f', 'id': 961, 'synset': 'sheep.n.01', 'synonyms': ['sheep'], 'def': 'woolly usually horned ruminant mammal related to the goat', 'name': 'sheep'}, {'frequency': 'r', 'id': 962, 'synset': 'shepherd_dog.n.01', 'synonyms': ['shepherd_dog', 'sheepdog'], 'def': 'any of various usually long-haired breeds of dog reared to herd and guard sheep', 'name': 'shepherd_dog'}, {'frequency': 'r', 'id': 963, 'synset': 'sherbert.n.01', 'synonyms': ['sherbert', 'sherbet'], 'def': 'a frozen dessert made primarily of fruit juice and sugar', 'name': 'sherbert'}, {'frequency': 'r', 'id': 964, 'synset': 'shield.n.02', 'synonyms': ['shield'], 'def': 'armor carried on the arm to intercept blows', 'name': 'shield'}, {'frequency': 'f', 'id': 965, 'synset': 'shirt.n.01', 'synonyms': ['shirt'], 'def': 'a garment worn on the upper half of the body', 'name': 'shirt'}, {'frequency': 'f', 'id': 966, 'synset': 'shoe.n.01', 'synonyms': ['shoe', 'sneaker_(type_of_shoe)', 'tennis_shoe'], 'def': 'common footwear covering the foot', 'name': 'shoe'}, {'frequency': 'c', 'id': 967, 'synset': 'shopping_bag.n.01', 'synonyms': ['shopping_bag'], 'def': 'a bag made of plastic or strong paper (often with handles); used to transport goods after shopping', 'name': 'shopping_bag'}, {'frequency': 'c', 'id': 968, 'synset': 'shopping_cart.n.01', 'synonyms': ['shopping_cart'], 'def': 'a handcart that holds groceries or other goods while shopping', 'name': 'shopping_cart'}, {'frequency': 'f', 'id': 969, 'synset': 'short_pants.n.01', 'synonyms': ['short_pants', 'shorts_(clothing)', 'trunks_(clothing)'], 'def': 'trousers that end at or above the knee', 'name': 'short_pants'}, {'frequency': 'r', 'id': 970, 'synset': 'shot_glass.n.01', 'synonyms': ['shot_glass'], 'def': 'a small glass adequate to hold a single swallow of whiskey', 'name': 'shot_glass'}, {'frequency': 'c', 'id': 971, 'synset': 'shoulder_bag.n.01', 'synonyms': ['shoulder_bag'], 'def': 'a large handbag that can be carried by a strap looped over the shoulder', 'name': 'shoulder_bag'}, {'frequency': 'c', 'id': 972, 'synset': 'shovel.n.01', 'synonyms': ['shovel'], 'def': 'a hand tool for lifting loose material such as snow, dirt, etc.', 'name': 'shovel'}, {'frequency': 'f', 'id': 973, 'synset': 'shower.n.01', 'synonyms': ['shower_head'], 'def': 'a plumbing fixture that sprays water over you', 'name': 'shower_head'}, {'frequency': 'f', 'id': 974, 'synset': 'shower_curtain.n.01', 'synonyms': ['shower_curtain'], 'def': 'a curtain that keeps water from splashing out of the shower area', 'name': 'shower_curtain'}, {'frequency': 'r', 'id': 975, 'synset': 'shredder.n.01', 'synonyms': ['shredder_(for_paper)'], 'def': 'a device that shreds documents', 'name': 'shredder_(for_paper)'}, {'frequency': 'r', 'id': 976, 'synset': 'sieve.n.01', 'synonyms': ['sieve', 
'screen_(sieve)'], 'def': 'a strainer for separating lumps from powdered material or grading particles', 'name': 'sieve'}, {'frequency': 'f', 'id': 977, 'synset': 'signboard.n.01', 'synonyms': ['signboard'], 'def': 'structure displaying a board on which advertisements can be posted', 'name': 'signboard'}, {'frequency': 'c', 'id': 978, 'synset': 'silo.n.01', 'synonyms': ['silo'], 'def': 'a cylindrical tower used for storing goods', 'name': 'silo'}, {'frequency': 'f', 'id': 979, 'synset': 'sink.n.01', 'synonyms': ['sink'], 'def': 'plumbing fixture consisting of a water basin fixed to a wall or floor and having a drainpipe', 'name': 'sink'}, {'frequency': 'f', 'id': 980, 'synset': 'skateboard.n.01', 'synonyms': ['skateboard'], 'def': 'a board with wheels that is ridden in a standing or crouching position and propelled by foot', 'name': 'skateboard'}, {'frequency': 'c', 'id': 981, 'synset': 'skewer.n.01', 'synonyms': ['skewer'], 'def': 'a long pin for holding meat in position while it is being roasted', 'name': 'skewer'}, {'frequency': 'f', 'id': 982, 'synset': 'ski.n.01', 'synonyms': ['ski'], 'def': 'sports equipment for skiing on snow', 'name': 'ski'}, {'frequency': 'f', 'id': 983, 'synset': 'ski_boot.n.01', 'synonyms': ['ski_boot'], 'def': 'a stiff boot that is fastened to a ski with a ski binding', 'name': 'ski_boot'}, {'frequency': 'f', 'id': 984, 'synset': 'ski_parka.n.01', 'synonyms': ['ski_parka', 'ski_jacket'], 'def': 'a parka to be worn while skiing', 'name': 'ski_parka'}, {'frequency': 'f', 'id': 985, 'synset': 'ski_pole.n.01', 'synonyms': ['ski_pole'], 'def': 'a pole with metal points used as an aid in skiing', 'name': 'ski_pole'}, {'frequency': 'f', 'id': 986, 'synset': 'skirt.n.02', 'synonyms': ['skirt'], 'def': 'a garment hanging from the waist; worn mainly by girls and women', 'name': 'skirt'}, {'frequency': 'c', 'id': 987, 'synset': 'sled.n.01', 'synonyms': ['sled', 'sledge', 'sleigh'], 'def': 'a vehicle or flat object for transportation over snow by sliding or pulled by dogs, etc.', 'name': 'sled'}, {'frequency': 'c', 'id': 988, 'synset': 'sleeping_bag.n.01', 'synonyms': ['sleeping_bag'], 'def': 'large padded bag designed to be slept in outdoors', 'name': 'sleeping_bag'}, {'frequency': 'r', 'id': 989, 'synset': 'sling.n.05', 'synonyms': ['sling_(bandage)', 'triangular_bandage'], 'def': 'bandage to support an injured forearm; slung over the shoulder or neck', 'name': 'sling_(bandage)'}, {'frequency': 'c', 'id': 990, 'synset': 'slipper.n.01', 'synonyms': ['slipper_(footwear)', 'carpet_slipper_(footwear)'], 'def': 'low footwear that can be slipped on and off easily; usually worn indoors', 'name': 'slipper_(footwear)'}, {'frequency': 'r', 'id': 991, 'synset': 'smoothie.n.02', 'synonyms': ['smoothie'], 'def': 'a thick smooth drink consisting of fresh fruit pureed with ice cream or yoghurt or milk', 'name': 'smoothie'}, {'frequency': 'r', 'id': 992, 'synset': 'snake.n.01', 'synonyms': ['snake', 'serpent'], 'def': 'limbless scaly elongate reptile; some are venomous', 'name': 'snake'}, {'frequency': 'f', 'id': 993, 'synset': 'snowboard.n.01', 'synonyms': ['snowboard'], 'def': 'a board that resembles a broad ski or a small surfboard; used in a standing position to slide down snow-covered slopes', 'name': 'snowboard'}, {'frequency': 'c', 'id': 994, 'synset': 'snowman.n.01', 'synonyms': ['snowman'], 'def': 'a figure of a person made of packed snow', 'name': 'snowman'}, {'frequency': 'c', 'id': 995, 'synset': 'snowmobile.n.01', 'synonyms': ['snowmobile'], 'def': 'tracked vehicle for 
travel on snow having skis in front', 'name': 'snowmobile'}, {'frequency': 'f', 'id': 996, 'synset': 'soap.n.01', 'synonyms': ['soap'], 'def': 'a cleansing agent made from the salts of vegetable or animal fats', 'name': 'soap'}, {'frequency': 'f', 'id': 997, 'synset': 'soccer_ball.n.01', 'synonyms': ['soccer_ball'], 'def': "an inflated ball used in playing soccer (called `football' outside of the United States)", 'name': 'soccer_ball'}, {'frequency': 'f', 'id': 998, 'synset': 'sock.n.01', 'synonyms': ['sock'], 'def': 'cloth covering for the foot; worn inside the shoe; reaches to between the ankle and the knee', 'name': 'sock'}, {'frequency': 'r', 'id': 999, 'synset': 'soda_fountain.n.02', 'synonyms': ['soda_fountain'], 'def': 'an apparatus for dispensing soda water', 'name': 'soda_fountain'}, {'frequency': 'r', 'id': 1000, 'synset': 'soda_water.n.01', 'synonyms': ['carbonated_water', 'club_soda', 'seltzer', 'sparkling_water'], 'def': 'effervescent beverage artificially charged with carbon dioxide', 'name': 'carbonated_water'}, {'frequency': 'f', 'id': 1001, 'synset': 'sofa.n.01', 'synonyms': ['sofa', 'couch', 'lounge'], 'def': 'an upholstered seat for more than one person', 'name': 'sofa'}, {'frequency': 'r', 'id': 1002, 'synset': 'softball.n.01', 'synonyms': ['softball'], 'def': 'ball used in playing softball', 'name': 'softball'}, {'frequency': 'c', 'id': 1003, 'synset': 'solar_array.n.01', 'synonyms': ['solar_array', 'solar_battery', 'solar_panel'], 'def': 'electrical device consisting of a large array of connected solar cells', 'name': 'solar_array'}, {'frequency': 'r', 'id': 1004, 'synset': 'sombrero.n.02', 'synonyms': ['sombrero'], 'def': 'a straw hat with a tall crown and broad brim; worn in American southwest and in Mexico', 'name': 'sombrero'}, {'frequency': 'c', 'id': 1005, 'synset': 'soup.n.01', 'synonyms': ['soup'], 'def': 'liquid food especially of meat or fish or vegetable stock often containing pieces of solid food', 'name': 'soup'}, {'frequency': 'r', 'id': 1006, 'synset': 'soup_bowl.n.01', 'synonyms': ['soup_bowl'], 'def': 'a bowl for serving soup', 'name': 'soup_bowl'}, {'frequency': 'c', 'id': 1007, 'synset': 'soupspoon.n.01', 'synonyms': ['soupspoon'], 'def': 'a spoon with a rounded bowl for eating soup', 'name': 'soupspoon'}, {'frequency': 'c', 'id': 1008, 'synset': 'sour_cream.n.01', 'synonyms': ['sour_cream', 'soured_cream'], 'def': 'soured light cream', 'name': 'sour_cream'}, {'frequency': 'r', 'id': 1009, 'synset': 'soya_milk.n.01', 'synonyms': ['soya_milk', 'soybean_milk', 'soymilk'], 'def': 'a milk substitute containing soybean flour and water; used in some infant formulas and in making tofu', 'name': 'soya_milk'}, {'frequency': 'r', 'id': 1010, 'synset': 'space_shuttle.n.01', 'synonyms': ['space_shuttle'], 'def': "a reusable spacecraft with wings for a controlled descent through the Earth's atmosphere", 'name': 'space_shuttle'}, {'frequency': 'r', 'id': 1011, 'synset': 'sparkler.n.02', 'synonyms': ['sparkler_(fireworks)'], 'def': 'a firework that burns slowly and throws out a shower of sparks', 'name': 'sparkler_(fireworks)'}, {'frequency': 'f', 'id': 1012, 'synset': 'spatula.n.02', 'synonyms': ['spatula'], 'def': 'a hand tool with a thin flexible blade used to mix or spread soft substances', 'name': 'spatula'}, {'frequency': 'r', 'id': 1013, 'synset': 'spear.n.01', 'synonyms': ['spear', 'lance'], 'def': 'a long pointed rod used as a tool or weapon', 'name': 'spear'}, {'frequency': 'f', 'id': 1014, 'synset': 'spectacles.n.01', 'synonyms': ['spectacles', 'specs', 
'eyeglasses', 'glasses'], 'def': 'optical instrument consisting of a frame that holds a pair of lenses for correcting defective vision', 'name': 'spectacles'}, {'frequency': 'c', 'id': 1015, 'synset': 'spice_rack.n.01', 'synonyms': ['spice_rack'], 'def': 'a rack for displaying containers filled with spices', 'name': 'spice_rack'}, {'frequency': 'r', 'id': 1016, 'synset': 'spider.n.01', 'synonyms': ['spider'], 'def': 'predatory arachnid with eight legs, two poison fangs, two feelers, and usually two silk-spinning organs at the back end of the body', 'name': 'spider'}, {'frequency': 'c', 'id': 1017, 'synset': 'sponge.n.01', 'synonyms': ['sponge'], 'def': 'a porous mass usable to absorb water typically used for cleaning', 'name': 'sponge'}, {'frequency': 'f', 'id': 1018, 'synset': 'spoon.n.01', 'synonyms': ['spoon'], 'def': 'a piece of cutlery with a shallow bowl-shaped container and a handle', 'name': 'spoon'}, {'frequency': 'c', 'id': 1019, 'synset': 'sportswear.n.01', 'synonyms': ['sportswear', 'athletic_wear', 'activewear'], 'def': 'attire worn for sport or for casual wear', 'name': 'sportswear'}, {'frequency': 'c', 'id': 1020, 'synset': 'spotlight.n.02', 'synonyms': ['spotlight'], 'def': 'a lamp that produces a strong beam of light to illuminate a restricted area; used to focus attention of a stage performer', 'name': 'spotlight'}, {'frequency': 'r', 'id': 1021, 'synset': 'squirrel.n.01', 'synonyms': ['squirrel'], 'def': 'a kind of arboreal rodent having a long bushy tail', 'name': 'squirrel'}, {'frequency': 'c', 'id': 1022, 'synset': 'stapler.n.01', 'synonyms': ['stapler_(stapling_machine)'], 'def': 'a machine that inserts staples into sheets of paper in order to fasten them together', 'name': 'stapler_(stapling_machine)'}, {'frequency': 'r', 'id': 1023, 'synset': 'starfish.n.01', 'synonyms': ['starfish', 'sea_star'], 'def': 'echinoderms characterized by five arms extending from a central disk', 'name': 'starfish'}, {'frequency': 'f', 'id': 1024, 'synset': 'statue.n.01', 'synonyms': ['statue_(sculpture)'], 'def': 'a sculpture representing a human or animal', 'name': 'statue_(sculpture)'}, {'frequency': 'c', 'id': 1025, 'synset': 'steak.n.01', 'synonyms': ['steak_(food)'], 'def': 'a slice of meat cut from the fleshy part of an animal or large fish', 'name': 'steak_(food)'}, {'frequency': 'r', 'id': 1026, 'synset': 'steak_knife.n.01', 'synonyms': ['steak_knife'], 'def': 'a sharp table knife used in eating steak', 'name': 'steak_knife'}, {'frequency': 'r', 'id': 1027, 'synset': 'steamer.n.02', 'synonyms': ['steamer_(kitchen_appliance)'], 'def': 'a cooking utensil that can be used to cook food by steaming it', 'name': 'steamer_(kitchen_appliance)'}, {'frequency': 'f', 'id': 1028, 'synset': 'steering_wheel.n.01', 'synonyms': ['steering_wheel'], 'def': 'a handwheel that is used for steering', 'name': 'steering_wheel'}, {'frequency': 'r', 'id': 1029, 'synset': 'stencil.n.01', 'synonyms': ['stencil'], 'def': 'a sheet of material (metal, plastic, etc.) 
that has been perforated with a pattern; ink or paint can pass through the perforations to create the printed pattern on the surface below', 'name': 'stencil'}, {'frequency': 'r', 'id': 1030, 'synset': 'step_ladder.n.01', 'synonyms': ['stepladder'], 'def': 'a folding portable ladder hinged at the top', 'name': 'stepladder'}, {'frequency': 'c', 'id': 1031, 'synset': 'step_stool.n.01', 'synonyms': ['step_stool'], 'def': 'a stool that has one or two steps that fold under the seat', 'name': 'step_stool'}, {'frequency': 'c', 'id': 1032, 'synset': 'stereo.n.01', 'synonyms': ['stereo_(sound_system)'], 'def': 'electronic device for playing audio', 'name': 'stereo_(sound_system)'}, {'frequency': 'r', 'id': 1033, 'synset': 'stew.n.02', 'synonyms': ['stew'], 'def': 'food prepared by stewing especially meat or fish with vegetables', 'name': 'stew'}, {'frequency': 'r', 'id': 1034, 'synset': 'stirrer.n.02', 'synonyms': ['stirrer'], 'def': 'an implement used for stirring', 'name': 'stirrer'}, {'frequency': 'f', 'id': 1035, 'synset': 'stirrup.n.01', 'synonyms': ['stirrup'], 'def': "support consisting of metal loops into which rider's feet go", 'name': 'stirrup'}, {'frequency': 'c', 'id': 1036, 'synset': 'stocking.n.01', 'synonyms': ['stockings_(leg_wear)'], 'def': 'close-fitting hosiery to cover the foot and leg; come in matched pairs', 'name': 'stockings_(leg_wear)'}, {'frequency': 'f', 'id': 1037, 'synset': 'stool.n.01', 'synonyms': ['stool'], 'def': 'a simple seat without a back or arms', 'name': 'stool'}, {'frequency': 'f', 'id': 1038, 'synset': 'stop_sign.n.01', 'synonyms': ['stop_sign'], 'def': 'a traffic sign to notify drivers that they must come to a complete stop', 'name': 'stop_sign'}, {'frequency': 'f', 'id': 1039, 'synset': 'stoplight.n.01', 'synonyms': ['brake_light'], 'def': 'a red light on the rear of a motor vehicle that signals when the brakes are applied', 'name': 'brake_light'}, {'frequency': 'f', 'id': 1040, 'synset': 'stove.n.01', 'synonyms': ['stove', 'kitchen_stove', 'range_(kitchen_appliance)', 'kitchen_range', 'cooking_stove'], 'def': 'a kitchen appliance used for cooking food', 'name': 'stove'}, {'frequency': 'c', 'id': 1041, 'synset': 'strainer.n.01', 'synonyms': ['strainer'], 'def': 'a filter to retain larger pieces while smaller pieces and liquids pass through', 'name': 'strainer'}, {'frequency': 'f', 'id': 1042, 'synset': 'strap.n.01', 'synonyms': ['strap'], 'def': 'an elongated strip of material for binding things together or holding', 'name': 'strap'}, {'frequency': 'f', 'id': 1043, 'synset': 'straw.n.04', 'synonyms': ['straw_(for_drinking)', 'drinking_straw'], 'def': 'a thin paper or plastic tube used to suck liquids into the mouth', 'name': 'straw_(for_drinking)'}, {'frequency': 'f', 'id': 1044, 'synset': 'strawberry.n.01', 'synonyms': ['strawberry'], 'def': 'sweet fleshy red fruit', 'name': 'strawberry'}, {'frequency': 'f', 'id': 1045, 'synset': 'street_sign.n.01', 'synonyms': ['street_sign'], 'def': 'a sign visible from the street', 'name': 'street_sign'}, {'frequency': 'f', 'id': 1046, 'synset': 'streetlight.n.01', 'synonyms': ['streetlight', 'street_lamp'], 'def': 'a lamp supported on a lamppost; for illuminating a street', 'name': 'streetlight'}, {'frequency': 'r', 'id': 1047, 'synset': 'string_cheese.n.01', 'synonyms': ['string_cheese'], 'def': 'cheese formed in long strings twisted together', 'name': 'string_cheese'}, {'frequency': 'r', 'id': 1048, 'synset': 'stylus.n.02', 'synonyms': ['stylus'], 'def': 'a pointed tool for writing or drawing or engraving', 'name': 
'stylus'}, {'frequency': 'r', 'id': 1049, 'synset': 'subwoofer.n.01', 'synonyms': ['subwoofer'], 'def': 'a loudspeaker that is designed to reproduce very low bass frequencies', 'name': 'subwoofer'}, {'frequency': 'r', 'id': 1050, 'synset': 'sugar_bowl.n.01', 'synonyms': ['sugar_bowl'], 'def': 'a dish in which sugar is served', 'name': 'sugar_bowl'}, {'frequency': 'r', 'id': 1051, 'synset': 'sugarcane.n.01', 'synonyms': ['sugarcane_(plant)'], 'def': 'juicy canes whose sap is a source of molasses and commercial sugar; fresh canes are sometimes chewed for the juice', 'name': 'sugarcane_(plant)'}, {'frequency': 'c', 'id': 1052, 'synset': 'suit.n.01', 'synonyms': ['suit_(clothing)'], 'def': 'a set of garments (usually including a jacket and trousers or skirt) for outerwear all of the same fabric and color', 'name': 'suit_(clothing)'}, {'frequency': 'c', 'id': 1053, 'synset': 'sunflower.n.01', 'synonyms': ['sunflower'], 'def': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays', 'name': 'sunflower'}, {'frequency': 'f', 'id': 1054, 'synset': 'sunglasses.n.01', 'synonyms': ['sunglasses'], 'def': 'spectacles that are darkened or polarized to protect the eyes from the glare of the sun', 'name': 'sunglasses'}, {'frequency': 'c', 'id': 1055, 'synset': 'sunhat.n.01', 'synonyms': ['sunhat'], 'def': 'a hat with a broad brim that protects the face from direct exposure to the sun', 'name': 'sunhat'}, {'frequency': 'r', 'id': 1056, 'synset': 'sunscreen.n.01', 'synonyms': ['sunscreen', 'sunblock'], 'def': 'a cream spread on the skin; contains a chemical to filter out ultraviolet light and so protect from sunburn', 'name': 'sunscreen'}, {'frequency': 'f', 'id': 1057, 'synset': 'surfboard.n.01', 'synonyms': ['surfboard'], 'def': 'a narrow buoyant board for riding surf', 'name': 'surfboard'}, {'frequency': 'c', 'id': 1058, 'synset': 'sushi.n.01', 'synonyms': ['sushi'], 'def': 'rice (with raw fish) wrapped in seaweed', 'name': 'sushi'}, {'frequency': 'c', 'id': 1059, 'synset': 'swab.n.02', 'synonyms': ['mop'], 'def': 'cleaning implement consisting of absorbent material fastened to a handle; for cleaning floors', 'name': 'mop'}, {'frequency': 'c', 'id': 1060, 'synset': 'sweat_pants.n.01', 'synonyms': ['sweat_pants'], 'def': 'loose-fitting trousers with elastic cuffs; worn by athletes', 'name': 'sweat_pants'}, {'frequency': 'c', 'id': 1061, 'synset': 'sweatband.n.02', 'synonyms': ['sweatband'], 'def': 'a band of material tied around the forehead or wrist to absorb sweat', 'name': 'sweatband'}, {'frequency': 'f', 'id': 1062, 'synset': 'sweater.n.01', 'synonyms': ['sweater'], 'def': 'a crocheted or knitted garment covering the upper part of the body', 'name': 'sweater'}, {'frequency': 'f', 'id': 1063, 'synset': 'sweatshirt.n.01', 'synonyms': ['sweatshirt'], 'def': 'cotton knit pullover with long sleeves worn during athletic activity', 'name': 'sweatshirt'}, {'frequency': 'c', 'id': 1064, 'synset': 'sweet_potato.n.02', 'synonyms': ['sweet_potato'], 'def': 'the edible tuberous root of the sweet potato vine', 'name': 'sweet_potato'}, {'frequency': 'f', 'id': 1065, 'synset': 'swimsuit.n.01', 'synonyms': ['swimsuit', 'swimwear', 'bathing_suit', 'swimming_costume', 'bathing_costume', 'swimming_trunks', 'bathing_trunks'], 'def': 'garment worn for swimming', 'name': 'swimsuit'}, {'frequency': 'c', 'id': 1066, 'synset': 'sword.n.01', 'synonyms': ['sword'], 'def': 'a cutting or thrusting weapon that has a long metal blade', 'name': 'sword'}, {'frequency': 'r', 'id': 1067, 
'synset': 'syringe.n.01', 'synonyms': ['syringe'], 'def': 'a medical instrument used to inject or withdraw fluids', 'name': 'syringe'}, {'frequency': 'r', 'id': 1068, 'synset': 'tabasco.n.02', 'synonyms': ['Tabasco_sauce'], 'def': 'very spicy sauce (trade name Tabasco) made from fully-aged red peppers', 'name': 'Tabasco_sauce'}, {'frequency': 'r', 'id': 1069, 'synset': 'table-tennis_table.n.01', 'synonyms': ['table-tennis_table', 'ping-pong_table'], 'def': 'a table used for playing table tennis', 'name': 'table-tennis_table'}, {'frequency': 'f', 'id': 1070, 'synset': 'table.n.02', 'synonyms': ['table'], 'def': 'a piece of furniture having a smooth flat top that is usually supported by one or more vertical legs', 'name': 'table'}, {'frequency': 'c', 'id': 1071, 'synset': 'table_lamp.n.01', 'synonyms': ['table_lamp'], 'def': 'a lamp that sits on a table', 'name': 'table_lamp'}, {'frequency': 'f', 'id': 1072, 'synset': 'tablecloth.n.01', 'synonyms': ['tablecloth'], 'def': 'a covering spread over a dining table', 'name': 'tablecloth'}, {'frequency': 'r', 'id': 1073, 'synset': 'tachometer.n.01', 'synonyms': ['tachometer'], 'def': 'measuring instrument for indicating speed of rotation', 'name': 'tachometer'}, {'frequency': 'r', 'id': 1074, 'synset': 'taco.n.02', 'synonyms': ['taco'], 'def': 'a small tortilla cupped around a filling', 'name': 'taco'}, {'frequency': 'f', 'id': 1075, 'synset': 'tag.n.02', 'synonyms': ['tag'], 'def': 'a label associated with something for the purpose of identification or information', 'name': 'tag'}, {'frequency': 'f', 'id': 1076, 'synset': 'taillight.n.01', 'synonyms': ['taillight', 'rear_light'], 'def': 'lamp (usually red) mounted at the rear of a motor vehicle', 'name': 'taillight'}, {'frequency': 'r', 'id': 1077, 'synset': 'tambourine.n.01', 'synonyms': ['tambourine'], 'def': 'a shallow drum with a single drumhead and with metallic disks in the sides', 'name': 'tambourine'}, {'frequency': 'r', 'id': 1078, 'synset': 'tank.n.01', 'synonyms': ['army_tank', 'armored_combat_vehicle', 'armoured_combat_vehicle'], 'def': 'an enclosed armored military vehicle; has a cannon and moves on caterpillar treads', 'name': 'army_tank'}, {'frequency': 'c', 'id': 1079, 'synset': 'tank.n.02', 'synonyms': ['tank_(storage_vessel)', 'storage_tank'], 'def': 'a large (usually metallic) vessel for holding gases or liquids', 'name': 'tank_(storage_vessel)'}, {'frequency': 'f', 'id': 1080, 'synset': 'tank_top.n.01', 'synonyms': ['tank_top_(clothing)'], 'def': 'a tight-fitting sleeveless shirt with wide shoulder straps and low neck and no front opening', 'name': 'tank_top_(clothing)'}, {'frequency': 'c', 'id': 1081, 'synset': 'tape.n.01', 'synonyms': ['tape_(sticky_cloth_or_paper)'], 'def': 'a long thin piece of cloth or paper as used for binding or fastening', 'name': 'tape_(sticky_cloth_or_paper)'}, {'frequency': 'c', 'id': 1082, 'synset': 'tape.n.04', 'synonyms': ['tape_measure', 'measuring_tape'], 'def': 'measuring instrument consisting of a narrow strip (cloth or metal) marked in inches or centimeters and used for measuring lengths', 'name': 'tape_measure'}, {'frequency': 'c', 'id': 1083, 'synset': 'tapestry.n.02', 'synonyms': ['tapestry'], 'def': 'a heavy textile with a woven design; used for curtains and upholstery', 'name': 'tapestry'}, {'frequency': 'f', 'id': 1084, 'synset': 'tarpaulin.n.01', 'synonyms': ['tarp'], 'def': 'waterproofed canvas', 'name': 'tarp'}, {'frequency': 'c', 'id': 1085, 'synset': 'tartan.n.01', 'synonyms': ['tartan', 'plaid'], 'def': 'a cloth having a 
crisscross design', 'name': 'tartan'}, {'frequency': 'c', 'id': 1086, 'synset': 'tassel.n.01', 'synonyms': ['tassel'], 'def': 'adornment consisting of a bunch of cords fastened at one end', 'name': 'tassel'}, {'frequency': 'r', 'id': 1087, 'synset': 'tea_bag.n.01', 'synonyms': ['tea_bag'], 'def': 'a measured amount of tea in a bag for an individual serving of tea', 'name': 'tea_bag'}, {'frequency': 'c', 'id': 1088, 'synset': 'teacup.n.02', 'synonyms': ['teacup'], 'def': 'a cup from which tea is drunk', 'name': 'teacup'}, {'frequency': 'c', 'id': 1089, 'synset': 'teakettle.n.01', 'synonyms': ['teakettle'], 'def': 'kettle for boiling water to make tea', 'name': 'teakettle'}, {'frequency': 'c', 'id': 1090, 'synset': 'teapot.n.01', 'synonyms': ['teapot'], 'def': 'pot for brewing tea; usually has a spout and handle', 'name': 'teapot'}, {'frequency': 'f', 'id': 1091, 'synset': 'teddy.n.01', 'synonyms': ['teddy_bear'], 'def': "plaything consisting of a child's toy bear (usually plush and stuffed with soft materials)", 'name': 'teddy_bear'}, {'frequency': 'f', 'id': 1092, 'synset': 'telephone.n.01', 'synonyms': ['telephone', 'phone', 'telephone_set'], 'def': 'electronic device for communicating by voice over long distances', 'name': 'telephone'}, {'frequency': 'c', 'id': 1093, 'synset': 'telephone_booth.n.01', 'synonyms': ['telephone_booth', 'phone_booth', 'call_box', 'telephone_box', 'telephone_kiosk'], 'def': 'booth for using a telephone', 'name': 'telephone_booth'}, {'frequency': 'f', 'id': 1094, 'synset': 'telephone_pole.n.01', 'synonyms': ['telephone_pole', 'telegraph_pole', 'telegraph_post'], 'def': 'tall pole supporting telephone wires', 'name': 'telephone_pole'}, {'frequency': 'r', 'id': 1095, 'synset': 'telephoto_lens.n.01', 'synonyms': ['telephoto_lens', 'zoom_lens'], 'def': 'a camera lens that magnifies the image', 'name': 'telephoto_lens'}, {'frequency': 'c', 'id': 1096, 'synset': 'television_camera.n.01', 'synonyms': ['television_camera', 'tv_camera'], 'def': 'television equipment for capturing and recording video', 'name': 'television_camera'}, {'frequency': 'f', 'id': 1097, 'synset': 'television_receiver.n.01', 'synonyms': ['television_set', 'tv', 'tv_set'], 'def': 'an electronic device that receives television signals and displays them on a screen', 'name': 'television_set'}, {'frequency': 'f', 'id': 1098, 'synset': 'tennis_ball.n.01', 'synonyms': ['tennis_ball'], 'def': 'ball about the size of a fist used in playing tennis', 'name': 'tennis_ball'}, {'frequency': 'f', 'id': 1099, 'synset': 'tennis_racket.n.01', 'synonyms': ['tennis_racket'], 'def': 'a racket used to play tennis', 'name': 'tennis_racket'}, {'frequency': 'r', 'id': 1100, 'synset': 'tequila.n.01', 'synonyms': ['tequila'], 'def': 'Mexican liquor made from fermented juices of an agave plant', 'name': 'tequila'}, {'frequency': 'c', 'id': 1101, 'synset': 'thermometer.n.01', 'synonyms': ['thermometer'], 'def': 'measuring instrument for measuring temperature', 'name': 'thermometer'}, {'frequency': 'c', 'id': 1102, 'synset': 'thermos.n.01', 'synonyms': ['thermos_bottle'], 'def': 'vacuum flask that preserves temperature of hot or cold drinks', 'name': 'thermos_bottle'}, {'frequency': 'c', 'id': 1103, 'synset': 'thermostat.n.01', 'synonyms': ['thermostat'], 'def': 'a regulator for automatically regulating temperature by starting or stopping the supply of heat', 'name': 'thermostat'}, {'frequency': 'r', 'id': 1104, 'synset': 'thimble.n.02', 'synonyms': ['thimble'], 'def': 'a small metal cap to protect the finger while sewing; 
can be used as a small container', 'name': 'thimble'}, {'frequency': 'c', 'id': 1105, 'synset': 'thread.n.01', 'synonyms': ['thread', 'yarn'], 'def': 'a fine cord of twisted fibers (of cotton or silk or wool or nylon etc.) used in sewing and weaving', 'name': 'thread'}, {'frequency': 'c', 'id': 1106, 'synset': 'thumbtack.n.01', 'synonyms': ['thumbtack', 'drawing_pin', 'pushpin'], 'def': 'a tack for attaching papers to a bulletin board or drawing board', 'name': 'thumbtack'}, {'frequency': 'c', 'id': 1107, 'synset': 'tiara.n.01', 'synonyms': ['tiara'], 'def': 'a jeweled headdress worn by women on formal occasions', 'name': 'tiara'}, {'frequency': 'c', 'id': 1108, 'synset': 'tiger.n.02', 'synonyms': ['tiger'], 'def': 'large feline of forests in most of Asia having a tawny coat with black stripes', 'name': 'tiger'}, {'frequency': 'c', 'id': 1109, 'synset': 'tights.n.01', 'synonyms': ['tights_(clothing)', 'leotards'], 'def': 'skintight knit hose covering the body from the waist to the feet worn by acrobats and dancers and as stockings by women and girls', 'name': 'tights_(clothing)'}, {'frequency': 'c', 'id': 1110, 'synset': 'timer.n.01', 'synonyms': ['timer', 'stopwatch'], 'def': 'a timepiece that measures a time interval and signals its end', 'name': 'timer'}, {'frequency': 'f', 'id': 1111, 'synset': 'tinfoil.n.01', 'synonyms': ['tinfoil'], 'def': 'foil made of tin or an alloy of tin and lead', 'name': 'tinfoil'}, {'frequency': 'r', 'id': 1112, 'synset': 'tinsel.n.01', 'synonyms': ['tinsel'], 'def': 'a showy decoration that is basically valueless', 'name': 'tinsel'}, {'frequency': 'f', 'id': 1113, 'synset': 'tissue.n.02', 'synonyms': ['tissue_paper'], 'def': 'a soft thin (usually translucent) paper', 'name': 'tissue_paper'}, {'frequency': 'c', 'id': 1114, 'synset': 'toast.n.01', 'synonyms': ['toast_(food)'], 'def': 'slice of bread that has been toasted', 'name': 'toast_(food)'}, {'frequency': 'f', 'id': 1115, 'synset': 'toaster.n.02', 'synonyms': ['toaster'], 'def': 'a kitchen appliance (usually electric) for toasting bread', 'name': 'toaster'}, {'frequency': 'c', 'id': 1116, 'synset': 'toaster_oven.n.01', 'synonyms': ['toaster_oven'], 'def': 'kitchen appliance consisting of a small electric oven for toasting or warming food', 'name': 'toaster_oven'}, {'frequency': 'f', 'id': 1117, 'synset': 'toilet.n.02', 'synonyms': ['toilet'], 'def': 'a plumbing fixture for defecation and urination', 'name': 'toilet'}, {'frequency': 'f', 'id': 1118, 'synset': 'toilet_tissue.n.01', 'synonyms': ['toilet_tissue', 'toilet_paper', 'bathroom_tissue'], 'def': 'a soft thin absorbent paper for use in toilets', 'name': 'toilet_tissue'}, {'frequency': 'f', 'id': 1119, 'synset': 'tomato.n.01', 'synonyms': ['tomato'], 'def': 'mildly acid red or yellow pulpy fruit eaten as a vegetable', 'name': 'tomato'}, {'frequency': 'c', 'id': 1120, 'synset': 'tongs.n.01', 'synonyms': ['tongs'], 'def': 'any of various devices for taking hold of objects; usually have two hinged legs with handles above and pointed hooks below', 'name': 'tongs'}, {'frequency': 'c', 'id': 1121, 'synset': 'toolbox.n.01', 'synonyms': ['toolbox'], 'def': 'a box or chest or cabinet for holding hand tools', 'name': 'toolbox'}, {'frequency': 'f', 'id': 1122, 'synset': 'toothbrush.n.01', 'synonyms': ['toothbrush'], 'def': 'small brush; has long handle; used to clean teeth', 'name': 'toothbrush'}, {'frequency': 'f', 'id': 1123, 'synset': 'toothpaste.n.01', 'synonyms': ['toothpaste'], 'def': 'a dentifrice in the form of a paste', 'name': 'toothpaste'}, 
{'frequency': 'c', 'id': 1124, 'synset': 'toothpick.n.01', 'synonyms': ['toothpick'], 'def': 'pick consisting of a small strip of wood or plastic; used to pick food from between the teeth', 'name': 'toothpick'}, {'frequency': 'c', 'id': 1125, 'synset': 'top.n.09', 'synonyms': ['cover'], 'def': 'covering for a hole (especially a hole in the top of a container)', 'name': 'cover'}, {'frequency': 'c', 'id': 1126, 'synset': 'tortilla.n.01', 'synonyms': ['tortilla'], 'def': 'thin unleavened pancake made from cornmeal or wheat flour', 'name': 'tortilla'}, {'frequency': 'c', 'id': 1127, 'synset': 'tow_truck.n.01', 'synonyms': ['tow_truck'], 'def': 'a truck equipped to hoist and pull wrecked cars (or to remove cars from no-parking zones)', 'name': 'tow_truck'}, {'frequency': 'f', 'id': 1128, 'synset': 'towel.n.01', 'synonyms': ['towel'], 'def': 'a rectangular piece of absorbent cloth (or paper) for drying or wiping', 'name': 'towel'}, {'frequency': 'f', 'id': 1129, 'synset': 'towel_rack.n.01', 'synonyms': ['towel_rack', 'towel_rail', 'towel_bar'], 'def': 'a rack consisting of one or more bars on which towels can be hung', 'name': 'towel_rack'}, {'frequency': 'f', 'id': 1130, 'synset': 'toy.n.03', 'synonyms': ['toy'], 'def': 'a device regarded as providing amusement', 'name': 'toy'}, {'frequency': 'c', 'id': 1131, 'synset': 'tractor.n.01', 'synonyms': ['tractor_(farm_equipment)'], 'def': 'a wheeled vehicle with large wheels; used in farming and other applications', 'name': 'tractor_(farm_equipment)'}, {'frequency': 'f', 'id': 1132, 'synset': 'traffic_light.n.01', 'synonyms': ['traffic_light'], 'def': 'a device to control vehicle traffic often consisting of three or more lights', 'name': 'traffic_light'}, {'frequency': 'r', 'id': 1133, 'synset': 'trail_bike.n.01', 'synonyms': ['dirt_bike'], 'def': 'a lightweight motorcycle equipped with rugged tires and suspension for off-road use', 'name': 'dirt_bike'}, {'frequency': 'c', 'id': 1134, 'synset': 'trailer_truck.n.01', 'synonyms': ['trailer_truck', 'tractor_trailer', 'trucking_rig', 'articulated_lorry', 'semi_truck'], 'def': 'a truck consisting of a tractor and trailer together', 'name': 'trailer_truck'}, {'frequency': 'f', 'id': 1135, 'synset': 'train.n.01', 'synonyms': ['train_(railroad_vehicle)', 'railroad_train'], 'def': 'public or private transport provided by a line of railway cars coupled together and drawn by a locomotive', 'name': 'train_(railroad_vehicle)'}, {'frequency': 'r', 'id': 1136, 'synset': 'trampoline.n.01', 'synonyms': ['trampoline'], 'def': 'gymnastic apparatus consisting of a strong canvas sheet attached with springs to a metal frame', 'name': 'trampoline'}, {'frequency': 'f', 'id': 1137, 'synset': 'tray.n.01', 'synonyms': ['tray'], 'def': 'an open receptacle for holding or displaying or serving articles or food', 'name': 'tray'}, {'frequency': 'r', 'id': 1138, 'synset': 'tree_house.n.01', 'synonyms': ['tree_house'], 'def': '(NOT A TREE) a PLAYHOUSE built in the branches of a tree', 'name': 'tree_house'}, {'frequency': 'r', 'id': 1139, 'synset': 'trench_coat.n.01', 'synonyms': ['trench_coat'], 'def': 'a military style raincoat; belted with deep pockets', 'name': 'trench_coat'}, {'frequency': 'r', 'id': 1140, 'synset': 'triangle.n.05', 'synonyms': ['triangle_(musical_instrument)'], 'def': 'a percussion instrument consisting of a metal bar bent in the shape of an open triangle', 'name': 'triangle_(musical_instrument)'}, {'frequency': 'r', 'id': 1141, 'synset': 'tricycle.n.01', 'synonyms': ['tricycle'], 'def': 'a vehicle with three 
wheels that is moved by foot pedals', 'name': 'tricycle'}, {'frequency': 'c', 'id': 1142, 'synset': 'tripod.n.01', 'synonyms': ['tripod'], 'def': 'a three-legged rack used for support', 'name': 'tripod'}, {'frequency': 'f', 'id': 1143, 'synset': 'trouser.n.01', 'synonyms': ['trousers', 'pants_(clothing)'], 'def': 'a garment extending from the waist to the knee or ankle, covering each leg separately', 'name': 'trousers'}, {'frequency': 'f', 'id': 1144, 'synset': 'truck.n.01', 'synonyms': ['truck'], 'def': 'an automotive vehicle suitable for hauling', 'name': 'truck'}, {'frequency': 'r', 'id': 1145, 'synset': 'truffle.n.03', 'synonyms': ['truffle_(chocolate)', 'chocolate_truffle'], 'def': 'creamy chocolate candy', 'name': 'truffle_(chocolate)'}, {'frequency': 'c', 'id': 1146, 'synset': 'trunk.n.02', 'synonyms': ['trunk'], 'def': 'luggage consisting of a large strong case used when traveling or for storage', 'name': 'trunk'}, {'frequency': 'r', 'id': 1147, 'synset': 'tub.n.02', 'synonyms': ['vat'], 'def': 'a large open vessel for holding or storing liquids', 'name': 'vat'}, {'frequency': 'c', 'id': 1148, 'synset': 'turban.n.01', 'synonyms': ['turban'], 'def': 'a traditional headdress consisting of a long scarf wrapped around the head', 'name': 'turban'}, {'frequency': 'r', 'id': 1149, 'synset': 'turkey.n.01', 'synonyms': ['turkey_(bird)'], 'def': 'large gallinaceous bird with fan-shaped tail; widely domesticated for food', 'name': 'turkey_(bird)'}, {'frequency': 'c', 'id': 1150, 'synset': 'turkey.n.04', 'synonyms': ['turkey_(food)'], 'def': 'flesh of large domesticated fowl usually roasted', 'name': 'turkey_(food)'}, {'frequency': 'r', 'id': 1151, 'synset': 'turnip.n.01', 'synonyms': ['turnip'], 'def': 'widely cultivated plant having a large fleshy edible white or yellow root', 'name': 'turnip'}, {'frequency': 'c', 'id': 1152, 'synset': 'turtle.n.02', 'synonyms': ['turtle'], 'def': 'any of various aquatic and land reptiles having a bony shell and flipper-like limbs for swimming', 'name': 'turtle'}, {'frequency': 'r', 'id': 1153, 'synset': 'turtleneck.n.01', 'synonyms': ['turtleneck_(clothing)', 'polo-neck'], 'def': 'a sweater or jersey with a high close-fitting collar', 'name': 'turtleneck_(clothing)'}, {'frequency': 'r', 'id': 1154, 'synset': 'typewriter.n.01', 'synonyms': ['typewriter'], 'def': 'hand-operated character printer for printing written messages one character at a time', 'name': 'typewriter'}, {'frequency': 'f', 'id': 1155, 'synset': 'umbrella.n.01', 'synonyms': ['umbrella'], 'def': 'a lightweight handheld collapsible canopy', 'name': 'umbrella'}, {'frequency': 'c', 'id': 1156, 'synset': 'underwear.n.01', 'synonyms': ['underwear', 'underclothes', 'underclothing', 'underpants'], 'def': 'undergarment worn next to the skin and under the outer garments', 'name': 'underwear'}, {'frequency': 'r', 'id': 1157, 'synset': 'unicycle.n.01', 'synonyms': ['unicycle'], 'def': 'a vehicle with a single wheel that is driven by pedals', 'name': 'unicycle'}, {'frequency': 'c', 'id': 1158, 'synset': 'urinal.n.01', 'synonyms': ['urinal'], 'def': 'a plumbing fixture (usually attached to the wall) used by men to urinate', 'name': 'urinal'}, {'frequency': 'r', 'id': 1159, 'synset': 'urn.n.01', 'synonyms': ['urn'], 'def': 'a large vase that usually has a pedestal or feet', 'name': 'urn'}, {'frequency': 'c', 'id': 1160, 'synset': 'vacuum.n.04', 'synonyms': ['vacuum_cleaner'], 'def': 'an electrical home appliance that cleans by suction', 'name': 'vacuum_cleaner'}, {'frequency': 'c', 'id': 1161, 'synset': 
'valve.n.03', 'synonyms': ['valve'], 'def': 'control consisting of a mechanical device for controlling the flow of a fluid', 'name': 'valve'}, {'frequency': 'f', 'id': 1162, 'synset': 'vase.n.01', 'synonyms': ['vase'], 'def': 'an open jar of glass or porcelain used as an ornament or to hold flowers', 'name': 'vase'}, {'frequency': 'c', 'id': 1163, 'synset': 'vending_machine.n.01', 'synonyms': ['vending_machine'], 'def': 'a slot machine for selling goods', 'name': 'vending_machine'}, {'frequency': 'f', 'id': 1164, 'synset': 'vent.n.01', 'synonyms': ['vent', 'blowhole', 'air_vent'], 'def': 'a hole for the escape of gas or air', 'name': 'vent'}, {'frequency': 'c', 'id': 1165, 'synset': 'videotape.n.01', 'synonyms': ['videotape'], 'def': 'a video recording made on magnetic tape', 'name': 'videotape'}, {'frequency': 'r', 'id': 1166, 'synset': 'vinegar.n.01', 'synonyms': ['vinegar'], 'def': 'sour-tasting liquid produced usually by oxidation of the alcohol in wine or cider and used as a condiment or food preservative', 'name': 'vinegar'}, {'frequency': 'r', 'id': 1167, 'synset': 'violin.n.01', 'synonyms': ['violin', 'fiddle'], 'def': 'bowed stringed instrument that is the highest member of the violin family', 'name': 'violin'}, {'frequency': 'r', 'id': 1168, 'synset': 'vodka.n.01', 'synonyms': ['vodka'], 'def': 'unaged colorless liquor originating in Russia', 'name': 'vodka'}, {'frequency': 'r', 'id': 1169, 'synset': 'volleyball.n.02', 'synonyms': ['volleyball'], 'def': 'an inflated ball used in playing volleyball', 'name': 'volleyball'}, {'frequency': 'r', 'id': 1170, 'synset': 'vulture.n.01', 'synonyms': ['vulture'], 'def': 'any of various large birds of prey having naked heads and weak claws and feeding chiefly on carrion', 'name': 'vulture'}, {'frequency': 'c', 'id': 1171, 'synset': 'waffle.n.01', 'synonyms': ['waffle'], 'def': 'pancake batter baked in a waffle iron', 'name': 'waffle'}, {'frequency': 'r', 'id': 1172, 'synset': 'waffle_iron.n.01', 'synonyms': ['waffle_iron'], 'def': 'a kitchen appliance for baking waffles', 'name': 'waffle_iron'}, {'frequency': 'c', 'id': 1173, 'synset': 'wagon.n.01', 'synonyms': ['wagon'], 'def': 'any of various kinds of wheeled vehicles drawn by an animal or a tractor', 'name': 'wagon'}, {'frequency': 'c', 'id': 1174, 'synset': 'wagon_wheel.n.01', 'synonyms': ['wagon_wheel'], 'def': 'a wheel of a wagon', 'name': 'wagon_wheel'}, {'frequency': 'c', 'id': 1175, 'synset': 'walking_stick.n.01', 'synonyms': ['walking_stick'], 'def': 'a stick carried in the hand for support in walking', 'name': 'walking_stick'}, {'frequency': 'c', 'id': 1176, 'synset': 'wall_clock.n.01', 'synonyms': ['wall_clock'], 'def': 'a clock mounted on a wall', 'name': 'wall_clock'}, {'frequency': 'f', 'id': 1177, 'synset': 'wall_socket.n.01', 'synonyms': ['wall_socket', 'wall_plug', 'electric_outlet', 'electrical_outlet', 'outlet', 'electric_receptacle'], 'def': 'receptacle providing a place in a wiring system where current can be taken to run electrical devices', 'name': 'wall_socket'}, {'frequency': 'c', 'id': 1178, 'synset': 'wallet.n.01', 'synonyms': ['wallet', 'billfold'], 'def': 'a pocket-size case for holding papers and paper money', 'name': 'wallet'}, {'frequency': 'r', 'id': 1179, 'synset': 'walrus.n.01', 'synonyms': ['walrus'], 'def': 'either of two large northern marine mammals having ivory tusks and tough hide over thick blubber', 'name': 'walrus'}, {'frequency': 'r', 'id': 1180, 'synset': 'wardrobe.n.01', 'synonyms': ['wardrobe'], 'def': 'a tall piece of furniture that provides 
storage space for clothes; has a door and rails or hooks for hanging clothes', 'name': 'wardrobe'}, {'frequency': 'r', 'id': 1181, 'synset': 'wasabi.n.02', 'synonyms': ['wasabi'], 'def': 'the thick green root of the wasabi plant that the Japanese use in cooking and that tastes like strong horseradish', 'name': 'wasabi'}, {'frequency': 'c', 'id': 1182, 'synset': 'washer.n.03', 'synonyms': ['automatic_washer', 'washing_machine'], 'def': 'a home appliance for washing clothes and linens automatically', 'name': 'automatic_washer'}, {'frequency': 'f', 'id': 1183, 'synset': 'watch.n.01', 'synonyms': ['watch', 'wristwatch'], 'def': 'a small, portable timepiece', 'name': 'watch'}, {'frequency': 'f', 'id': 1184, 'synset': 'water_bottle.n.01', 'synonyms': ['water_bottle'], 'def': 'a bottle for holding water', 'name': 'water_bottle'}, {'frequency': 'c', 'id': 1185, 'synset': 'water_cooler.n.01', 'synonyms': ['water_cooler'], 'def': 'a device for cooling and dispensing drinking water', 'name': 'water_cooler'}, {'frequency': 'c', 'id': 1186, 'synset': 'water_faucet.n.01', 'synonyms': ['water_faucet', 'water_tap', 'tap_(water_faucet)'], 'def': 'a faucet for drawing water from a pipe or cask', 'name': 'water_faucet'}, {'frequency': 'r', 'id': 1187, 'synset': 'water_filter.n.01', 'synonyms': ['water_filter'], 'def': 'a filter to remove impurities from the water supply', 'name': 'water_filter'}, {'frequency': 'r', 'id': 1188, 'synset': 'water_heater.n.01', 'synonyms': ['water_heater', 'hot-water_heater'], 'def': 'a heater and storage tank to supply heated water', 'name': 'water_heater'}, {'frequency': 'r', 'id': 1189, 'synset': 'water_jug.n.01', 'synonyms': ['water_jug'], 'def': 'a jug that holds water', 'name': 'water_jug'}, {'frequency': 'r', 'id': 1190, 'synset': 'water_pistol.n.01', 'synonyms': ['water_gun', 'squirt_gun'], 'def': 'plaything consisting of a toy pistol that squirts water', 'name': 'water_gun'}, {'frequency': 'c', 'id': 1191, 'synset': 'water_scooter.n.01', 'synonyms': ['water_scooter', 'sea_scooter', 'jet_ski'], 'def': 'a motorboat resembling a motor scooter (NOT A SURFBOARD OR WATER SKI)', 'name': 'water_scooter'}, {'frequency': 'c', 'id': 1192, 'synset': 'water_ski.n.01', 'synonyms': ['water_ski'], 'def': 'broad ski for skimming over water towed by a speedboat (DO NOT MARK WATER)', 'name': 'water_ski'}, {'frequency': 'c', 'id': 1193, 'synset': 'water_tower.n.01', 'synonyms': ['water_tower'], 'def': 'a large reservoir for water', 'name': 'water_tower'}, {'frequency': 'c', 'id': 1194, 'synset': 'watering_can.n.01', 'synonyms': ['watering_can'], 'def': 'a container with a handle and a spout with a perforated nozzle; used to sprinkle water over plants', 'name': 'watering_can'}, {'frequency': 'c', 'id': 1195, 'synset': 'watermelon.n.02', 'synonyms': ['watermelon'], 'def': 'large oblong or roundish melon with a hard green rind and sweet watery red or occasionally yellowish pulp', 'name': 'watermelon'}, {'frequency': 'f', 'id': 1196, 'synset': 'weathervane.n.01', 'synonyms': ['weathervane', 'vane_(weathervane)', 'wind_vane'], 'def': 'mechanical device attached to an elevated structure; rotates freely to show the direction of the wind', 'name': 'weathervane'}, {'frequency': 'c', 'id': 1197, 'synset': 'webcam.n.01', 'synonyms': ['webcam'], 'def': 'a digital camera designed to take digital photographs and transmit them over the internet', 'name': 'webcam'}, {'frequency': 'c', 'id': 1198, 'synset': 'wedding_cake.n.01', 'synonyms': ['wedding_cake', 'bridecake'], 'def': 'a rich cake with two or more 
tiers and covered with frosting and decorations; served at a wedding reception', 'name': 'wedding_cake'}, {'frequency': 'c', 'id': 1199, 'synset': 'wedding_ring.n.01', 'synonyms': ['wedding_ring', 'wedding_band'], 'def': 'a ring given to the bride and/or groom at the wedding', 'name': 'wedding_ring'}, {'frequency': 'f', 'id': 1200, 'synset': 'wet_suit.n.01', 'synonyms': ['wet_suit'], 'def': 'a close-fitting garment made of a permeable material; worn in cold water to retain body heat', 'name': 'wet_suit'}, {'frequency': 'f', 'id': 1201, 'synset': 'wheel.n.01', 'synonyms': ['wheel'], 'def': 'a circular frame with spokes (or a solid disc) that can rotate on a shaft or axle', 'name': 'wheel'}, {'frequency': 'c', 'id': 1202, 'synset': 'wheelchair.n.01', 'synonyms': ['wheelchair'], 'def': 'a movable chair mounted on large wheels', 'name': 'wheelchair'}, {'frequency': 'c', 'id': 1203, 'synset': 'whipped_cream.n.01', 'synonyms': ['whipped_cream'], 'def': 'cream that has been beaten until light and fluffy', 'name': 'whipped_cream'}, {'frequency': 'r', 'id': 1204, 'synset': 'whiskey.n.01', 'synonyms': ['whiskey'], 'def': 'a liquor made from fermented mash of grain', 'name': 'whiskey'}, {'frequency': 'r', 'id': 1205, 'synset': 'whistle.n.03', 'synonyms': ['whistle'], 'def': 'a small wind instrument that produces a whistling sound by blowing into it', 'name': 'whistle'}, {'frequency': 'r', 'id': 1206, 'synset': 'wick.n.02', 'synonyms': ['wick'], 'def': 'a loosely woven cord in a candle or oil lamp that is lit on fire', 'name': 'wick'}, {'frequency': 'c', 'id': 1207, 'synset': 'wig.n.01', 'synonyms': ['wig'], 'def': 'hairpiece covering the head and made of real or synthetic hair', 'name': 'wig'}, {'frequency': 'c', 'id': 1208, 'synset': 'wind_chime.n.01', 'synonyms': ['wind_chime'], 'def': 'a decorative arrangement of pieces of metal or glass or pottery that hang together loosely so the wind can cause them to tinkle', 'name': 'wind_chime'}, {'frequency': 'c', 'id': 1209, 'synset': 'windmill.n.01', 'synonyms': ['windmill'], 'def': 'a mill that is powered by the wind', 'name': 'windmill'}, {'frequency': 'c', 'id': 1210, 'synset': 'window_box.n.01', 'synonyms': ['window_box_(for_plants)'], 'def': 'a container for growing plants on a windowsill', 'name': 'window_box_(for_plants)'}, {'frequency': 'f', 'id': 1211, 'synset': 'windshield_wiper.n.01', 'synonyms': ['windshield_wiper', 'windscreen_wiper', 'wiper_(for_windshield/screen)'], 'def': 'a mechanical device that cleans the windshield', 'name': 'windshield_wiper'}, {'frequency': 'c', 'id': 1212, 'synset': 'windsock.n.01', 'synonyms': ['windsock', 'air_sock', 'air-sleeve', 'wind_sleeve', 'wind_cone'], 'def': 'a truncated cloth cone mounted on a mast/pole; shows wind direction', 'name': 'windsock'}, {'frequency': 'f', 'id': 1213, 'synset': 'wine_bottle.n.01', 'synonyms': ['wine_bottle'], 'def': 'a bottle for holding wine', 'name': 'wine_bottle'}, {'frequency': 'r', 'id': 1214, 'synset': 'wine_bucket.n.01', 'synonyms': ['wine_bucket', 'wine_cooler'], 'def': 'a bucket of ice used to chill a bottle of wine', 'name': 'wine_bucket'}, {'frequency': 'f', 'id': 1215, 'synset': 'wineglass.n.01', 'synonyms': ['wineglass'], 'def': 'a glass that has a stem and in which wine is served', 'name': 'wineglass'}, {'frequency': 'r', 'id': 1216, 'synset': 'wing_chair.n.01', 'synonyms': ['wing_chair'], 'def': 'easy chair having wings on each side of a high back', 'name': 'wing_chair'}, {'frequency': 'c', 'id': 1217, 'synset': 'winker.n.02', 'synonyms': ['blinder_(for_horses)'], 
'def': 'blinds that prevent a horse from seeing something on either side', 'name': 'blinder_(for_horses)'}, {'frequency': 'c', 'id': 1218, 'synset': 'wok.n.01', 'synonyms': ['wok'], 'def': 'pan with a convex bottom; used for frying in Chinese cooking', 'name': 'wok'}, {'frequency': 'r', 'id': 1219, 'synset': 'wolf.n.01', 'synonyms': ['wolf'], 'def': 'a wild carnivorous mammal of the dog family, living and hunting in packs', 'name': 'wolf'}, {'frequency': 'c', 'id': 1220, 'synset': 'wooden_spoon.n.02', 'synonyms': ['wooden_spoon'], 'def': 'a spoon made of wood', 'name': 'wooden_spoon'}, {'frequency': 'c', 'id': 1221, 'synset': 'wreath.n.01', 'synonyms': ['wreath'], 'def': 'an arrangement of flowers, leaves, or stems fastened in a ring', 'name': 'wreath'}, {'frequency': 'c', 'id': 1222, 'synset': 'wrench.n.03', 'synonyms': ['wrench', 'spanner'], 'def': 'a hand tool that is used to hold or twist a nut or bolt', 'name': 'wrench'}, {'frequency': 'c', 'id': 1223, 'synset': 'wristband.n.01', 'synonyms': ['wristband'], 'def': 'band consisting of a part of a sleeve that covers the wrist', 'name': 'wristband'}, {'frequency': 'f', 'id': 1224, 'synset': 'wristlet.n.01', 'synonyms': ['wristlet', 'wrist_band'], 'def': 'a band or bracelet worn around the wrist', 'name': 'wristlet'}, {'frequency': 'r', 'id': 1225, 'synset': 'yacht.n.01', 'synonyms': ['yacht'], 'def': 'an expensive vessel propelled by sail or power and used for cruising or racing', 'name': 'yacht'}, {'frequency': 'r', 'id': 1226, 'synset': 'yak.n.02', 'synonyms': ['yak'], 'def': 'large long-haired wild ox of Tibet often domesticated', 'name': 'yak'}, {'frequency': 'c', 'id': 1227, 'synset': 'yogurt.n.01', 'synonyms': ['yogurt', 'yoghurt', 'yoghourt'], 'def': 'a custard-like food made from curdled milk', 'name': 'yogurt'}, {'frequency': 'r', 'id': 1228, 'synset': 'yoke.n.07', 'synonyms': ['yoke_(animal_equipment)'], 'def': 'gear joining two animals at the neck; NOT egg yolk', 'name': 'yoke_(animal_equipment)'}, {'frequency': 'f', 'id': 1229, 'synset': 'zebra.n.01', 'synonyms': ['zebra'], 'def': 'any of several fleet black-and-white striped African equines', 'name': 'zebra'}, {'frequency': 'c', 'id': 1230, 'synset': 'zucchini.n.02', 'synonyms': ['zucchini', 'courgette'], 'def': 'small cucumber-shaped vegetable marrow; typically dark green', 'name': 'zucchini'}] # noqa -# fmt: on diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py deleted file mode 100644 index 5872d96..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/pascal_voc.py +++ /dev/null @@ -1,80 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import numpy as np -import os -import xml.etree.ElementTree as ET -from fvcore.common.file_io import PathManager - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -__all__ = ["register_pascal_voc"] - - -# fmt: off -CLASS_NAMES = [ - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", - "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", - "pottedplant", "sheep", "sofa", "train", "tvmonitor", -] -# fmt: on - - -def load_voc_instances(dirname: str, split: str): - """ - Load Pascal VOC detection annotations to Detectron2 format. 
- - Args: - dirname: Contain "Annotations", "ImageSets", "JPEGImages" - split (str): one of "train", "test", "val", "trainval" - """ - with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: - fileids = np.loadtxt(f, dtype=np.str) - - # Needs to read many small annotation files. Makes sense at local - annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/")) - dicts = [] - for fileid in fileids: - anno_file = os.path.join(annotation_dirname, fileid + ".xml") - jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") - - with PathManager.open(anno_file) as f: - tree = ET.parse(f) - - r = { - "file_name": jpeg_file, - "image_id": fileid, - "height": int(tree.findall("./size/height")[0].text), - "width": int(tree.findall("./size/width")[0].text), - } - instances = [] - - for obj in tree.findall("object"): - cls = obj.find("name").text - # We include "difficult" samples in training. - # Based on limited experiments, they don't hurt accuracy. - # difficult = int(obj.find("difficult").text) - # if difficult == 1: - # continue - bbox = obj.find("bndbox") - bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] - # Original annotations are integers in the range [1, W or H] - # Assuming they mean 1-based pixel indices (inclusive), - # a box with annotation (xmin=1, xmax=W) covers the whole image. - # In coordinate space this is represented by (xmin=0, xmax=W) - bbox[0] -= 1.0 - bbox[1] -= 1.0 - instances.append( - {"category_id": CLASS_NAMES.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS} - ) - r["annotations"] = instances - dicts.append(r) - return dicts - - -def register_pascal_voc(name, dirname, split, year): - DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split)) - MetadataCatalog.get(name).set( - thing_classes=CLASS_NAMES, dirname=dirname, year=year, split=split - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/register_coco.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/register_coco.py deleted file mode 100644 index a0a4db6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/datasets/register_coco.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import os - -from detectron2.data import DatasetCatalog, MetadataCatalog - -from .coco import load_coco_json, load_sem_seg - -""" -This file contains functions to register a COCO-format dataset to the DatasetCatalog. -""" - -__all__ = ["register_coco_instances", "register_coco_panoptic_separated"] - - -def register_coco_instances(name, metadata, json_file, image_root): - """ - Register a dataset in COCO's json annotation format for - instance detection, instance segmentation and keypoint detection. - (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. - `instances*.json` and `person_keypoints*.json` in the dataset). - - This is an example of how to register a new dataset. - You can do something similar to this function, to register new data. - - Args: - name (str): the name that identifies a dataset, e.g. "coco_2014_train". - metadata (dict): extra metadata associated with this dataset. You can - leave it as an empty dict. - json_file (str): path to the json instance annotation file. - image_root (str or path-like): directory which contains all the images. 
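For context, a minimal sketch of how this registration entry point is typically called (the dataset name and paths below are placeholders, assuming the upstream detectron2 package layout):

    from detectron2.data import DatasetCatalog
    from detectron2.data.datasets import register_coco_instances

    # Register a custom COCO-format dataset; the name and paths are hypothetical.
    register_coco_instances(
        "my_dataset_train",                       # dataset name referenced in configs
        {},                                       # extra metadata, may be empty
        "datasets/my_dataset/annotations.json",   # COCO-style instance annotations
        "datasets/my_dataset/images",             # directory containing the images
    )

    # The registered loader can then be materialized (requires the files to exist):
    dicts = DatasetCatalog.get("my_dataset_train")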
- """ - assert isinstance(name, str), name - assert isinstance(json_file, (str, os.PathLike)), json_file - assert isinstance(image_root, (str, os.PathLike)), image_root - # 1. register a function which returns dicts - DatasetCatalog.register(name, lambda: load_coco_json(json_file, image_root, name)) - - # 2. Optionally, add metadata about this dataset, - # since they might be useful in evaluation, visualization or logging - MetadataCatalog.get(name).set( - json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata - ) - - -def register_coco_panoptic_separated( - name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json -): - """ - Register a COCO panoptic segmentation dataset named `name`. - The annotations in this registered dataset will contain both instance annotations and - semantic annotations, each with its own contiguous ids. Hence it's called "separated". - - It follows the setting used by the PanopticFPN paper: - - 1. The instance annotations directly come from polygons in the COCO - instances annotation task, rather than from the masks in the COCO panoptic annotations. - - The two format have small differences: - Polygons in the instance annotations may have overlaps. - The mask annotations are produced by labeling the overlapped polygons - with depth ordering. - - 2. The semantic annotations are converted from panoptic annotations, where - all "things" are assigned a semantic id of 0. - All semantic categories will therefore have ids in contiguous - range [1, #stuff_categories]. - - This function will also register a pure semantic segmentation dataset - named ``name + '_stuffonly'``. - - Args: - name (str): the name that identifies a dataset, - e.g. "coco_2017_train_panoptic" - metadata (dict): extra metadata associated with this dataset. - image_root (str): directory which contains all the images - panoptic_root (str): directory which contains panoptic annotation images - panoptic_json (str): path to the json panoptic annotation file - sem_seg_root (str): directory which contains all the ground truth segmentation annotations. - instances_json (str): path to the json instance annotation file - """ - panoptic_name = name + "_separated" - DatasetCatalog.register( - panoptic_name, - lambda: merge_to_panoptic( - load_coco_json(instances_json, image_root, panoptic_name), - load_sem_seg(sem_seg_root, image_root), - ), - ) - MetadataCatalog.get(panoptic_name).set( - panoptic_root=panoptic_root, - image_root=image_root, - panoptic_json=panoptic_json, - sem_seg_root=sem_seg_root, - json_file=instances_json, # TODO rename - evaluator_type="coco_panoptic_seg", - **metadata - ) - - semantic_name = name + "_stuffonly" - DatasetCatalog.register(semantic_name, lambda: load_sem_seg(sem_seg_root, image_root)) - MetadataCatalog.get(semantic_name).set( - sem_seg_root=sem_seg_root, image_root=image_root, evaluator_type="sem_seg", **metadata - ) - - -def merge_to_panoptic(detection_dicts, sem_seg_dicts): - """ - Create dataset dicts for panoptic segmentation, by - merging two dicts using "file_name" field to match their entries. - - Args: - detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. - sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. - - Returns: - list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in - both detection_dicts and sem_seg_dicts that correspond to the same image. 
- The function assumes that the same key in different dicts has the same value. - """ - results = [] - sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} - assert len(sem_seg_file_to_entry) > 0 - - for det_dict in detection_dicts: - dic = copy.copy(det_dict) - dic.update(sem_seg_file_to_entry[dic["file_name"]]) - results.append(dic) - return results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/detection_utils.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/detection_utils.py deleted file mode 100644 index e19c7e2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/detection_utils.py +++ /dev/null @@ -1,516 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -Common data processing utilities that are used in a -typical object detection data pipeline. -""" -import logging -import numpy as np -import pycocotools.mask as mask_util -import torch -from fvcore.common.file_io import PathManager -from PIL import Image, ImageOps - -from detectron2.structures import ( - BitMasks, - Boxes, - BoxMode, - Instances, - Keypoints, - PolygonMasks, - RotatedBoxes, - polygons_to_bitmask, -) - -from . import transforms as T -from .catalog import MetadataCatalog - - -class SizeMismatchError(ValueError): - """ - When loaded image has difference width/height compared with annotation. - """ - - -# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 -_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] -_M_YUV2RGB = [[1.0, 0.0, 1.13983], [1.0, -0.39465, -0.58060], [1.0, 2.03211, 0.0]] - - -def convert_PIL_to_numpy(image, format): - """ - Convert PIL image to numpy array of target format. - - Args: - image (PIL.Image): a PIL image - format (str): the format of output image - - Returns: - (np.ndarray): also see `read_image` - """ - if format is not None: - # PIL only supports RGB, so convert to RGB and flip channels over below - conversion_format = format - if format in ["BGR", "YUV-BT.601"]: - conversion_format = "RGB" - image = image.convert(conversion_format) - image = np.asarray(image) - # PIL squeezes out the channel dimension for "L", so make it HWC - if format == "L": - image = np.expand_dims(image, -1) - - # handle formats not supported by PIL - elif format == "BGR": - # flip channels if needed - image = image[:, :, ::-1] - elif format == "YUV-BT.601": - image = image / 255.0 - image = np.dot(image, np.array(_M_RGB2YUV).T) - - return image - - -def convert_image_to_rgb(image, format): - """ - Convert numpy image from given format to RGB. - - Args: - image (np.ndarray): a numpy image - format (str): the format of input image, also see `read_image` - - Returns: - (np.ndarray): HWC RGB image in 0-255 range, can be either float or uint8 - """ - if format == "BGR": - image = image[:, :, [2, 1, 0]] - elif format == "YUV-BT.601": - image = np.dot(image, np.array(_M_YUV2RGB).T) - image = image * 255.0 - else: - if format == "L": - image = image[:, :, 0] - image = image.astype(np.uint8) - image = np.asarray(Image.fromarray(image, mode=format).convert("RGB")) - return image - - -def read_image(file_name, format=None): - """ - Read an image into the given format. - Will apply rotation and flipping if the image has such exif information. 
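A short usage sketch for this image reader (the path is a placeholder; import path as in upstream detectron2):

    from detectron2.data.detection_utils import read_image

    # Returns an HWC uint8 array in BGR channel order, with EXIF rotation applied.
    img = read_image("path/to/image.jpg", format="BGR")
    print(img.shape, img.dtype)   # (H, W, 3) uint8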
- - Args: - file_name (str): image file path - format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601" - - Returns: - image (np.ndarray): an HWC image in the given format, which is 0-255, uint8 for - supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. - """ - with PathManager.open(file_name, "rb") as f: - image = Image.open(f) - - # capture and ignore this bug: https://github.com/python-pillow/Pillow/issues/3973 - try: - image = ImageOps.exif_transpose(image) - except Exception: - pass - - return convert_PIL_to_numpy(image, format) - - -def check_image_size(dataset_dict, image): - """ - Raise an error if the image does not match the size specified in the dict. - """ - if "width" in dataset_dict or "height" in dataset_dict: - image_wh = (image.shape[1], image.shape[0]) - expected_wh = (dataset_dict["width"], dataset_dict["height"]) - if not image_wh == expected_wh: - raise SizeMismatchError( - "Mismatched (W,H){}, got {}, expect {}".format( - " for image " + dataset_dict["file_name"] - if "file_name" in dataset_dict - else "", - image_wh, - expected_wh, - ) - ) - - # To ensure bbox always remap to original image size - if "width" not in dataset_dict: - dataset_dict["width"] = image.shape[1] - if "height" not in dataset_dict: - dataset_dict["height"] = image.shape[0] - - -def transform_proposals(dataset_dict, image_shape, transforms, min_box_side_len, proposal_topk): - """ - Apply transformations to the proposals in dataset_dict, if any. - - Args: - dataset_dict (dict): a dict read from the dataset, possibly - contains fields "proposal_boxes", "proposal_objectness_logits", "proposal_bbox_mode" - image_shape (tuple): height, width - transforms (TransformList): - min_box_side_len (int): keep proposals with at least this size - proposal_topk (int): only keep top-K scoring proposals - - The input dict is modified in-place, with abovementioned keys removed. A new - key "proposals" will be added. Its value is an `Instances` - object which contains the transformed proposals in its field - "proposal_boxes" and "objectness_logits". - """ - if "proposal_boxes" in dataset_dict: - # Transform proposal boxes - boxes = transforms.apply_box( - BoxMode.convert( - dataset_dict.pop("proposal_boxes"), - dataset_dict.pop("proposal_bbox_mode"), - BoxMode.XYXY_ABS, - ) - ) - boxes = Boxes(boxes) - objectness_logits = torch.as_tensor( - dataset_dict.pop("proposal_objectness_logits").astype("float32") - ) - - boxes.clip(image_shape) - keep = boxes.nonempty(threshold=min_box_side_len) - boxes = boxes[keep] - objectness_logits = objectness_logits[keep] - - proposals = Instances(image_shape) - proposals.proposal_boxes = boxes[:proposal_topk] - proposals.objectness_logits = objectness_logits[:proposal_topk] - dataset_dict["proposals"] = proposals - - -def transform_instance_annotations( - annotation, transforms, image_size, *, keypoint_hflip_indices=None -): - """ - Apply transforms to box, segmentation and keypoints annotations of a single instance. - - It will use `transforms.apply_box` for the box, and - `transforms.apply_coords` for segmentation polygons & keypoints. - If you need anything more specially designed for each data structure, - you'll need to implement your own version of this function or the transforms. - - Args: - annotation (dict): dict of instance annotations for a single instance. - It will be modified in-place. 
- transforms (TransformList): - image_size (tuple): the height, width of the transformed image - keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. - - Returns: - dict: - the same input dict with fields "bbox", "segmentation", "keypoints" - transformed according to `transforms`. - The "bbox_mode" field will be set to XYXY_ABS. - """ - bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) - # Note that bbox is 1d (per-instance bounding box) - annotation["bbox"] = transforms.apply_box([bbox])[0] - annotation["bbox_mode"] = BoxMode.XYXY_ABS - - if "segmentation" in annotation: - # each instance contains 1 or more polygons - segm = annotation["segmentation"] - if isinstance(segm, list): - # polygons - polygons = [np.asarray(p).reshape(-1, 2) for p in segm] - annotation["segmentation"] = [ - p.reshape(-1) for p in transforms.apply_polygons(polygons) - ] - elif isinstance(segm, dict): - # RLE - mask = mask_util.decode(segm) - mask = transforms.apply_segmentation(mask) - assert tuple(mask.shape[:2]) == image_size - annotation["segmentation"] = mask - else: - raise ValueError( - "Cannot transform segmentation of type '{}'!" - "Supported types are: polygons as list[list[float] or ndarray]," - " COCO-style RLE as a dict.".format(type(segm)) - ) - - if "keypoints" in annotation: - keypoints = transform_keypoint_annotations( - annotation["keypoints"], transforms, image_size, keypoint_hflip_indices - ) - annotation["keypoints"] = keypoints - - return annotation - - -def transform_keypoint_annotations(keypoints, transforms, image_size, keypoint_hflip_indices=None): - """ - Transform keypoint annotations of an image. - - Args: - keypoints (list[float]): Nx3 float in Detectron2 Dataset format. - transforms (TransformList): - image_size (tuple): the height, width of the transformed image - keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. - """ - # (N*3,) -> (N, 3) - keypoints = np.asarray(keypoints, dtype="float64").reshape(-1, 3) - keypoints[:, :2] = transforms.apply_coords(keypoints[:, :2]) - - # This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - - # Alternative way: check if probe points was horizontally flipped. - # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) - # probe_aug = transforms.apply_coords(probe.copy()) - # do_hflip = np.sign(probe[1][0] - probe[0][0]) != np.sign(probe_aug[1][0] - probe_aug[0][0]) # noqa - - # If flipped, swap each keypoint with its opposite-handed equivalent - if do_hflip: - assert keypoint_hflip_indices is not None - keypoints = keypoints[keypoint_hflip_indices, :] - - # Maintain COCO convention that if visibility == 0, then x, y = 0 - # TODO may need to reset visibility for cropped keypoints, - # but it does not matter for our existing algorithms - keypoints[keypoints[:, 2] == 0] = 0 - return keypoints - - -def annotations_to_instances(annos, image_size, mask_format="polygon"): - """ - Create an :class:`Instances` object used by the models, - from instance annotations in the dataset dict. - - Args: - annos (list[dict]): a list of instance annotations in one image, each - element for one instance. - image_size (tuple): height, width - - Returns: - Instances: - It will contain fields "gt_boxes", "gt_classes", - "gt_masks", "gt_keypoints", if they can be obtained from `annos`. - This is the format that builtin models expect. 
- """ - boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] - target = Instances(image_size) - boxes = target.gt_boxes = Boxes(boxes) - boxes.clip(image_size) - - classes = [obj["category_id"] for obj in annos] - classes = torch.tensor(classes, dtype=torch.int64) - target.gt_classes = classes - - if len(annos) and "segmentation" in annos[0]: - segms = [obj["segmentation"] for obj in annos] - if mask_format == "polygon": - masks = PolygonMasks(segms) - else: - assert mask_format == "bitmask", mask_format - masks = [] - for segm in segms: - if isinstance(segm, list): - # polygon - masks.append(polygons_to_bitmask(segm, *image_size)) - elif isinstance(segm, dict): - # COCO RLE - masks.append(mask_util.decode(segm)) - elif isinstance(segm, np.ndarray): - assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( - segm.ndim - ) - # mask array - masks.append(segm) - else: - raise ValueError( - "Cannot convert segmentation of type '{}' to BitMasks!" - "Supported types are: polygons as list[list[float] or ndarray]," - " COCO-style RLE as a dict, or a full-image segmentation mask " - "as a 2D ndarray.".format(type(segm)) - ) - # torch.from_numpy does not support array with negative stride. - masks = BitMasks( - torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]) - ) - target.gt_masks = masks - - if len(annos) and "keypoints" in annos[0]: - kpts = [obj.get("keypoints", []) for obj in annos] - target.gt_keypoints = Keypoints(kpts) - - return target - - -def annotations_to_instances_rotated(annos, image_size): - """ - Create an :class:`Instances` object used by the models, - from instance annotations in the dataset dict. - Compared to `annotations_to_instances`, this function is for rotated boxes only - - Args: - annos (list[dict]): a list of instance annotations in one image, each - element for one instance. - image_size (tuple): height, width - - Returns: - Instances: - Containing fields "gt_boxes", "gt_classes", - if they can be obtained from `annos`. - This is the format that builtin models expect. - """ - boxes = [obj["bbox"] for obj in annos] - target = Instances(image_size) - boxes = target.gt_boxes = RotatedBoxes(boxes) - boxes.clip(image_size) - - classes = [obj["category_id"] for obj in annos] - classes = torch.tensor(classes, dtype=torch.int64) - target.gt_classes = classes - - return target - - -def filter_empty_instances(instances, by_box=True, by_mask=True, box_threshold=1e-5): - """ - Filter out empty instances in an `Instances` object. - - Args: - instances (Instances): - by_box (bool): whether to filter out instances with empty boxes - by_mask (bool): whether to filter out instances with empty masks - box_threshold (float): minimum width and height to be considered non-empty - - Returns: - Instances: the filtered instances. - """ - assert by_box or by_mask - r = [] - if by_box: - r.append(instances.gt_boxes.nonempty(threshold=box_threshold)) - if instances.has("gt_masks") and by_mask: - r.append(instances.gt_masks.nonempty()) - - # TODO: can also filter visible keypoints - - if not r: - return instances - m = r[0] - for x in r[1:]: - m = m & x - return instances[m] - - -def create_keypoint_hflip_indices(dataset_names): - """ - Args: - dataset_names (list[str]): list of dataset names - Returns: - ndarray[int]: a vector of size=#keypoints, storing the - horizontally-flipped keypoint indices. 
- """ - - check_metadata_consistency("keypoint_names", dataset_names) - check_metadata_consistency("keypoint_flip_map", dataset_names) - - meta = MetadataCatalog.get(dataset_names[0]) - names = meta.keypoint_names - # TODO flip -> hflip - flip_map = dict(meta.keypoint_flip_map) - flip_map.update({v: k for k, v in flip_map.items()}) - flipped_names = [i if i not in flip_map else flip_map[i] for i in names] - flip_indices = [names.index(i) for i in flipped_names] - return np.asarray(flip_indices) - - -def gen_crop_transform_with_instance(crop_size, image_size, instance): - """ - Generate a CropTransform so that the cropping region contains - the center of the given instance. - - Args: - crop_size (tuple): h, w in pixels - image_size (tuple): h, w - instance (dict): an annotation dict of one instance, in Detectron2's - dataset format. - """ - crop_size = np.asarray(crop_size, dtype=np.int32) - bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) - center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 - assert ( - image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] - ), "The annotation bounding box is outside of the image!" - assert ( - image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] - ), "Crop size is larger than image size!" - - min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) - max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) - max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) - - y0 = np.random.randint(min_yx[0], max_yx[0] + 1) - x0 = np.random.randint(min_yx[1], max_yx[1] + 1) - return T.CropTransform(x0, y0, crop_size[1], crop_size[0]) - - -def check_metadata_consistency(key, dataset_names): - """ - Check that the data have consistent metadata. - - Args: - key (str): a metadata key - dataset_names (list[str]): a list of dataset names - - Raises: - AttributeError: if the key does not exist in the metadata - ValueError: if the given data do not have the same metadata values defined by key - """ - if len(dataset_names) == 0: - return - logger = logging.getLogger(__name__) - entries_per_dataset = [getattr(MetadataCatalog.get(d), key) for d in dataset_names] - for idx, entry in enumerate(entries_per_dataset): - if entry != entries_per_dataset[0]: - logger.error( - "Metadata '{}' for dataset '{}' is '{}'".format(key, dataset_names[idx], str(entry)) - ) - logger.error( - "Metadata '{}' for dataset '{}' is '{}'".format( - key, dataset_names[0], str(entries_per_dataset[0]) - ) - ) - raise ValueError("Datasets have different metadata '{}'!".format(key)) - - -def build_transform_gen(cfg, is_train): - """ - Create a list of :class:`TransformGen` from config. - Now it includes resizing and flipping. 
- - Returns: - list[TransformGen] - """ - if is_train: - min_size = cfg.INPUT.MIN_SIZE_TRAIN - max_size = cfg.INPUT.MAX_SIZE_TRAIN - sample_style = cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING - else: - min_size = cfg.INPUT.MIN_SIZE_TEST - max_size = cfg.INPUT.MAX_SIZE_TEST - sample_style = "choice" - if sample_style == "range": - assert len(min_size) == 2, "more than 2 ({}) min_size(s) are provided for ranges".format( - len(min_size) - ) - - logger = logging.getLogger(__name__) - tfm_gens = [] - tfm_gens.append(T.ResizeShortestEdge(min_size, max_size, sample_style)) - if is_train: - tfm_gens.append(T.RandomFlip()) - logger.info("TransformGens used in training: " + str(tfm_gens)) - return tfm_gens diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/__init__.py deleted file mode 100644 index 9cfa8a6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler -from .grouped_batch_sampler import GroupedBatchSampler - -__all__ = [ - "GroupedBatchSampler", - "TrainingSampler", - "InferenceSampler", - "RepeatFactorTrainingSampler", -] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/distributed_sampler.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/distributed_sampler.py deleted file mode 100644 index 4ac57bb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/distributed_sampler.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import math -from collections import defaultdict -from typing import Optional -import torch -from torch.utils.data.sampler import Sampler - -from detectron2.utils import comm - - -class TrainingSampler(Sampler): - """ - In training, we only care about the "infinite stream" of training data. - So this sampler produces an infinite stream of indices and - all workers cooperate to correctly shuffle the indices and sample different indices. - - The samplers in each worker effectively produces `indices[worker_id::num_workers]` - where `indices` is an infinite stream of indices consisting of - `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) - or `range(size) + range(size) + ...` (if shuffle is False) - """ - - def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None): - """ - Args: - size (int): the total number of data of the underlying dataset to sample from - shuffle (bool): whether to shuffle the indices or not - seed (int): the initial seed of the shuffle. Must be the same - across all workers. If None, will use a random seed shared - among workers (require synchronization among all workers). 
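The sharding described in the class docstring can be illustrated standalone (sizes and seed invented, no detectron2 needed): each worker takes every world_size-th element of the same shuffled infinite stream, so together the workers cover each pass over the data exactly once.

    import itertools
    import torch

    def infinite_indices(size, seed=0):
        g = torch.Generator()
        g.manual_seed(seed)
        while True:
            yield from torch.randperm(size, generator=g).tolist()

    world_size, size = 2, 5
    shards = [
        list(itertools.islice(infinite_indices(size), rank, 10, world_size))
        for rank in range(world_size)
    ]
    print(shards)   # disjoint slices of the same shuffled stream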
- """ - self._size = size - assert size > 0 - self._shuffle = shuffle - if seed is None: - seed = comm.shared_random_seed() - self._seed = int(seed) - - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - - def __iter__(self): - start = self._rank - yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) - - def _infinite_indices(self): - g = torch.Generator() - g.manual_seed(self._seed) - while True: - if self._shuffle: - yield from torch.randperm(self._size, generator=g) - else: - yield from torch.arange(self._size) - - -class RepeatFactorTrainingSampler(Sampler): - """ - Similar to TrainingSampler, but suitable for training on class imbalanced data - like LVIS. In each epoch, an image may appear multiple times based on its "repeat - factor". The repeat factor for an image is a function of the frequency the rarest - category labeled in that image. The "frequency of category c" in [0, 1] is defined - as the fraction of images in the training set (without repeats) in which category c - appears. - - See :paper:`lvis` (>= v2) Appendix B.2. - """ - - def __init__(self, dataset_dicts, repeat_thresh, shuffle=True, seed=None): - """ - Args: - dataset_dicts (list[dict]): annotations in Detectron2 dataset format. - repeat_thresh (float): frequency threshold below which data is repeated. - shuffle (bool): whether to shuffle the indices or not - seed (int): the initial seed of the shuffle. Must be the same - across all workers. If None, will use a random seed shared - among workers (require synchronization among all workers). - """ - self._shuffle = shuffle - if seed is None: - seed = comm.shared_random_seed() - self._seed = int(seed) - - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - - # Get fractional repeat factors and split into whole number (_int_part) - # and fractional (_frac_part) parts. - rep_factors = self._get_repeat_factors(dataset_dicts, repeat_thresh) - self._int_part = torch.trunc(rep_factors) - self._frac_part = rep_factors - self._int_part - - def _get_repeat_factors(self, dataset_dicts, repeat_thresh): - """ - Compute (fractional) per-image repeat factors. - - Args: - See __init__. - - Returns: - torch.Tensor: the i-th element is the repeat factor for the dataset image - at index i. - """ - # 1. For each category c, compute the fraction of images that contain it: f(c) - category_freq = defaultdict(int) - for dataset_dict in dataset_dicts: # For each image (without repeats) - cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} - for cat_id in cat_ids: - category_freq[cat_id] += 1 - num_images = len(dataset_dicts) - for k, v in category_freq.items(): - category_freq[k] = v / num_images - - # 2. For each category c, compute the category-level repeat factor: - # r(c) = max(1, sqrt(t / f(c))) - category_rep = { - cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq)) - for cat_id, cat_freq in category_freq.items() - } - - # 3. For each image I, compute the image-level repeat factor: - # r(I) = max_{c in I} r(c) - rep_factors = [] - for dataset_dict in dataset_dicts: - cat_ids = {ann["category_id"] for ann in dataset_dict["annotations"]} - rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}) - rep_factors.append(rep_factor) - - return torch.tensor(rep_factors, dtype=torch.float32) - - def _get_epoch_indices(self, generator): - """ - Create a list of dataset indices (with repeats) to use for one epoch. 
- - Args: - generator (torch.Generator): pseudo random number generator used for - stochastic rounding. - - Returns: - torch.Tensor: list of dataset indices to use in one epoch. Each index - is repeated based on its calculated repeat factor. - """ - # Since repeat factors are fractional, we use stochastic rounding so - # that the target repeat factor is achieved in expectation over the - # course of training - rands = torch.rand(len(self._frac_part), generator=generator) - rep_factors = self._int_part + (rands < self._frac_part).float() - # Construct a list of indices in which we repeat images as specified - indices = [] - for dataset_index, rep_factor in enumerate(rep_factors): - indices.extend([dataset_index] * int(rep_factor.item())) - return torch.tensor(indices, dtype=torch.int64) - - def __iter__(self): - start = self._rank - yield from itertools.islice(self._infinite_indices(), start, None, self._world_size) - - def _infinite_indices(self): - g = torch.Generator() - g.manual_seed(self._seed) - while True: - # Sample indices with repeats determined by stochastic rounding; each - # "epoch" may have a slightly different size due to the rounding. - indices = self._get_epoch_indices(g) - if self._shuffle: - randperm = torch.randperm(len(indices), generator=g) - yield from indices[randperm] - else: - yield from indices - - -class InferenceSampler(Sampler): - """ - Produce indices for inference. - Inference needs to run on the __exact__ set of samples, - therefore when the total number of samples is not divisible by the number of workers, - this sampler produces different number of samples on different workers. - """ - - def __init__(self, size: int): - """ - Args: - size (int): the total number of data of the underlying dataset to sample from - """ - self._size = size - assert size > 0 - self._rank = comm.get_rank() - self._world_size = comm.get_world_size() - - shard_size = (self._size - 1) // self._world_size + 1 - begin = shard_size * self._rank - end = min(shard_size * (self._rank + 1), self._size) - self._local_indices = range(begin, end) - - def __iter__(self): - yield from self._local_indices - - def __len__(self): - return len(self._local_indices) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/grouped_batch_sampler.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/grouped_batch_sampler.py deleted file mode 100644 index 138e106..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/samplers/grouped_batch_sampler.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -from torch.utils.data.sampler import BatchSampler, Sampler - - -class GroupedBatchSampler(BatchSampler): - """ - Wraps another sampler to yield a mini-batch of indices. - It enforces that the batch only contain elements from the same group. - It also tries to provide mini-batches which follows an ordering which is - as close as possible to the ordering from the original sampler. - """ - - def __init__(self, sampler, group_ids, batch_size): - """ - Args: - sampler (Sampler): Base sampler. - group_ids (list[int]): If the sampler produces indices in range [0, N), - `group_ids` must be a list of `N` ints which contains the group id of each sample. - The group ids must be a set of integers in the range [0, num_groups). - batch_size (int): Size of mini-batch. 
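A small usage sketch for the grouping behaviour described above (group ids and batch size invented; the import path follows the samplers __init__ shown earlier in this diff):

    from torch.utils.data.sampler import SequentialSampler
    from detectron2.data.samplers import GroupedBatchSampler

    # Six samples grouped, e.g., by aspect-ratio bucket (0 = wide, 1 = tall).
    group_ids = [0, 1, 0, 1, 1, 0]
    sampler = SequentialSampler(range(len(group_ids)))
    batch_sampler = GroupedBatchSampler(sampler, group_ids, batch_size=2)

    for batch in batch_sampler:
        print(batch)   # [0, 2] then [1, 3]: each batch stays within one group
    # Indices 4 and 5 never fill a batch of their group and are dropped.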
- """ - if not isinstance(sampler, Sampler): - raise ValueError( - "sampler should be an instance of " - "torch.utils.data.Sampler, but got sampler={}".format(sampler) - ) - self.sampler = sampler - self.group_ids = np.asarray(group_ids) - assert self.group_ids.ndim == 1 - self.batch_size = batch_size - groups = np.unique(self.group_ids).tolist() - - # buffer the indices of each group until batch size is reached - self.buffer_per_group = {k: [] for k in groups} - - def __iter__(self): - for idx in self.sampler: - group_id = self.group_ids[idx] - group_buffer = self.buffer_per_group[group_id] - group_buffer.append(idx) - if len(group_buffer) == self.batch_size: - yield group_buffer[:] # yield a copy of the list - del group_buffer[:] - - def __len__(self): - raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/__init__.py deleted file mode 100644 index f7638bb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .transform import * -from fvcore.transforms.transform import * -from .transform_gen import * - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform.py deleted file mode 100644 index bd93753..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform.py +++ /dev/null @@ -1,241 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# File: transform.py - -import numpy as np -import torch -import torch.nn.functional as F -from fvcore.transforms.transform import HFlipTransform, NoOpTransform, Transform -from PIL import Image - -try: - import cv2 # noqa -except ImportError: - # OpenCV is an optional dependency at the moment - pass - -__all__ = ["ExtentTransform", "ResizeTransform", "RotationTransform"] - - -class ExtentTransform(Transform): - """ - Extracts a subregion from the source image and scales it to the output size. - - The fill color is used to map pixels from the source rect that fall outside - the source image. - - See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform - """ - - def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): - """ - Args: - src_rect (x0, y0, x1, y1): src coordinates - output_size (h, w): dst image size - interp: PIL interpolation methods - fill: Fill color used when src_rect extends outside image - """ - super().__init__() - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - h, w = self.output_size - ret = Image.fromarray(img).transform( - size=(w, h), - method=Image.EXTENT, - data=self.src_rect, - resample=interp if interp else self.interp, - fill=self.fill, - ) - return np.asarray(ret) - - def apply_coords(self, coords): - # Transform image center from source coordinates into output coordinates - # and then map the new origin to the corner of the output image. 
- h, w = self.output_size - x0, y0, x1, y1 = self.src_rect - new_coords = coords.astype(np.float32) - new_coords[:, 0] -= 0.5 * (x0 + x1) - new_coords[:, 1] -= 0.5 * (y0 + y1) - new_coords[:, 0] *= w / (x1 - x0) - new_coords[:, 1] *= h / (y1 - y0) - new_coords[:, 0] += 0.5 * w - new_coords[:, 1] += 0.5 * h - return new_coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - -class ResizeTransform(Transform): - """ - Resize the image to a target size. - """ - - def __init__(self, h, w, new_h, new_w, interp=None): - """ - Args: - h, w (int): original image size - new_h, new_w (int): new image size - interp: PIL interpolation methods, defaults to bilinear. - """ - # TODO decide on PIL vs opencv - super().__init__() - if interp is None: - interp = Image.BILINEAR - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - assert img.shape[:2] == (self.h, self.w) - assert len(img.shape) <= 4 - - if img.dtype == np.uint8: - pil_image = Image.fromarray(img) - interp_method = interp if interp is not None else self.interp - pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) - ret = np.asarray(pil_image) - else: - # PIL only supports uint8 - img = torch.from_numpy(img) - shape = list(img.shape) - shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] - img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw - _PIL_RESIZE_TO_INTERPOLATE_MODE = {Image.BILINEAR: "bilinear", Image.BICUBIC: "bicubic"} - mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[self.interp] - img = F.interpolate(img, (self.new_h, self.new_w), mode=mode, align_corners=False) - shape[:2] = (self.new_h, self.new_w) - ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) - - return ret - - def apply_coords(self, coords): - coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) - coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) - return coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - def inverse(self): - return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) - - -class RotationTransform(Transform): - """ - This method returns a copy of this image, rotated the given - number of degrees counter clockwise around its center. 
- """ - - def __init__(self, h, w, angle, expand=True, center=None, interp=None): - """ - Args: - h, w (int): original image size - angle (float): degrees for rotation - expand (bool): choose if the image should be resized to fit the whole - rotated image (default), or simply cropped - center (tuple (width, height)): coordinates of the rotation center - if left to None, the center will be fit to the center of each image - center has no effect if expand=True because it only affects shifting - interp: cv2 interpolation method, default cv2.INTER_LINEAR - """ - super().__init__() - image_center = np.array((w / 2, h / 2)) - if center is None: - center = image_center - if interp is None: - interp = cv2.INTER_LINEAR - abs_cos, abs_sin = abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle))) - if expand: - # find the new width and height bounds - bound_w, bound_h = np.rint( - [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] - ).astype(int) - else: - bound_w, bound_h = w, h - - self._set_attributes(locals()) - self.rm_coords = self.create_rotation_matrix() - # Needed because of this problem https://github.com/opencv/opencv/issues/11784 - self.rm_image = self.create_rotation_matrix(offset=-0.5) - - def apply_image(self, img, interp=None): - """ - demo should be a numpy array, formatted as Height * Width * Nchannels - """ - if len(img) == 0 or self.angle % 360 == 0: - return img - assert img.shape[:2] == (self.h, self.w) - interp = interp if interp is not None else self.interp - return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) - - def apply_coords(self, coords): - """ - coords should be a N * 2 array-like, containing N couples of (x, y) points - """ - coords = np.asarray(coords, dtype=float) - if len(coords) == 0 or self.angle % 360 == 0: - return coords - return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) - return segmentation - - def create_rotation_matrix(self, offset=0): - center = (self.center[0] + offset, self.center[1] + offset) - rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) - if self.expand: - # Find the coordinates of the center of rotation in the new image - # The only point for which we know the future coordinates is the center of the image - rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] - new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center - # shift the rotation center to the new coordinates - rm[:, 2] += new_center - return rm - - -def HFlip_rotated_box(transform, rotated_boxes): - """ - Apply the horizontal flip transform on rotated boxes. - - Args: - rotated_boxes (ndarray): Nx5 floating point array of - (x_center, y_center, width, height, angle_degrees) format - in absolute coordinates. - """ - # Transform x_center - rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] - # Transform angle - rotated_boxes[:, 4] = -rotated_boxes[:, 4] - return rotated_boxes - - -def Resize_rotated_box(transform, rotated_boxes): - """ - Apply the resizing transform on rotated boxes. For details of how these (approximation) - formulas are derived, please refer to :meth:`RotatedBoxes.scale`. - - Args: - rotated_boxes (ndarray): Nx5 floating point array of - (x_center, y_center, width, height, angle_degrees) format - in absolute coordinates. 
- """ - scale_factor_x = transform.new_w * 1.0 / transform.w - scale_factor_y = transform.new_h * 1.0 / transform.h - rotated_boxes[:, 0] *= scale_factor_x - rotated_boxes[:, 1] *= scale_factor_y - theta = rotated_boxes[:, 4] * np.pi / 180.0 - c = np.cos(theta) - s = np.sin(theta) - rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) - rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) - rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi - - return rotated_boxes - - -HFlipTransform.register_type("rotated_box", HFlip_rotated_box) -NoOpTransform.register_type("rotated_box", lambda t, x: x) -ResizeTransform.register_type("rotated_box", Resize_rotated_box) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform_gen.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform_gen.py deleted file mode 100644 index 197a0eb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/data/transforms/transform_gen.py +++ /dev/null @@ -1,534 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# File: transformer.py - -import inspect -import numpy as np -import pprint -import sys -from abc import ABCMeta, abstractmethod -from fvcore.transforms.transform import ( - BlendTransform, - CropTransform, - HFlipTransform, - NoOpTransform, - Transform, - TransformList, - VFlipTransform, -) -from PIL import Image - -from .transform import ExtentTransform, ResizeTransform, RotationTransform - -__all__ = [ - "RandomApply", - "RandomBrightness", - "RandomContrast", - "RandomCrop", - "RandomExtent", - "RandomFlip", - "RandomSaturation", - "RandomLighting", - "RandomRotation", - "Resize", - "ResizeShortestEdge", - "TransformGen", - "apply_transform_gens", -] - - -def check_dtype(img): - assert isinstance(img, np.ndarray), "[TransformGen] Needs an numpy array, but got a {}!".format( - type(img) - ) - assert not isinstance(img.dtype, np.integer) or ( - img.dtype == np.uint8 - ), "[TransformGen] Got image of type {}, use uint8 or floating points instead!".format( - img.dtype - ) - assert img.ndim in [2, 3], img.ndim - - -class TransformGen(metaclass=ABCMeta): - """ - TransformGen takes an image of type uint8 in range [0, 255], or - floating point in range [0, 1] or [0, 255] as input. - - It creates a :class:`Transform` based on the given image, sometimes with randomness. - The transform can then be used to transform images - or other data (boxes, points, annotations, etc.) associated with it. - - The assumption made in this class - is that the image itself is sufficient to instantiate a transform. - When this assumption is not true, you need to create the transforms by your own. - - A list of `TransformGen` can be applied with :func:`apply_transform_gens`. - """ - - def _init(self, params=None): - if params: - for k, v in params.items(): - if k != "self" and not k.startswith("_"): - setattr(self, k, v) - - @abstractmethod - def get_transform(self, img): - pass - - def _rand_range(self, low=1.0, high=None, size=None): - """ - Uniform float random number between low and high. 
- """ - if high is None: - low, high = 0, low - if size is None: - size = [] - return np.random.uniform(low, high, size) - - def __repr__(self): - """ - Produce something like: - "MyTransformGen(field1={self.field1}, field2={self.field2})" - """ - try: - sig = inspect.signature(self.__init__) - classname = type(self).__name__ - argstr = [] - for name, param in sig.parameters.items(): - assert ( - param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD - ), "The default __repr__ doesn't support *args or **kwargs" - assert hasattr(self, name), ( - "Attribute {} not found! " - "Default __repr__ only works if attributes match the constructor.".format(name) - ) - attr = getattr(self, name) - default = param.default - if default is attr: - continue - argstr.append("{}={}".format(name, pprint.pformat(attr))) - return "{}({})".format(classname, ", ".join(argstr)) - except AssertionError: - return super().__repr__() - - __str__ = __repr__ - - -class RandomApply(TransformGen): - """ - Randomly apply the wrapper transformation with a given probability. - """ - - def __init__(self, transform, prob=0.5): - """ - Args: - transform (Transform, TransformGen): the transform to be wrapped - by the `RandomApply`. The `transform` can either be a - `Transform` or `TransformGen` instance. - prob (float): probability between 0.0 and 1.0 that - the wrapper transformation is applied - """ - super().__init__() - assert isinstance(transform, (Transform, TransformGen)), ( - f"The given transform must either be a Transform or TransformGen instance. " - f"Not {type(transform)}" - ) - assert 0.0 <= prob <= 1.0, f"Probablity must be between 0.0 and 1.0 (given: {prob})" - self.prob = prob - self.transform = transform - - def get_transform(self, img): - do = self._rand_range() < self.prob - if do: - if isinstance(self.transform, TransformGen): - return self.transform.get_transform(img) - else: - return self.transform - else: - return NoOpTransform() - - -class RandomFlip(TransformGen): - """ - Flip the image horizontally or vertically with the given probability. - """ - - def __init__(self, prob=0.5, *, horizontal=True, vertical=False): - """ - Args: - prob (float): probability of flip. - horizontal (boolean): whether to apply horizontal flipping - vertical (boolean): whether to apply vertical flipping - """ - super().__init__() - - if horizontal and vertical: - raise ValueError("Cannot do both horiz and vert. Please use two Flip instead.") - if not horizontal and not vertical: - raise ValueError("At least one of horiz or vert has to be True!") - self._init(locals()) - - def get_transform(self, img): - h, w = img.shape[:2] - do = self._rand_range() < self.prob - if do: - if self.horizontal: - return HFlipTransform(w) - elif self.vertical: - return VFlipTransform(h) - else: - return NoOpTransform() - - -class Resize(TransformGen): - """ Resize image to a target size""" - - def __init__(self, shape, interp=Image.BILINEAR): - """ - Args: - shape: (h, w) tuple or a int - interp: PIL interpolation method - """ - if isinstance(shape, int): - shape = (shape, shape) - shape = tuple(shape) - self._init(locals()) - - def get_transform(self, img): - return ResizeTransform( - img.shape[0], img.shape[1], self.shape[0], self.shape[1], self.interp - ) - - -class ResizeShortestEdge(TransformGen): - """ - Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. - If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. 
- """ - - def __init__( - self, short_edge_length, max_size=sys.maxsize, sample_style="range", interp=Image.BILINEAR - ): - """ - Args: - short_edge_length (list[int]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the shortest edge length. - If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. - max_size (int): maximum allowed longest edge length. - sample_style (str): either "range" or "choice". - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - - self.is_range = sample_style == "range" - if isinstance(short_edge_length, int): - short_edge_length = (short_edge_length, short_edge_length) - self._init(locals()) - - def get_transform(self, img): - h, w = img.shape[:2] - - if self.is_range: - size = np.random.randint(self.short_edge_length[0], self.short_edge_length[1] + 1) - else: - size = np.random.choice(self.short_edge_length) - if size == 0: - return NoOpTransform() - - scale = size * 1.0 / min(h, w) - if h < w: - newh, neww = size, scale * w - else: - newh, neww = scale * h, size - if max(newh, neww) > self.max_size: - scale = self.max_size * 1.0 / max(newh, neww) - newh = newh * scale - neww = neww * scale - neww = int(neww + 0.5) - newh = int(newh + 0.5) - return ResizeTransform(h, w, newh, neww, self.interp) - - -class RandomRotation(TransformGen): - """ - This method returns a copy of this image, rotated the given - number of degrees counter clockwise around the given center. - """ - - def __init__(self, angle, expand=True, center=None, sample_style="range", interp=None): - """ - Args: - angle (list[float]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the angle (in degrees). - If ``sample_style=="choice"``, a list of angles to sample from - expand (bool): choose if the image should be resized to fit the whole - rotated image (default), or simply cropped - center (list[[float, float]]): If ``sample_style=="range"``, - a [[minx, miny], [maxx, maxy]] relative interval from which to sample the center, - [0, 0] being the top left of the image and [1, 1] the bottom right. - If ``sample_style=="choice"``, a list of centers to sample from - Default: None, which means that the center of rotation is the center of the image - center has no effect if expand=True because it only affects shifting - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - self.is_range = sample_style == "range" - if isinstance(angle, (float, int)): - angle = (angle, angle) - if center is not None and isinstance(center[0], (float, int)): - center = (center, center) - self._init(locals()) - - def get_transform(self, img): - h, w = img.shape[:2] - center = None - if self.is_range: - angle = np.random.uniform(self.angle[0], self.angle[1]) - if self.center is not None: - center = ( - np.random.uniform(self.center[0][0], self.center[1][0]), - np.random.uniform(self.center[0][1], self.center[1][1]), - ) - else: - angle = np.random.choice(self.angle) - if self.center is not None: - center = np.random.choice(self.center) - - if center is not None: - center = (w * center[0], h * center[1]) # Convert to absolute coordinates - - return RotationTransform(h, w, angle, expand=self.expand, center=center, interp=self.interp) - - -class RandomCrop(TransformGen): - """ - Randomly crop a subimage out of an image. - """ - - def __init__(self, crop_type: str, crop_size): - """ - Args: - crop_type (str): one of "relative_range", "relative", "absolute". 
- See `config/defaults.py` for explanation. - crop_size (tuple[float]): the relative ratio or absolute pixels of - height and width - """ - super().__init__() - assert crop_type in ["relative_range", "relative", "absolute"] - self._init(locals()) - - def get_transform(self, img): - h, w = img.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format(self) - h0 = np.random.randint(h - croph + 1) - w0 = np.random.randint(w - cropw + 1) - return CropTransform(w0, h0, cropw, croph) - - def get_crop_size(self, image_size): - """ - Args: - image_size (tuple): height, width - - Returns: - crop_size (tuple): height, width in absolute pixels - """ - h, w = image_size - if self.crop_type == "relative": - ch, cw = self.crop_size - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "relative_range": - crop_size = np.asarray(self.crop_size, dtype=np.float32) - ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "absolute": - return (min(self.crop_size[0], h), min(self.crop_size[1], w)) - else: - NotImplementedError("Unknown crop type {}".format(self.crop_type)) - - -class RandomExtent(TransformGen): - """ - Outputs an image by cropping a random "subrect" of the source image. - - The subrect can be parameterized to include pixels outside the source image, - in which case they will be set to zeros (i.e. black). The size of the output - image will vary with the size of the random subrect. - """ - - def __init__(self, scale_range, shift_range): - """ - Args: - output_size (h, w): Dimensions of output image - scale_range (l, h): Range of input-to-output size scaling factor - shift_range (x, y): Range of shifts of the cropped subrect. The rect - is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], - where (w, h) is the (width, height) of the input image. Set each - component to zero to crop at the image's center. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img): - img_h, img_w = img.shape[:2] - - # Initialize src_rect to fit the input image. - src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) - - # Apply a random scaling to the src_rect. - src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) - - # Apply a random shift to the coordinates origin. - src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) - src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) - - # Map src_rect coordinates into image coordinates (center at corner). - src_rect[0::2] += 0.5 * img_w - src_rect[1::2] += 0.5 * img_h - - return ExtentTransform( - src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), - output_size=(int(src_rect[3] - src_rect[1]), int(src_rect[2] - src_rect[0])), - ) - - -class RandomContrast(TransformGen): - """ - Randomly transforms image contrast. - - Contrast intensity is uniformly sampled in (intensity_min, intensity_max). 
- - intensity < 1 will reduce contrast - - intensity = 1 will preserve the input image - - intensity > 1 will increase contrast - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation - intensity_max (float): Maximum augmentation - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img): - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=img.mean(), src_weight=1 - w, dst_weight=w) - - -class RandomBrightness(TransformGen): - """ - Randomly transforms image brightness. - - Brightness intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce brightness - - intensity = 1 will preserve the input image - - intensity > 1 will increase brightness - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation - intensity_max (float): Maximum augmentation - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img): - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) - - -class RandomSaturation(TransformGen): - """ - Randomly transforms image saturation. - - Saturation intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce saturation (make the image more grayscale) - - intensity = 1 will preserve the input image - - intensity > 1 will increase saturation - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max): - """ - Args: - intensity_min (float): Minimum augmentation (1 preserves input). - intensity_max (float): Maximum augmentation (1 preserves input). - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img): - assert img.shape[-1] == 3, "Saturation only works on RGB images" - w = np.random.uniform(self.intensity_min, self.intensity_max) - grayscale = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] - return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) - - -class RandomLighting(TransformGen): - """ - Randomly transforms image color using fixed PCA over ImageNet. - - The degree of color jittering is randomly sampled via a normal distribution, - with standard deviation given by the scale parameter. - """ - - def __init__(self, scale): - """ - Args: - scale (float): Standard deviation of principal component weighting. - """ - super().__init__() - self._init(locals()) - self.eigen_vecs = np.array( - [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.8140], [-0.5836, -0.6948, 0.4203]] - ) - self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) - - def get_transform(self, img): - assert img.shape[-1] == 3, "Saturation only works on RGB images" - weights = np.random.normal(scale=self.scale, size=3) - return BlendTransform( - src_image=self.eigen_vecs.dot(weights * self.eigen_vals), src_weight=1.0, dst_weight=1.0 - ) - - -def apply_transform_gens(transform_gens, img): - """ - Apply a list of :class:`TransformGen` or :class:`Transform` on the input image, and - returns the transformed image and a list of transforms. 
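# A sketch of the composition pattern that apply_transform_gens implements, assuming the
# generators defined in this file are in scope; sizes and intensity ranges are
# illustrative only.
import numpy as np

tfm_gens = [
    ResizeShortestEdge(short_edge_length=(640, 800), max_size=1333, sample_style="range"),
    RandomFlip(prob=0.5),
    RandomContrast(intensity_min=0.9, intensity_max=1.1),
]
img = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)
img_aug, tfms = apply_transform_gens(tfm_gens, img)
# `tfms` is a TransformList; the same transforms can later be replayed on boxes,
# points, or masks, e.g. tfms.apply_coords(points) for an N x 2 array.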
- - We cannot simply create and return all transforms without - applying it to the image, because a subsequent transform may - need the output of the previous one. - - Args: - transform_gens (list): list of :class:`TransformGen` or :class:`Transform` instance to - be applied. - img (ndarray): uint8 or floating point images with 1 or 3 channels. - - Returns: - ndarray: the transformed image - TransformList: contain the transforms that's used. - """ - for g in transform_gens: - assert isinstance(g, (Transform, TransformGen)), g - - check_dtype(img) - - tfms = [] - for g in transform_gens: - tfm = g.get_transform(img) if isinstance(g, TransformGen) else g - assert isinstance( - tfm, Transform - ), "TransformGen {} must return an instance of Transform! Got {} instead".format(g, tfm) - img = tfm.apply_image(img) - tfms.append(tfm) - return img, TransformList(tfms) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/__init__.py deleted file mode 100644 index 6a4538d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -from .launch import * -from .train_loop import * - -__all__ = [k for k in globals().keys() if not k.startswith("_")] - - -# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) -# but still make them available here -from .hooks import * -from .defaults import * diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/defaults.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/defaults.py deleted file mode 100644 index db9ab68..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/defaults.py +++ /dev/null @@ -1,531 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -This file contains components with some default boilerplate logic user may need -in training / testing. They will not work for everyone, but many users may find them useful. - -The behavior of functions/classes in this file is subject to change, -since they are meant to represent the "common default behavior" people need in their projects. -""" - -import argparse -import logging -import os -import sys -from collections import OrderedDict -import torch -from fvcore.common.file_io import PathManager -from fvcore.nn.precise_bn import get_bn_modules -from torch.nn.parallel import DistributedDataParallel - -import detectron2.data.transforms as T -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.data import ( - MetadataCatalog, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.evaluation import ( - DatasetEvaluator, - inference_on_dataset, - print_csv_format, - verify_results, -) -from detectron2.modeling import build_model -from detectron2.solver import build_lr_scheduler, build_optimizer -from detectron2.utils import comm -from detectron2.utils.collect_env import collect_env_info -from detectron2.utils.env import seed_all_rng -from detectron2.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter -from detectron2.utils.logger import setup_logger - -from . 
import hooks -from .train_loop import SimpleTrainer - -__all__ = ["default_argument_parser", "default_setup", "DefaultPredictor", "DefaultTrainer"] - - -def default_argument_parser(epilog=None): - """ - Create a parser with some common arguments used by detectron2 users. - - Args: - epilog (str): epilog passed to ArgumentParser describing the usage. - - Returns: - argparse.ArgumentParser: - """ - parser = argparse.ArgumentParser( - epilog=epilog - or f""" -Examples: - -Run on single machine: - $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth - -Run on multiple machines: - (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] - (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] -""", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") - parser.add_argument( - "--resume", - action="store_true", - help="whether to attempt to resume from the checkpoint directory", - ) - parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") - parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") - parser.add_argument("--num-machines", type=int, default=1, help="total number of machines") - parser.add_argument( - "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)" - ) - - # PyTorch still may leave orphan processes in multi-gpu training. - # Therefore we use a deterministic way to obtain port, - # so that users are aware of orphan processes by seeing the port occupied. - port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 - parser.add_argument( - "--dist-url", - default="tcp://127.0.0.1:{}".format(port), - help="initialization URL for pytorch distributed backend. See " - "https://pytorch.org/docs/stable/distributed.html for details.", - ) - parser.add_argument( - "opts", - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER, - ) - return parser - - -def default_setup(cfg, args): - """ - Perform some basic common setups at the beginning of a job, including: - - 1. Set up the detectron2 logger - 2. Log basic information about environment, cmdline arguments, and config - 3. Backup the config to the output directory - - Args: - cfg (CfgNode): the full config to be used - args (argparse.NameSpace): the command line arguments to be logged - """ - output_dir = cfg.OUTPUT_DIR - if comm.is_main_process() and output_dir: - PathManager.mkdirs(output_dir) - - rank = comm.get_rank() - setup_logger(output_dir, distributed_rank=rank, name="fvcore") - logger = setup_logger(output_dir, distributed_rank=rank) - - logger.info("Rank of current process: {}. 
World size: {}".format(rank, comm.get_world_size())) - logger.info("Environment info:\n" + collect_env_info()) - - logger.info("Command line arguments: " + str(args)) - if hasattr(args, "config_file") and args.config_file != "": - logger.info( - "Contents of args.config_file={}:\n{}".format( - args.config_file, PathManager.open(args.config_file, "r").read() - ) - ) - - logger.info("Running with full config:\n{}".format(cfg)) - if comm.is_main_process() and output_dir: - # Note: some of our scripts may expect the existence of - # config.yaml in output directory - path = os.path.join(output_dir, "config.yaml") - with PathManager.open(path, "w") as f: - f.write(cfg.dump()) - logger.info("Full config saved to {}".format(path)) - - # make sure each worker has a different, yet deterministic seed if specified - seed_all_rng(None if cfg.SEED < 0 else cfg.SEED + rank) - - # cudnn benchmark has large overhead. It shouldn't be used considering the small size of - # typical validation set. - if not (hasattr(args, "eval_only") and args.eval_only): - torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK - - -class DefaultPredictor: - """ - Create a simple end-to-end predictor with the given config that runs on - single device for a single input image. - - Compared to using the model directly, this class does the following additions: - - 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. - 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. - 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. - 4. Take one input image and produce a single output, instead of a batch. - - If you'd like to do anything more fancy, please refer to its source code - as examples to build and use the model manually. - - Attributes: - metadata (Metadata): the metadata of the underlying dataset, obtained from - cfg.DATASETS.TEST. - - Examples: - - .. code-block:: python - - pred = DefaultPredictor(cfg) - inputs = cv2.imread("input.jpg") - outputs = pred(inputs) - """ - - def __init__(self, cfg): - self.cfg = cfg.clone() # cfg can be modified by model - self.model = build_model(self.cfg) - self.model.eval() - self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) - - checkpointer = DetectionCheckpointer(self.model) - checkpointer.load(cfg.MODEL.WEIGHTS) - - self.transform_gen = T.ResizeShortestEdge( - [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST - ) - - self.input_format = cfg.INPUT.FORMAT - assert self.input_format in ["RGB", "BGR"], self.input_format - - def __call__(self, original_image): - """ - Args: - original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). - - Returns: - predictions (dict): - the output of the model for one image only. - See :doc:`/tutorials/models` for details about the format. - """ - with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258 - # Apply pre-processing to image. - if self.input_format == "RGB": - # whether the model expects BGR inputs or RGB - original_image = original_image[:, :, ::-1] - height, width = original_image.shape[:2] - image = self.transform_gen.get_transform(original_image).apply_image(original_image) - image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) - - inputs = {"image": image, "height": height, "width": width} - predictions = self.model([inputs])[0] - return predictions - - -class DefaultTrainer(SimpleTrainer): - """ - A trainer with default training logic. Compared to `SimpleTrainer`, it - contains the following logic in addition: - - 1. 
Create model, optimizer, scheduler, dataloader from the given config. - 2. Load a checkpoint or `cfg.MODEL.WEIGHTS`, if exists, when - `resume_or_load` is called. - 3. Register a few common hooks. - - It is created to simplify the **standard model training workflow** and reduce code boilerplate - for users who only need the standard training workflow, with standard features. - It means this class makes *many assumptions* about your training logic that - may easily become invalid in a new research. In fact, any assumptions beyond those made in the - :class:`SimpleTrainer` are too much for research. - - The code of this class has been annotated about restrictive assumptions it mades. - When they do not work for you, you're encouraged to: - - 1. Overwrite methods of this class, OR: - 2. Use :class:`SimpleTrainer`, which only does minimal SGD training and - nothing else. You can then add your own hooks if needed. OR: - 3. Write your own training loop similar to `tools/plain_train_net.py`. - - Also note that the behavior of this class, like other functions/classes in - this file, is not stable, since it is meant to represent the "common default behavior". - It is only guaranteed to work well with the standard models and training workflow in detectron2. - To obtain more stable behavior, write your own training logic with other public APIs. - - Examples: - - .. code-block:: python - - trainer = DefaultTrainer(cfg) - trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS - trainer.train() - - Attributes: - scheduler: - checkpointer (DetectionCheckpointer): - cfg (CfgNode): - """ - - def __init__(self, cfg): - """ - Args: - cfg (CfgNode): - """ - logger = logging.getLogger("detectron2") - if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for d2 - setup_logger() - # Assume these objects must be constructed in this order. - model = self.build_model(cfg) - optimizer = self.build_optimizer(cfg, model) - data_loader = self.build_train_loader(cfg) - - # For training, wrap with DDP. But don't need this for inference. - if comm.get_world_size() > 1: - model = DistributedDataParallel( - model, device_ids=[comm.get_local_rank()], broadcast_buffers=False - ) - super().__init__(model, data_loader, optimizer) - - self.scheduler = self.build_lr_scheduler(cfg, optimizer) - # Assume no other objects need to be checkpointed. - # We can later make it checkpoint the stateful hooks - self.checkpointer = DetectionCheckpointer( - # Assume you want to save checkpoints together with logs/statistics - model, - cfg.OUTPUT_DIR, - optimizer=optimizer, - scheduler=self.scheduler, - ) - self.start_iter = 0 - self.max_iter = cfg.SOLVER.MAX_ITER - self.cfg = cfg - - self.register_hooks(self.build_hooks()) - - def resume_or_load(self, resume=True): - """ - If `resume==True`, and last checkpoint exists, resume from it and load all - checkpointables (eg. optimizer and scheduler). - - Otherwise, load the model specified by the config (skip all checkpointables). - - Args: - resume (bool): whether to do resume or not - """ - checkpoint = self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume) - self.start_iter = checkpoint.get("iteration", -1) if resume else -1 - # The checkpoint stores the training iteration that just finished, thus we start - # at the next iteration (or iter zero if there's no checkpoint). - self.start_iter += 1 - - def build_hooks(self): - """ - Build a list of default hooks, including timing, evaluation, - checkpointing, lr scheduling, precise BN, writing events. 
- - Returns: - list[HookBase]: - """ - cfg = self.cfg.clone() - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN - - ret = [ - hooks.IterationTimer(), - hooks.LRScheduler(self.optimizer, self.scheduler), - hooks.PreciseBN( - # Run at the same freq as (but before) evaluation. - cfg.TEST.EVAL_PERIOD, - self.model, - # Build a new data loader to not affect training - self.build_train_loader(cfg), - cfg.TEST.PRECISE_BN.NUM_ITER, - ) - if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model) - else None, - ] - - # Do PreciseBN before checkpointer, because it updates the model and need to - # be saved by checkpointer. - # This is not always the best: if checkpointing has a different frequency, - # some checkpoints may have more precise statistics than others. - if comm.is_main_process(): - ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD)) - - def test_and_save_results(): - self._last_eval_results = self.test(self.cfg, self.model) - return self._last_eval_results - - # Do evaluation after checkpointer, because then if it fails, - # we can use the saved checkpoint to debug. - ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results)) - - if comm.is_main_process(): - # run writers in the end, so that evaluation metrics are written - ret.append(hooks.PeriodicWriter(self.build_writers(), period=20)) - return ret - - def build_writers(self): - """ - Build a list of writers to be used. By default it contains - writers that write metrics to the screen, - a json file, and a tensorboard event file respectively. - If you'd like a different list of writers, you can overwrite it in - your trainer. - - Returns: - list[EventWriter]: a list of :class:`EventWriter` objects. - - It is now implemented by: - - .. code-block:: python - - return [ - CommonMetricPrinter(self.max_iter), - JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")), - TensorboardXWriter(self.cfg.OUTPUT_DIR), - ] - - """ - # Here the default print/log frequency of each writer is used. - return [ - # It may not always print what you want to see, since it prints "common" metrics only. - CommonMetricPrinter(self.max_iter), - JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")), - TensorboardXWriter(self.cfg.OUTPUT_DIR), - ] - - def train(self): - """ - Run training. - - Returns: - OrderedDict of results, if evaluation is enabled. Otherwise None. - """ - super().train(self.start_iter, self.max_iter) - if len(self.cfg.TEST.EXPECTED_RESULTS) and comm.is_main_process(): - assert hasattr( - self, "_last_eval_results" - ), "No evaluation results obtained during training!" - verify_results(self.cfg, self._last_eval_results) - return self._last_eval_results - - @classmethod - def build_model(cls, cfg): - """ - Returns: - torch.nn.Module: - - It now calls :func:`detectron2.modeling.build_model`. - Overwrite it if you'd like a different model. - """ - model = build_model(cfg) - logger = logging.getLogger(__name__) - logger.info("Model:\n{}".format(model)) - return model - - @classmethod - def build_optimizer(cls, cfg, model): - """ - Returns: - torch.optim.Optimizer: - - It now calls :func:`detectron2.solver.build_optimizer`. - Overwrite it if you'd like a different optimizer. - """ - return build_optimizer(cfg, model) - - @classmethod - def build_lr_scheduler(cls, cfg, optimizer): - """ - It now calls :func:`detectron2.solver.build_lr_scheduler`. - Overwrite it if you'd like a different scheduler. 
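# A sketch of the customization pattern these build_* classmethods are designed for:
# subclass DefaultTrainer (defined above) and override only what you need. `MyTrainer`
# and the COCO-format evaluator choice are illustrative, not part of the original code.
from detectron2.data import build_detection_train_loader
from detectron2.evaluation import COCOEvaluator


class MyTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name):
        # assumes the test set is in COCO format; other evaluators plug in the same way
        return COCOEvaluator(dataset_name, cfg, distributed=True, output_dir=cfg.OUTPUT_DIR)

    @classmethod
    def build_train_loader(cls, cfg):
        # override this instead if a custom mapper or sampler is needed
        return build_detection_train_loader(cfg)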
- """ - return build_lr_scheduler(cfg, optimizer) - - @classmethod - def build_train_loader(cls, cfg): - """ - Returns: - iterable - - It now calls :func:`detectron2.data.build_detection_train_loader`. - Overwrite it if you'd like a different data loader. - """ - return build_detection_train_loader(cfg) - - @classmethod - def build_test_loader(cls, cfg, dataset_name): - """ - Returns: - iterable - - It now calls :func:`detectron2.data.build_detection_test_loader`. - Overwrite it if you'd like a different data loader. - """ - return build_detection_test_loader(cfg, dataset_name) - - @classmethod - def build_evaluator(cls, cfg, dataset_name): - """ - Returns: - DatasetEvaluator or None - - It is not implemented by default. - """ - raise NotImplementedError( - """ -If you want DefaultTrainer to automatically run evaluation, -please implement `build_evaluator()` in subclasses (see train_net.py for example). -Alternatively, you can call evaluation functions yourself (see Colab balloon tutorial for example). -""" - ) - - @classmethod - def test(cls, cfg, model, evaluators=None): - """ - Args: - cfg (CfgNode): - model (nn.Module): - evaluators (list[DatasetEvaluator] or None): if None, will call - :meth:`build_evaluator`. Otherwise, must have the same length as - `cfg.DATASETS.TEST`. - - Returns: - dict: a dict of result metrics - """ - logger = logging.getLogger(__name__) - if isinstance(evaluators, DatasetEvaluator): - evaluators = [evaluators] - if evaluators is not None: - assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format( - len(cfg.DATASETS.TEST), len(evaluators) - ) - - results = OrderedDict() - for idx, dataset_name in enumerate(cfg.DATASETS.TEST): - data_loader = cls.build_test_loader(cfg, dataset_name) - # When evaluators are passed in as arguments, - # implicitly assume that evaluators can be created before data_loader. - if evaluators is not None: - evaluator = evaluators[idx] - else: - try: - evaluator = cls.build_evaluator(cfg, dataset_name) - except NotImplementedError: - logger.warn( - "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, " - "or implement its `build_evaluator` method." - ) - results[dataset_name] = {} - continue - results_i = inference_on_dataset(model, data_loader, evaluator) - results[dataset_name] = results_i - if comm.is_main_process(): - assert isinstance( - results_i, dict - ), "Evaluator must return a dict on the main process. Got {} instead.".format( - results_i - ) - logger.info("Evaluation results for {} in csv format:".format(dataset_name)) - print_csv_format(results_i) - - if len(results) == 1: - results = list(results.values())[0] - return results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/hooks.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/hooks.py deleted file mode 100644 index e5085b4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/hooks.py +++ /dev/null @@ -1,427 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import datetime -import itertools -import logging -import os -import tempfile -import time -from collections import Counter -import torch -from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer -from fvcore.common.file_io import PathManager -from fvcore.common.timer import Timer -from fvcore.nn.precise_bn import get_bn_modules, update_bn_stats - -import detectron2.utils.comm as comm -from detectron2.evaluation.testing import flatten_results_dict -from detectron2.utils.events import EventStorage, EventWriter - -from .train_loop import HookBase - -__all__ = [ - "CallbackHook", - "IterationTimer", - "PeriodicWriter", - "PeriodicCheckpointer", - "LRScheduler", - "AutogradProfiler", - "EvalHook", - "PreciseBN", -] - - -""" -Implement some common hooks. -""" - - -class CallbackHook(HookBase): - """ - Create a hook using callback functions provided by the user. - """ - - def __init__(self, *, before_train=None, after_train=None, before_step=None, after_step=None): - """ - Each argument is a function that takes one argument: the trainer. - """ - self._before_train = before_train - self._before_step = before_step - self._after_step = after_step - self._after_train = after_train - - def before_train(self): - if self._before_train: - self._before_train(self.trainer) - - def after_train(self): - if self._after_train: - self._after_train(self.trainer) - # The functions may be closures that hold reference to the trainer - # Therefore, delete them to avoid circular reference. - del self._before_train, self._after_train - del self._before_step, self._after_step - - def before_step(self): - if self._before_step: - self._before_step(self.trainer) - - def after_step(self): - if self._after_step: - self._after_step(self.trainer) - - -class IterationTimer(HookBase): - """ - Track the time spent for each iteration (each run_step call in the trainer). - Print a summary in the end of training. - - This hook uses the time between the call to its :meth:`before_step` - and :meth:`after_step` methods. - Under the convention that :meth:`before_step` of all hooks should only - take negligible amount of time, the :class:`IterationTimer` hook should be - placed at the beginning of the list of hooks to obtain accurate timing. - """ - - def __init__(self, warmup_iter=3): - """ - Args: - warmup_iter (int): the number of iterations at the beginning to exclude - from timing. 
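# A quick sketch of CallbackHook above: attach ad-hoc callbacks without writing a full
# HookBase subclass. The logging interval and the `trainer` variable are illustrative.
def _print_progress(trainer):
    if trainer.iter % 100 == 0:
        print("reached iteration", trainer.iter)

progress_hook = CallbackHook(after_step=_print_progress)
# later: trainer.register_hooks([progress_hook]) on any TrainerBase instance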
- """ - self._warmup_iter = warmup_iter - self._step_timer = Timer() - self._start_time = time.perf_counter() - self._total_timer = Timer() - - def before_train(self): - self._start_time = time.perf_counter() - self._total_timer.reset() - self._total_timer.pause() - - def after_train(self): - logger = logging.getLogger(__name__) - total_time = time.perf_counter() - self._start_time - total_time_minus_hooks = self._total_timer.seconds() - hook_time = total_time - total_time_minus_hooks - - num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter - - if num_iter > 0 and total_time_minus_hooks > 0: - # Speed is meaningful only after warmup - # NOTE this format is parsed by grep in some scripts - logger.info( - "Overall training speed: {} iterations in {} ({:.4f} s / it)".format( - num_iter, - str(datetime.timedelta(seconds=int(total_time_minus_hooks))), - total_time_minus_hooks / num_iter, - ) - ) - - logger.info( - "Total training time: {} ({} on hooks)".format( - str(datetime.timedelta(seconds=int(total_time))), - str(datetime.timedelta(seconds=int(hook_time))), - ) - ) - - def before_step(self): - self._step_timer.reset() - self._total_timer.resume() - - def after_step(self): - # +1 because we're in after_step - iter_done = self.trainer.iter - self.trainer.start_iter + 1 - if iter_done >= self._warmup_iter: - sec = self._step_timer.seconds() - self.trainer.storage.put_scalars(time=sec) - else: - self._start_time = time.perf_counter() - self._total_timer.reset() - - self._total_timer.pause() - - -class PeriodicWriter(HookBase): - """ - Write events to EventStorage periodically. - - It is executed every ``period`` iterations and after the last iteration. - """ - - def __init__(self, writers, period=20): - """ - Args: - writers (list[EventWriter]): a list of EventWriter objects - period (int): - """ - self._writers = writers - for w in writers: - assert isinstance(w, EventWriter), w - self._period = period - - def after_step(self): - if (self.trainer.iter + 1) % self._period == 0 or ( - self.trainer.iter == self.trainer.max_iter - 1 - ): - for writer in self._writers: - writer.write() - - def after_train(self): - for writer in self._writers: - writer.close() - - -class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase): - """ - Same as :class:`detectron2.checkpoint.PeriodicCheckpointer`, but as a hook. - - Note that when used as a hook, - it is unable to save additional data other than what's defined - by the given `checkpointer`. - - It is executed every ``period`` iterations and after the last iteration. - """ - - def before_train(self): - self.max_iter = self.trainer.max_iter - - def after_step(self): - # No way to use **kwargs - self.step(self.trainer.iter) - - -class LRScheduler(HookBase): - """ - A hook which executes a torch builtin LR scheduler and summarizes the LR. - It is executed after every iteration. 
- """ - - def __init__(self, optimizer, scheduler): - """ - Args: - optimizer (torch.optim.Optimizer): - scheduler (torch.optim._LRScheduler) - """ - self._optimizer = optimizer - self._scheduler = scheduler - - # NOTE: some heuristics on what LR to summarize - # summarize the param group with most parameters - largest_group = max(len(g["params"]) for g in optimizer.param_groups) - - if largest_group == 1: - # If all groups have one parameter, - # then find the most common initial LR, and use it for summary - lr_count = Counter([g["lr"] for g in optimizer.param_groups]) - lr = lr_count.most_common()[0][0] - for i, g in enumerate(optimizer.param_groups): - if g["lr"] == lr: - self._best_param_group_id = i - break - else: - for i, g in enumerate(optimizer.param_groups): - if len(g["params"]) == largest_group: - self._best_param_group_id = i - break - - def after_step(self): - lr = self._optimizer.param_groups[self._best_param_group_id]["lr"] - self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False) - self._scheduler.step() - - -class AutogradProfiler(HookBase): - """ - A hook which runs `torch.autograd.profiler.profile`. - - Examples: - - .. code-block:: python - - hooks.AutogradProfiler( - lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR - ) - - The above example will run the profiler for iteration 10~20 and dump - results to ``OUTPUT_DIR``. We did not profile the first few iterations - because they are typically slower than the rest. - The result files can be loaded in the ``chrome://tracing`` page in chrome browser. - - Note: - When used together with NCCL on older version of GPUs, - autograd profiler may cause deadlock because it unnecessarily allocates - memory on every device it sees. The memory management calls, if - interleaved with NCCL calls, lead to deadlock on GPUs that do not - support `cudaLaunchCooperativeKernelMultiDevice`. - """ - - def __init__(self, enable_predicate, output_dir, *, use_cuda=True): - """ - Args: - enable_predicate (callable[trainer -> bool]): a function which takes a trainer, - and returns whether to enable the profiler. - It will be called once every step, and can be used to select which steps to profile. - output_dir (str): the output directory to dump tracing files. - use_cuda (bool): same as in `torch.autograd.profiler.profile`. - """ - self._enable_predicate = enable_predicate - self._use_cuda = use_cuda - self._output_dir = output_dir - - def before_step(self): - if self._enable_predicate(self.trainer): - self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda) - self._profiler.__enter__() - else: - self._profiler = None - - def after_step(self): - if self._profiler is None: - return - self._profiler.__exit__(None, None, None) - PathManager.mkdirs(self._output_dir) - out_file = os.path.join( - self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter) - ) - if "://" not in out_file: - self._profiler.export_chrome_trace(out_file) - else: - # Support non-posix filesystems - with tempfile.TemporaryDirectory(prefix="detectron2_profiler") as d: - tmp_file = os.path.join(d, "tmp.json") - self._profiler.export_chrome_trace(tmp_file) - with open(tmp_file) as f: - content = f.read() - with PathManager.open(out_file, "w") as f: - f.write(content) - - -class EvalHook(HookBase): - """ - Run an evaluation function periodically, and at the end of training. - - It is executed every ``eval_period`` iterations and after the last iteration. 
- """ - - def __init__(self, eval_period, eval_function): - """ - Args: - eval_period (int): the period to run `eval_function`. - eval_function (callable): a function which takes no arguments, and - returns a nested dict of evaluation metrics. - - Note: - This hook must be enabled in all or none workers. - If you would like only certain workers to perform evaluation, - give other workers a no-op function (`eval_function=lambda: None`). - """ - self._period = eval_period - self._func = eval_function - - def _do_eval(self): - results = self._func() - - if results: - assert isinstance( - results, dict - ), "Eval function must return a dict. Got {} instead.".format(results) - - flattened_results = flatten_results_dict(results) - for k, v in flattened_results.items(): - try: - v = float(v) - except Exception: - raise ValueError( - "[EvalHook] eval_function should return a nested dict of float. " - "Got '{}: {}' instead.".format(k, v) - ) - self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False) - - # Evaluation may take different time among workers. - # A barrier make them start the next iteration together. - comm.synchronize() - - def after_step(self): - next_iter = self.trainer.iter + 1 - is_final = next_iter == self.trainer.max_iter - if is_final or (self._period > 0 and next_iter % self._period == 0): - self._do_eval() - - def after_train(self): - # func is likely a closure that holds reference to the trainer - # therefore we clean it to avoid circular reference in the end - del self._func - - -class PreciseBN(HookBase): - """ - The standard implementation of BatchNorm uses EMA in inference, which is - sometimes suboptimal. - This class computes the true average of statistics rather than the moving average, - and put true averages to every BN layer in the given model. - - It is executed every ``period`` iterations and after the last iteration. - """ - - def __init__(self, period, model, data_loader, num_iter): - """ - Args: - period (int): the period this hook is run, or 0 to not run during training. - The hook will always run in the end of training. - model (nn.Module): a module whose all BN layers in training mode will be - updated by precise BN. - Note that user is responsible for ensuring the BN layers to be - updated are in training mode when this hook is triggered. - data_loader (iterable): it will produce data to be run by `model(data)`. - num_iter (int): number of iterations used to compute the precise - statistics. - """ - self._logger = logging.getLogger(__name__) - if len(get_bn_modules(model)) == 0: - self._logger.info( - "PreciseBN is disabled because model does not contain BN layers in training mode." - ) - self._disabled = True - return - - self._model = model - self._data_loader = data_loader - self._num_iter = num_iter - self._period = period - self._disabled = False - - self._data_iter = None - - def after_step(self): - next_iter = self.trainer.iter + 1 - is_final = next_iter == self.trainer.max_iter - if is_final or (self._period > 0 and next_iter % self._period == 0): - self.update_stats() - - def update_stats(self): - """ - Update the model with precise statistics. Users can manually call this method. - """ - if self._disabled: - return - - if self._data_iter is None: - self._data_iter = iter(self._data_loader) - - def data_loader(): - for num_iter in itertools.count(1): - if num_iter % 100 == 0: - self._logger.info( - "Running precise-BN ... 
{}/{} iterations.".format(num_iter, self._num_iter) - ) - # This way we can reuse the same iterator - yield next(self._data_iter) - - with EventStorage(): # capture events in a new storage to discard them - self._logger.info( - "Running precise-BN for {} iterations... ".format(self._num_iter) - + "Note that this could produce different statistics every time." - ) - update_bn_stats(self._model, data_loader(), self._num_iter) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/launch.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/launch.py deleted file mode 100644 index 9efbb03..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/launch.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import torch -import torch.distributed as dist -import torch.multiprocessing as mp - -from detectron2.utils import comm - -__all__ = ["launch"] - - -def _find_free_port(): - import socket - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - # Binding to port 0 will cause the OS to find an available port for us - sock.bind(("", 0)) - port = sock.getsockname()[1] - sock.close() - # NOTE: there is still a chance the port could be taken by other processes. - return port - - -def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()): - """ - Args: - main_func: a function that will be called by `main_func(*args)` - num_machines (int): the total number of machines - machine_rank (int): the rank of this machine (one per machine) - dist_url (str): url to connect to for distributed jobs, including protocol - e.g. "tcp://127.0.0.1:8686". - Can be set to "auto" to automatically select a free port on localhost - args (tuple): arguments passed to main_func - """ - world_size = num_machines * num_gpus_per_machine - if world_size > 1: - # https://github.com/pytorch/pytorch/pull/14391 - # TODO prctl in spawned processes - - if dist_url == "auto": - assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs." - port = _find_free_port() - dist_url = f"tcp://127.0.0.1:{port}" - if num_machines > 1 and dist_url.startswith("file://"): - logger = logging.getLogger(__name__) - logger.warning( - "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" - ) - - mp.spawn( - _distributed_worker, - nprocs=num_gpus_per_machine, - args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args), - daemon=False, - ) - else: - main_func(*args) - - -def _distributed_worker( - local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args -): - assert torch.cuda.is_available(), "cuda is not available. Please check your installation." 
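# A sketch of how `launch` above is typically driven from a training script, assuming a
# user-defined `main(args)` entry point; the flags mirror default_argument_parser().
from detectron2.engine import default_argument_parser, launch

if __name__ == "__main__":
    args = default_argument_parser().parse_args()
    launch(
        main,
        num_gpus_per_machine=args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )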
- global_rank = machine_rank * num_gpus_per_machine + local_rank - try: - dist.init_process_group( - backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank - ) - except Exception as e: - logger = logging.getLogger(__name__) - logger.error("Process group URL: {}".format(dist_url)) - raise e - # synchronize is needed here to prevent a possible timeout after calling init_process_group - # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 - comm.synchronize() - - assert num_gpus_per_machine <= torch.cuda.device_count() - torch.cuda.set_device(local_rank) - - # Setup the local process group (which contains ranks within the same machine) - assert comm._LOCAL_PROCESS_GROUP is None - num_machines = world_size // num_gpus_per_machine - for i in range(num_machines): - ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) - pg = dist.new_group(ranks_on_i) - if i == machine_rank: - comm._LOCAL_PROCESS_GROUP = pg - - main_func(*args) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/train_loop.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/train_loop.py deleted file mode 100644 index 453c9ac..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/engine/train_loop.py +++ /dev/null @@ -1,273 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import logging -import numpy as np -import time -import weakref -import torch - -import detectron2.utils.comm as comm -from detectron2.utils.events import EventStorage - -__all__ = ["HookBase", "TrainerBase", "SimpleTrainer"] - - -class HookBase: - """ - Base class for hooks that can be registered with :class:`TrainerBase`. - - Each hook can implement 4 methods. The way they are called is demonstrated - in the following snippet: - - .. code-block:: python - - hook.before_train() - for iter in range(start_iter, max_iter): - hook.before_step() - trainer.run_step() - hook.after_step() - hook.after_train() - - Notes: - 1. In the hook method, users can access `self.trainer` to access more - properties about the context (e.g., current iteration). - - 2. A hook that does something in :meth:`before_step` can often be - implemented equivalently in :meth:`after_step`. - If the hook takes non-trivial time, it is strongly recommended to - implement the hook in :meth:`after_step` instead of :meth:`before_step`. - The convention is that :meth:`before_step` should only take negligible time. - - Following this convention will allow hooks that do care about the difference - between :meth:`before_step` and :meth:`after_step` (e.g., timer) to - function properly. - - Attributes: - trainer: A weak reference to the trainer object. Set by the trainer when the hook is - registered. - """ - - def before_train(self): - """ - Called before the first iteration. - """ - pass - - def after_train(self): - """ - Called after the last iteration. - """ - pass - - def before_step(self): - """ - Called before each iteration. - """ - pass - - def after_step(self): - """ - Called after each iteration. - """ - pass - - -class TrainerBase: - """ - Base class for iterative trainer with hooks. - - The only assumption we made here is: the training runs in a loop. - A subclass can implement what the loop is. - We made no assumptions about the existence of dataloader, optimizer, model, etc. - - Attributes: - iter(int): the current iteration. - - start_iter(int): The iteration to start with. 
- By convention the minimum possible value is 0. - - max_iter(int): The iteration to end training. - - storage(EventStorage): An EventStorage that's opened during the course of training. - """ - - def __init__(self): - self._hooks = [] - - def register_hooks(self, hooks): - """ - Register hooks to the trainer. The hooks are executed in the order - they are registered. - - Args: - hooks (list[Optional[HookBase]]): list of hooks - """ - hooks = [h for h in hooks if h is not None] - for h in hooks: - assert isinstance(h, HookBase) - # To avoid circular reference, hooks and trainer cannot own each other. - # This normally does not matter, but will cause memory leak if the - # involved objects contain __del__: - # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ - h.trainer = weakref.proxy(self) - self._hooks.extend(hooks) - - def train(self, start_iter: int, max_iter: int): - """ - Args: - start_iter, max_iter (int): See docs above - """ - logger = logging.getLogger(__name__) - logger.info("Starting training from iteration {}".format(start_iter)) - - self.iter = self.start_iter = start_iter - self.max_iter = max_iter - - with EventStorage(start_iter) as self.storage: - try: - self.before_train() - for self.iter in range(start_iter, max_iter): - self.before_step() - self.run_step() - self.after_step() - except Exception: - logger.exception("Exception during training:") - raise - finally: - self.after_train() - - def before_train(self): - for h in self._hooks: - h.before_train() - - def after_train(self): - for h in self._hooks: - h.after_train() - - def before_step(self): - for h in self._hooks: - h.before_step() - - def after_step(self): - for h in self._hooks: - h.after_step() - # this guarantees, that in each hook's after_step, storage.iter == trainer.iter - self.storage.step() - - def run_step(self): - raise NotImplementedError - - -class SimpleTrainer(TrainerBase): - """ - A simple trainer for the most common type of task: - single-cost single-optimizer single-data-source iterative optimization. - It assumes that every step, you: - - 1. Compute the loss with a data from the data_loader. - 2. Compute the gradients with the above loss. - 3. Update the model with the optimizer. - - If you want to do anything fancier than this, - either subclass TrainerBase and implement your own `run_step`, - or write your own training loop. - """ - - def __init__(self, model, data_loader, optimizer): - """ - Args: - model: a torch Module. Takes a data from data_loader and returns a - dict of losses. - data_loader: an iterable. Contains data to be used to call model. - optimizer: a torch optimizer. - """ - super().__init__() - - """ - We set the model to training mode in the trainer. - However it's valid to train a model that's in eval mode. - If you want your model (or a submodule of it) to behave - like evaluation during training, you can overwrite its train() method. - """ - model.train() - - self.model = model - self.data_loader = data_loader - self._data_loader_iter = iter(data_loader) - self.optimizer = optimizer - - def run_step(self): - """ - Implement the standard training logic described above. - """ - assert self.model.training, "[SimpleTrainer] model was changed to eval mode!" - start = time.perf_counter() - """ - If you want to do something with the data, you can wrap the dataloader. - """ - data = next(self._data_loader_iter) - data_time = time.perf_counter() - start - - """ - If you want to do something with the losses, you can wrap the model. 
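# A minimal wiring sketch for SimpleTrainer above: plain model / data_loader / optimizer
# objects plus one custom hook. All three objects are user-provided placeholders here,
# and the model is expected to return a dict of losses as described above.
class IterationPrinter(HookBase):
    def after_step(self):
        if self.trainer.iter % 20 == 0:
            print("iter", self.trainer.iter)

trainer = SimpleTrainer(model, data_loader, optimizer)
trainer.register_hooks([IterationPrinter()])
trainer.train(start_iter=0, max_iter=10000)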
- """ - loss_dict = self.model(data) - losses = sum(loss_dict.values()) - self._detect_anomaly(losses, loss_dict) - - metrics_dict = loss_dict - metrics_dict["data_time"] = data_time - self._write_metrics(metrics_dict) - - """ - If you need to accumulate gradients or something similar, you can - wrap the optimizer with your custom `zero_grad()` method. - """ - self.optimizer.zero_grad() - losses.backward() - - """ - If you need gradient clipping/scaling or other processing, you can - wrap the optimizer with your custom `step()` method. - """ - self.optimizer.step() - - def _detect_anomaly(self, losses, loss_dict): - if not torch.isfinite(losses).all(): - raise FloatingPointError( - "Loss became infinite or NaN at iteration={}!\nloss_dict = {}".format( - self.iter, loss_dict - ) - ) - - def _write_metrics(self, metrics_dict: dict): - """ - Args: - metrics_dict (dict): dict of scalar metrics - """ - metrics_dict = { - k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v) - for k, v in metrics_dict.items() - } - # gather metrics among all workers for logging - # This assumes we do DDP-style training, which is currently the only - # supported method in detectron2. - all_metrics_dict = comm.gather(metrics_dict) - - if comm.is_main_process(): - if "data_time" in all_metrics_dict[0]: - # data_time among workers can have high variance. The actual latency - # caused by data_time is the maximum among workers. - data_time = np.max([x.pop("data_time") for x in all_metrics_dict]) - self.storage.put_scalar("data_time", data_time) - - # average the rest metrics - metrics_dict = { - k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys() - } - total_losses_reduced = sum(loss for loss in metrics_dict.values()) - - self.storage.put_scalar("total_loss", total_losses_reduced) - if len(metrics_dict) > 1: - self.storage.put_scalars(**metrics_dict) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/__init__.py deleted file mode 100644 index f1d2f10..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator -from .coco_evaluation import COCOEvaluator -from .rotated_coco_evaluation import RotatedCOCOEvaluator -from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset -from .lvis_evaluation import LVISEvaluator -from .panoptic_evaluation import COCOPanopticEvaluator -from .pascal_voc_evaluation import PascalVOCDetectionEvaluator -from .sem_seg_evaluation import SemSegEvaluator -from .testing import print_csv_format, verify_results - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/cityscapes_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/cityscapes_evaluation.py deleted file mode 100644 index f6287a8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/cityscapes_evaluation.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import glob -import logging -import numpy as np -import os -import tempfile -from collections import OrderedDict -import torch -from fvcore.common.file_io import PathManager -from PIL import Image - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm - -from .evaluator import DatasetEvaluator - - -class CityscapesEvaluator(DatasetEvaluator): - """ - Base class for evaluation using cityscapes API. - """ - - def __init__(self, dataset_name): - """ - Args: - dataset_name (str): the name of the dataset. - It must have the following metadata associated with it: - "thing_classes", "gt_dir". - """ - self._metadata = MetadataCatalog.get(dataset_name) - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - def reset(self): - self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_") - self._temp_dir = self._working_dir.name - # All workers will write to the same results directory - # TODO this does not work in distributed training - self._temp_dir = comm.all_gather(self._temp_dir)[0] - if self._temp_dir != self._working_dir.name: - self._working_dir.cleanup() - self._logger.info( - "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir) - ) - - -class CityscapesInstanceEvaluator(CityscapesEvaluator): - """ - Evaluate instance segmentation results using cityscapes API. - - Note: - * It does not work in multi-machine distributed training. - * It contains a synchronization, therefore has to be used on all ranks. - * Only the main process runs evaluation. - """ - - def process(self, inputs, outputs): - from cityscapesscripts.helpers.labels import name2label - - for input, output in zip(inputs, outputs): - file_name = input["file_name"] - basename = os.path.splitext(os.path.basename(file_name))[0] - pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") - - output = output["instances"].to(self._cpu_device) - num_instances = len(output) - with open(pred_txt, "w") as fout: - for i in range(num_instances): - pred_class = output.pred_classes[i] - classes = self._metadata.thing_classes[pred_class] - class_id = name2label[classes].id - score = output.scores[i] - mask = output.pred_masks[i].numpy().astype("uint8") - png_filename = os.path.join( - self._temp_dir, basename + "_{}_{}.png".format(i, classes) - ) - - Image.fromarray(mask * 255).save(png_filename) - fout.write("{} {} {}\n".format(os.path.basename(png_filename), class_id, score)) - - def evaluate(self): - """ - Returns: - dict: has a key "segm", whose value is a dict of "AP" and "AP50". 
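To make the notes above concrete, here is a usage sketch for the Cityscapes evaluators. The dataset name is only illustrative and must carry the "thing_classes" and "gt_dir" metadata; because of the synchronization, the call has to run on every rank even though only the main process receives the result dict. model and val_loader are assumed to be built elsewhere.

.. code-block:: python

    from detectron2.evaluation import CityscapesInstanceEvaluator, inference_on_dataset

    # "cityscapes_fine_instance_seg_val" is used purely as an illustrative dataset name
    evaluator = CityscapesInstanceEvaluator("cityscapes_fine_instance_seg_val")
    results = inference_on_dataset(model, val_loader, evaluator)  # must run on all ranks
    # main process: results["segm"] == {"AP": ..., "AP50": ...}; other ranks receive {}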
- """ - comm.synchronize() - if comm.get_rank() > 0: - return - import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval - - self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) - - # set some global states in cityscapes evaluation API, before evaluating - cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) - cityscapes_eval.args.predictionWalk = None - cityscapes_eval.args.JSONOutput = False - cityscapes_eval.args.colorized = False - cityscapes_eval.args.gtInstancesFile = os.path.join(self._temp_dir, "gtInstances.json") - - # These lines are adopted from - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa - gt_dir = PathManager.get_local_path(self._metadata.gt_dir) - groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_instanceIds.png")) - assert len( - groundTruthImgList - ), "Cannot find any ground truth images to use for evaluation. Searched for: {}".format( - cityscapes_eval.args.groundTruthSearch - ) - predictionImgList = [] - for gt in groundTruthImgList: - predictionImgList.append(cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) - results = cityscapes_eval.evaluateImgLists( - predictionImgList, groundTruthImgList, cityscapes_eval.args - )["averages"] - - ret = OrderedDict() - ret["segm"] = {"AP": results["allAp"] * 100, "AP50": results["allAp50%"] * 100} - self._working_dir.cleanup() - return ret - - -class CityscapesSemSegEvaluator(CityscapesEvaluator): - """ - Evaluate semantic segmentation results using cityscapes API. - - Note: - * It does not work in multi-machine distributed training. - * It contains a synchronization, therefore has to be used on all ranks. - * Only the main process runs evaluation. - """ - - def process(self, inputs, outputs): - from cityscapesscripts.helpers.labels import trainId2label - - for input, output in zip(inputs, outputs): - file_name = input["file_name"] - basename = os.path.splitext(os.path.basename(file_name))[0] - pred_filename = os.path.join(self._temp_dir, basename + "_pred.png") - - output = output["sem_seg"].argmax(dim=0).to(self._cpu_device).numpy() - pred = 255 * np.ones(output.shape, dtype=np.uint8) - for train_id, label in trainId2label.items(): - if label.ignoreInEval: - continue - pred[output == train_id] = label.id - Image.fromarray(pred).save(pred_filename) - - def evaluate(self): - comm.synchronize() - if comm.get_rank() > 0: - return - # Load the Cityscapes eval script *after* setting the required env var, - # since the script reads CITYSCAPES_DATASET into global variables at load time. - import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as cityscapes_eval - - self._logger.info("Evaluating results under {} ...".format(self._temp_dir)) - - # set some global states in cityscapes evaluation API, before evaluating - cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) - cityscapes_eval.args.predictionWalk = None - cityscapes_eval.args.JSONOutput = False - cityscapes_eval.args.colorized = False - - # These lines are adopted from - # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalPixelLevelSemanticLabeling.py # noqa - gt_dir = PathManager.get_local_path(self._metadata.gt_dir) - groundTruthImgList = glob.glob(os.path.join(gt_dir, "*", "*_gtFine_labelIds.png")) - assert len( - groundTruthImgList - ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( - cityscapes_eval.args.groundTruthSearch - ) - predictionImgList = [] - for gt in groundTruthImgList: - predictionImgList.append(cityscapes_eval.getPrediction(cityscapes_eval.args, gt)) - results = cityscapes_eval.evaluateImgLists( - predictionImgList, groundTruthImgList, cityscapes_eval.args - ) - ret = OrderedDict() - ret["sem_seg"] = { - "IoU": 100.0 * results["averageScoreClasses"], - "iIoU": 100.0 * results["averageScoreInstClasses"], - "IoU_sup": 100.0 * results["averageScoreCategories"], - "iIoU_sup": 100.0 * results["averageScoreInstCategories"], - } - self._working_dir.cleanup() - return ret diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/coco_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/coco_evaluation.py deleted file mode 100644 index 64b0903..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/coco_evaluation.py +++ /dev/null @@ -1,512 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import contextlib -import copy -import io -import itertools -import json -import logging -import numpy as np -import os -import pickle -from collections import OrderedDict -import pycocotools.mask as mask_util -import torch -from fvcore.common.file_io import PathManager -from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -from tabulate import tabulate - -import detectron2.utils.comm as comm -from detectron2.data import MetadataCatalog -from detectron2.data.datasets.coco import convert_to_coco_json -from detectron2.structures import Boxes, BoxMode, pairwise_iou -from detectron2.utils.logger import create_small_table - -from .evaluator import DatasetEvaluator - - -class COCOEvaluator(DatasetEvaluator): - """ - Evaluate object proposal, instance detection/segmentation, keypoint detection - outputs using COCO's metrics and APIs. - """ - - def __init__(self, dataset_name, cfg, distributed, output_dir=None): - """ - Args: - dataset_name (str): name of the dataset to be evaluated. - It must have either the following corresponding metadata: - - "json_file": the path to the COCO format annotation - - Or it must be in detectron2's standard dataset format - so it can be converted to COCO format automatically. - cfg (CfgNode): config instance - distributed (True): if True, will collect results from all ranks and run evaluation - in the main process. - Otherwise, will evaluate the results in the current process. - output_dir (str): optional, an output directory to dump all - results predicted on the dataset. The dump contains two files: - - 1. "instance_predictions.pth" a file in torch serialization - format that contains all the raw original predictions. - 2. "coco_instances_results.json" a json file in COCO's result - format. - """ - self._tasks = self._tasks_from_config(cfg) - self._distributed = distributed - self._output_dir = output_dir - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self._metadata = MetadataCatalog.get(dataset_name) - if not hasattr(self._metadata, "json_file"): - self._logger.warning( - f"json_file was not found in MetaDataCatalog for '{dataset_name}'." - " Trying to convert it to COCO format ..." 
- ) - - cache_path = os.path.join(output_dir, f"{dataset_name}_coco_format.json") - self._metadata.json_file = cache_path - convert_to_coco_json(dataset_name, cache_path) - - json_file = PathManager.get_local_path(self._metadata.json_file) - with contextlib.redirect_stdout(io.StringIO()): - self._coco_api = COCO(json_file) - - self._kpt_oks_sigmas = cfg.TEST.KEYPOINT_OKS_SIGMAS - # Test set json files do not contain annotations (evaluation must be - # performed using the COCO evaluation server). - self._do_evaluation = "annotations" in self._coco_api.split_name - - def reset(self): - self._predictions = [] - - def _tasks_from_config(self, cfg): - """ - Returns: - tuple[str]: tasks that can be evaluated under the given configuration. - """ - tasks = ("bbox",) - if cfg.MODEL.MASK_ON: - tasks = tasks + ("segm",) - if cfg.MODEL.KEYPOINT_ON: - tasks = tasks + ("keypoints",) - return tasks - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - # TODO this is ugly - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - self._predictions.append(prediction) - - def evaluate(self): - if self._distributed: - comm.synchronize() - predictions = comm.gather(self._predictions, dst=0) - predictions = list(itertools.chain(*predictions)) - - if not comm.is_main_process(): - return {} - else: - predictions = self._predictions - - if len(predictions) == 0: - self._logger.warning("[COCOEvaluator] Did not receive valid predictions.") - return {} - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "instances_predictions.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(predictions, f) - - self._results = OrderedDict() - if "proposals" in predictions[0]: - self._eval_box_proposals(predictions) - if "instances" in predictions[0]: - self._eval_predictions(set(self._tasks), predictions) - # Copy so the caller can do whatever with results - return copy.deepcopy(self._results) - - def _eval_predictions(self, tasks, predictions): - """ - Evaluate predictions on the given tasks. - Fill self._results with the metrics of the tasks. 
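As a usage sketch for the COCO evaluator above: it is normally constructed once per dataset and driven by inference_on_dataset. The dataset name below is hypothetical, cfg and model are assumed to come from elsewhere, and build_detection_test_loader is assumed to be the usual detectron2 data helper.

.. code-block:: python

    from detectron2.data import build_detection_test_loader
    from detectron2.evaluation import COCOEvaluator, inference_on_dataset

    # "my_coco_val" is a hypothetical registered dataset; cfg and model are built elsewhere
    evaluator = COCOEvaluator("my_coco_val", cfg, distributed=True, output_dir="./coco_eval")
    val_loader = build_detection_test_loader(cfg, "my_coco_val")
    results = inference_on_dataset(model, val_loader, evaluator)
    # e.g. results == {"bbox": {"AP": ..., "AP50": ...}, "segm": {...}}, depending on cfg tasks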
- """ - self._logger.info("Preparing results for COCO format ...") - coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) - - # unmap the category ids for COCO - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - for result in coco_results: - category_id = result["category_id"] - assert ( - category_id in reverse_id_mapping - ), "A prediction has category_id={}, which is not available in the dataset.".format( - category_id - ) - result["category_id"] = reverse_id_mapping[category_id] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "coco_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(coco_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating predictions ...") - for task in sorted(tasks): - coco_eval = ( - _evaluate_predictions_on_coco( - self._coco_api, coco_results, task, kpt_oks_sigmas=self._kpt_oks_sigmas - ) - if len(coco_results) > 0 - else None # cocoapi does not handle empty results very well - ) - - res = self._derive_coco_results( - coco_eval, task, class_names=self._metadata.get("thing_classes") - ) - self._results[task] = res - - def _eval_box_proposals(self, predictions): - """ - Evaluate the box proposals in predictions. - Fill self._results with the metrics for "box_proposals" task. - """ - if self._output_dir: - # Saving generated box proposals to file. - # Predicted box_proposals are in XYXY_ABS mode. - bbox_mode = BoxMode.XYXY_ABS.value - ids, boxes, objectness_logits = [], [], [] - for prediction in predictions: - ids.append(prediction["image_id"]) - boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) - objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) - - proposal_data = { - "boxes": boxes, - "objectness_logits": objectness_logits, - "ids": ids, - "bbox_mode": bbox_mode, - } - with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: - pickle.dump(proposal_data, f) - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating bbox proposals ...") - res = {} - areas = {"all": "", "small": "s", "medium": "m", "large": "l"} - for limit in [100, 1000]: - for area, suffix in areas.items(): - stats = _evaluate_box_proposals(predictions, self._coco_api, area=area, limit=limit) - key = "AR{}@{:d}".format(suffix, limit) - res[key] = float(stats["ar"].item() * 100) - self._logger.info("Proposal metrics: \n" + create_small_table(res)) - self._results["box_proposals"] = res - - def _derive_coco_results(self, coco_eval, iou_type, class_names=None): - """ - Derive the desired score numbers from summarized COCOeval. - - Args: - coco_eval (None or COCOEval): None represents no predictions from model. - iou_type (str): - class_names (None or list[str]): if provided, will use it to predict - per-category AP. 
- - Returns: - a dict of {metric name: score} - """ - - metrics = { - "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl"], - "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl"], - "keypoints": ["AP", "AP50", "AP75", "APm", "APl"], - }[iou_type] - - if coco_eval is None: - self._logger.warn("No predictions from the model!") - return {metric: float("nan") for metric in metrics} - - # the standard metrics - results = { - metric: float(coco_eval.stats[idx] * 100 if coco_eval.stats[idx] >= 0 else "nan") - for idx, metric in enumerate(metrics) - } - self._logger.info( - "Evaluation results for {}: \n".format(iou_type) + create_small_table(results) - ) - if not np.isfinite(sum(results.values())): - self._logger.info("Note that some metrics cannot be computed.") - - if class_names is None or len(class_names) <= 1: - return results - # Compute per-category AP - # from https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L222-L252 # noqa - precisions = coco_eval.eval["precision"] - # precision has dims (iou, recall, cls, area range, max dets) - assert len(class_names) == precisions.shape[2] - - results_per_category = [] - for idx, name in enumerate(class_names): - # area range index 0: all area ranges - # max dets index -1: typically 100 per image - precision = precisions[:, :, idx, 0, -1] - precision = precision[precision > -1] - ap = np.mean(precision) if precision.size else float("nan") - results_per_category.append(("{}".format(name), float(ap * 100))) - - # tabulate it - N_COLS = min(6, len(results_per_category) * 2) - results_flatten = list(itertools.chain(*results_per_category)) - results_2d = itertools.zip_longest(*[results_flatten[i::N_COLS] for i in range(N_COLS)]) - table = tabulate( - results_2d, - tablefmt="pipe", - floatfmt=".3f", - headers=["category", "AP"] * (N_COLS // 2), - numalign="left", - ) - self._logger.info("Per-category {} AP: \n".format(iou_type) + table) - - results.update({"AP-" + name: ap for name, ap in results_per_category}) - return results - - -def instances_to_coco_json(instances, img_id): - """ - Dump an "Instances" object to a COCO-format json that's used for evaluation. - - Args: - instances (Instances): - img_id (int): the image id - - Returns: - list[dict]: list of json annotations in COCO format. - """ - num_instance = len(instances) - if num_instance == 0: - return [] - - boxes = instances.pred_boxes.tensor.numpy() - boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - boxes = boxes.tolist() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - - has_mask = instances.has("pred_masks") - if has_mask: - # use RLE to encode the masks, because they are too large and takes memory - # since this evaluator stores outputs of the entire dataset - rles = [ - mask_util.encode(np.array(mask[:, :, None], order="F", dtype="uint8"))[0] - for mask in instances.pred_masks - ] - for rle in rles: - # "counts" is an array encoded by mask_util as a byte-stream. Python3's - # json writer which always produces strings cannot serialize a bytestream - # unless you decode it. Thankfully, utf-8 works out (which is also what - # the pycocotools/_mask.pyx does). 
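A tiny aside to illustrate the serialization concern described in the comment above; the 2x2 array is a made-up binary mask, encoded the same way as the predicted masks.

.. code-block:: python

    import json

    import numpy as np
    import pycocotools.mask as mask_util

    demo_mask = np.array([[1, 0], [1, 1]], dtype=np.uint8)  # made-up binary mask
    rle = mask_util.encode(np.array(demo_mask[:, :, None], order="F", dtype="uint8"))[0]
    # rle["counts"] is a bytes object, which json.dumps cannot serialize ...
    rle["counts"] = rle["counts"].decode("utf-8")
    # ... but after decoding it, the RLE dict is JSON-serializable
    json.dumps(rle)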
- rle["counts"] = rle["counts"].decode("utf-8") - - has_keypoints = instances.has("pred_keypoints") - if has_keypoints: - keypoints = instances.pred_keypoints - - results = [] - for k in range(num_instance): - result = { - "image_id": img_id, - "category_id": classes[k], - "bbox": boxes[k], - "score": scores[k], - } - if has_mask: - result["segmentation"] = rles[k] - if has_keypoints: - # In COCO annotations, - # keypoints coordinates are pixel indices. - # However our predictions are floating point coordinates. - # Therefore we subtract 0.5 to be consistent with the annotation format. - # This is the inverse of data loading logic in `data/coco.py`. - keypoints[k][:, :2] -= 0.5 - result["keypoints"] = keypoints[k].flatten().tolist() - results.append(result) - return results - - -# inspired from Detectron: -# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa -def _evaluate_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None): - """ - Evaluate detection proposal recall metrics. This function is a much - faster alternative to the official COCO API recall evaluation code. However, - it produces slightly different results. - """ - # Record max overlap value for each gt box - # Return vector of overlap values - areas = { - "all": 0, - "small": 1, - "medium": 2, - "large": 3, - "96-128": 4, - "128-256": 5, - "256-512": 6, - "512-inf": 7, - } - area_ranges = [ - [0 ** 2, 1e5 ** 2], # all - [0 ** 2, 32 ** 2], # small - [32 ** 2, 96 ** 2], # medium - [96 ** 2, 1e5 ** 2], # large - [96 ** 2, 128 ** 2], # 96-128 - [128 ** 2, 256 ** 2], # 128-256 - [256 ** 2, 512 ** 2], # 256-512 - [512 ** 2, 1e5 ** 2], - ] # 512-inf - assert area in areas, "Unknown area range: {}".format(area) - area_range = area_ranges[areas[area]] - gt_overlaps = [] - num_pos = 0 - - for prediction_dict in dataset_predictions: - predictions = prediction_dict["proposals"] - - # sort predictions in descending order - # TODO maybe remove this and make it explicit in the documentation - inds = predictions.objectness_logits.sort(descending=True)[1] - predictions = predictions[inds] - - ann_ids = coco_api.getAnnIds(imgIds=prediction_dict["image_id"]) - anno = coco_api.loadAnns(ann_ids) - gt_boxes = [ - BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - for obj in anno - if obj["iscrowd"] == 0 - ] - gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes - gt_boxes = Boxes(gt_boxes) - gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0]) - - if len(gt_boxes) == 0 or len(predictions) == 0: - continue - - valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) - gt_boxes = gt_boxes[valid_gt_inds] - - num_pos += len(gt_boxes) - - if len(gt_boxes) == 0: - continue - - if limit is not None and len(predictions) > limit: - predictions = predictions[:limit] - - overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) - - _gt_overlaps = torch.zeros(len(gt_boxes)) - for j in range(min(len(predictions), len(gt_boxes))): - # find which proposal box maximally covers each gt box - # and get the iou amount of coverage for each gt box - max_overlaps, argmax_overlaps = overlaps.max(dim=0) - - # find which gt box is 'best' covered (i.e. 
'best' = most iou) - gt_ovr, gt_ind = max_overlaps.max(dim=0) - assert gt_ovr >= 0 - # find the proposal box that covers the best covered gt box - box_ind = argmax_overlaps[gt_ind] - # record the iou coverage of this gt box - _gt_overlaps[j] = overlaps[box_ind, gt_ind] - assert _gt_overlaps[j] == gt_ovr - # mark the proposal box and the gt box as used - overlaps[box_ind, :] = -1 - overlaps[:, gt_ind] = -1 - - # append recorded iou coverage level - gt_overlaps.append(_gt_overlaps) - gt_overlaps = ( - torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) - ) - gt_overlaps, _ = torch.sort(gt_overlaps) - - if thresholds is None: - step = 0.05 - thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) - recalls = torch.zeros_like(thresholds) - # compute recall for each iou threshold - for i, t in enumerate(thresholds): - recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) - # ar = 2 * np.trapz(recalls, thresholds) - ar = recalls.mean() - return { - "ar": ar, - "recalls": recalls, - "thresholds": thresholds, - "gt_overlaps": gt_overlaps, - "num_pos": num_pos, - } - - -def _evaluate_predictions_on_coco(coco_gt, coco_results, iou_type, kpt_oks_sigmas=None): - """ - Evaluate the coco results using COCOEval API. - """ - assert len(coco_results) > 0 - - if iou_type == "segm": - coco_results = copy.deepcopy(coco_results) - # When evaluating mask AP, if the results contain bbox, cocoapi will - # use the box area as the area of the instance, instead of the mask area. - # This leads to a different definition of small/medium/large. - # We remove the bbox field to let mask AP use mask area. - for c in coco_results: - c.pop("bbox", None) - - coco_dt = coco_gt.loadRes(coco_results) - coco_eval = COCOeval(coco_gt, coco_dt, iou_type) - # Use the COCO default keypoint OKS sigmas unless overrides are specified - if kpt_oks_sigmas: - coco_eval.params.kpt_oks_sigmas = np.array(kpt_oks_sigmas) - - if iou_type == "keypoints": - num_keypoints = len(coco_results[0]["keypoints"]) // 3 - assert len(coco_eval.params.kpt_oks_sigmas) == num_keypoints, ( - "[COCOEvaluator] The length of cfg.TEST.KEYPOINT_OKS_SIGMAS (default: 17) " - "must be equal to the number of keypoints. However the prediction has {} " - "keypoints! For more information please refer to " - "http://cocodataset.org/#keypoints-eval.".format(num_keypoints) - ) - - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - - return coco_eval diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/evaluator.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/evaluator.py deleted file mode 100644 index dcb9804..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/evaluator.py +++ /dev/null @@ -1,196 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import datetime -import logging -import time -from collections import OrderedDict -from contextlib import contextmanager -import torch - -from detectron2.utils.comm import get_world_size, is_main_process -from detectron2.utils.logger import log_every_n_seconds - - -class DatasetEvaluator: - """ - Base class for a dataset evaluator. - - The function :func:`inference_on_dataset` runs the model over - all samples in the dataset, and have a DatasetEvaluator to process the inputs/outputs. 
- - This class will accumulate information of the inputs/outputs (by :meth:`process`), - and produce evaluation results in the end (by :meth:`evaluate`). - """ - - def reset(self): - """ - Preparation for a new round of evaluation. - Should be called before starting a round of evaluation. - """ - pass - - def process(self, inputs, outputs): - """ - Process the pair of inputs and outputs. - If they contain batches, the pairs can be consumed one-by-one using `zip`: - - .. code-block:: python - - for input_, output in zip(inputs, outputs): - # do evaluation on single input/output pair - ... - - Args: - inputs (list): the inputs that's used to call the model. - outputs (list): the return value of `model(inputs)` - """ - pass - - def evaluate(self): - """ - Evaluate/summarize the performance, after processing all input/output pairs. - - Returns: - dict: - A new evaluator class can return a dict of arbitrary format - as long as the user can process the results. - In our train_net.py, we expect the following format: - - * key: the name of the task (e.g., bbox) - * value: a dict of {metric name: score}, e.g.: {"AP50": 80} - """ - pass - - -class DatasetEvaluators(DatasetEvaluator): - """ - Wrapper class to combine multiple :class:`DatasetEvaluator` instances. - - This class dispatches every evaluation call to - all of its :class:`DatasetEvaluator`. - """ - - def __init__(self, evaluators): - """ - Args: - evaluators (list): the evaluators to combine. - """ - super().__init__() - self._evaluators = evaluators - - def reset(self): - for evaluator in self._evaluators: - evaluator.reset() - - def process(self, inputs, outputs): - for evaluator in self._evaluators: - evaluator.process(inputs, outputs) - - def evaluate(self): - results = OrderedDict() - for evaluator in self._evaluators: - result = evaluator.evaluate() - if is_main_process() and result is not None: - for k, v in result.items(): - assert ( - k not in results - ), "Different evaluators produce results with the same key {}".format(k) - results[k] = v - return results - - -def inference_on_dataset(model, data_loader, evaluator): - """ - Run model on the data_loader and evaluate the metrics with evaluator. - Also benchmark the inference speed of `model.forward` accurately. - The model will be used in eval mode. - - Args: - model (nn.Module): a module which accepts an object from - `data_loader` and returns some outputs. It will be temporarily set to `eval` mode. - - If you wish to evaluate a model in `training` mode instead, you can - wrap the given model and override its behavior of `.eval()` and `.train()`. - data_loader: an iterable object with a length. - The elements it generates will be the inputs to the model. - evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want - to benchmark, but don't want to do any evaluation. 
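Here is a minimal, hypothetical evaluator that follows the reset/process/evaluate contract described above and returns results in the expected {task: {metric: score}} format; model and val_loader are assumed to exist.

.. code-block:: python

    from detectron2.evaluation import DatasetEvaluator, inference_on_dataset


    class InstanceCounter(DatasetEvaluator):
        """Hypothetical evaluator: counts predicted instances over the whole dataset."""

        def reset(self):
            self.count = 0

        def process(self, inputs, outputs):
            for output in outputs:
                self.count += len(output["instances"])

        def evaluate(self):
            # key: task name, value: {metric name: score}, as documented above
            return {"counting": {"num_instances": self.count}}


    # model and val_loader are assumed to be built elsewhere
    results = inference_on_dataset(model, val_loader, InstanceCounter())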
- - Returns: - The return value of `evaluator.evaluate()` - """ - num_devices = get_world_size() - logger = logging.getLogger(__name__) - logger.info("Start inference on {} images".format(len(data_loader))) - - total = len(data_loader) # inference data loader must have a fixed length - if evaluator is None: - # create a no-op evaluator - evaluator = DatasetEvaluators([]) - evaluator.reset() - - num_warmup = min(5, total - 1) - start_time = time.perf_counter() - total_compute_time = 0 - with inference_context(model), torch.no_grad(): - for idx, inputs in enumerate(data_loader): - if idx == num_warmup: - start_time = time.perf_counter() - total_compute_time = 0 - - start_compute_time = time.perf_counter() - outputs = model(inputs) - if torch.cuda.is_available(): - torch.cuda.synchronize() - total_compute_time += time.perf_counter() - start_compute_time - evaluator.process(inputs, outputs) - - iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) - seconds_per_img = total_compute_time / iters_after_start - if idx >= num_warmup * 2 or seconds_per_img > 5: - total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start - eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) - log_every_n_seconds( - logging.INFO, - "Inference done {}/{}. {:.4f} s / demo. ETA={}".format( - idx + 1, total, seconds_per_img, str(eta) - ), - n=5, - ) - - # Measure the time only for this worker (before the synchronization barrier) - total_time = time.perf_counter() - start_time - total_time_str = str(datetime.timedelta(seconds=total_time)) - # NOTE this format is parsed by grep - logger.info( - "Total inference time: {} ({:.6f} s / demo per device, on {} devices)".format( - total_time_str, total_time / (total - num_warmup), num_devices - ) - ) - total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) - logger.info( - "Total inference pure compute time: {} ({:.6f} s / demo per device, on {} devices)".format( - total_compute_time_str, total_compute_time / (total - num_warmup), num_devices - ) - ) - - results = evaluator.evaluate() - # An evaluator may return None when not in main process. - # Replace it by an empty dict instead to make it easier for downstream code to handle - if results is None: - results = {} - return results - - -@contextmanager -def inference_context(model): - """ - A context where the model is temporarily changed to eval mode, - and restored to previous mode afterwards. - - Args: - model: a torch Module - """ - training_mode = model.training - model.eval() - yield - model.train(training_mode) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/lvis_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/lvis_evaluation.py deleted file mode 100644 index e55f50f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/lvis_evaluation.py +++ /dev/null @@ -1,350 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import copy -import itertools -import json -import logging -import os -import pickle -from collections import OrderedDict -import torch -from fvcore.common.file_io import PathManager - -import detectron2.utils.comm as comm -from detectron2.data import MetadataCatalog -from detectron2.structures import Boxes, BoxMode, pairwise_iou -from detectron2.utils.logger import create_small_table - -from .coco_evaluation import instances_to_coco_json -from .evaluator import DatasetEvaluator - - -class LVISEvaluator(DatasetEvaluator): - """ - Evaluate object proposal and instance detection/segmentation outputs using - LVIS's metrics and evaluation API. - """ - - def __init__(self, dataset_name, cfg, distributed, output_dir=None): - """ - Args: - dataset_name (str): name of the dataset to be evaluated. - It must have the following corresponding metadata: - "json_file": the path to the LVIS format annotation - cfg (CfgNode): config instance - distributed (True): if True, will collect results from all ranks for evaluation. - Otherwise, will evaluate the results in the current process. - output_dir (str): optional, an output directory to dump results. - """ - from lvis import LVIS - - self._tasks = self._tasks_from_config(cfg) - self._distributed = distributed - self._output_dir = output_dir - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self._metadata = MetadataCatalog.get(dataset_name) - json_file = PathManager.get_local_path(self._metadata.json_file) - self._lvis_api = LVIS(json_file) - # Test set json files do not contain annotations (evaluation must be - # performed using the LVIS evaluation server). - self._do_evaluation = len(self._lvis_api.get_ann_ids()) > 0 - - def reset(self): - self._predictions = [] - - def _tasks_from_config(self, cfg): - """ - Returns: - tuple[str]: tasks that can be evaluated under the given configuration. - """ - tasks = ("bbox",) - if cfg.MODEL.MASK_ON: - tasks = tasks + ("segm",) - return tasks - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a LVIS model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a LVIS model. It is a list of dicts with key - "instances" that contains :class:`Instances`. 
- """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - prediction["instances"] = instances_to_coco_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - self._predictions.append(prediction) - - def evaluate(self): - if self._distributed: - comm.synchronize() - predictions = comm.gather(self._predictions, dst=0) - predictions = list(itertools.chain(*predictions)) - - if not comm.is_main_process(): - return - else: - predictions = self._predictions - - if len(predictions) == 0: - self._logger.warning("[LVISEvaluator] Did not receive valid predictions.") - return {} - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "instances_predictions.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(predictions, f) - - self._results = OrderedDict() - if "proposals" in predictions[0]: - self._eval_box_proposals(predictions) - if "instances" in predictions[0]: - self._eval_predictions(set(self._tasks), predictions) - # Copy so the caller can do whatever with results - return copy.deepcopy(self._results) - - def _eval_predictions(self, tasks, predictions): - """ - Evaluate predictions on the given tasks. - Fill self._results with the metrics of the tasks. - - Args: - predictions (list[dict]): list of outputs from the model - """ - self._logger.info("Preparing results in the LVIS format ...") - lvis_results = list(itertools.chain(*[x["instances"] for x in predictions])) - - # LVIS evaluator can be used to evaluate results for COCO dataset categories. - # In this case `_metadata` variable will have a field with COCO-specific category mapping. - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - for result in lvis_results: - result["category_id"] = reverse_id_mapping[result["category_id"]] - else: - # unmap the category ids for LVIS (from 0-indexed to 1-indexed) - for result in lvis_results: - result["category_id"] += 1 - - if self._output_dir: - file_path = os.path.join(self._output_dir, "lvis_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(lvis_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating predictions ...") - for task in sorted(tasks): - res = _evaluate_predictions_on_lvis( - self._lvis_api, lvis_results, task, class_names=self._metadata.get("thing_classes") - ) - self._results[task] = res - - def _eval_box_proposals(self, predictions): - """ - Evaluate the box proposals in predictions. - Fill self._results with the metrics for "box_proposals" task. - """ - if self._output_dir: - # Saving generated box proposals to file. - # Predicted box_proposals are in XYXY_ABS mode. 
- bbox_mode = BoxMode.XYXY_ABS.value - ids, boxes, objectness_logits = [], [], [] - for prediction in predictions: - ids.append(prediction["image_id"]) - boxes.append(prediction["proposals"].proposal_boxes.tensor.numpy()) - objectness_logits.append(prediction["proposals"].objectness_logits.numpy()) - - proposal_data = { - "boxes": boxes, - "objectness_logits": objectness_logits, - "ids": ids, - "bbox_mode": bbox_mode, - } - with PathManager.open(os.path.join(self._output_dir, "box_proposals.pkl"), "wb") as f: - pickle.dump(proposal_data, f) - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating bbox proposals ...") - res = {} - areas = {"all": "", "small": "s", "medium": "m", "large": "l"} - for limit in [100, 1000]: - for area, suffix in areas.items(): - stats = _evaluate_box_proposals(predictions, self._lvis_api, area=area, limit=limit) - key = "AR{}@{:d}".format(suffix, limit) - res[key] = float(stats["ar"].item() * 100) - self._logger.info("Proposal metrics: \n" + create_small_table(res)) - self._results["box_proposals"] = res - - -# inspired from Detectron: -# https://github.com/facebookresearch/Detectron/blob/a6a835f5b8208c45d0dce217ce9bbda915f44df7/detectron/datasets/json_dataset_evaluator.py#L255 # noqa -def _evaluate_box_proposals(dataset_predictions, lvis_api, thresholds=None, area="all", limit=None): - """ - Evaluate detection proposal recall metrics. This function is a much - faster alternative to the official LVIS API recall evaluation code. However, - it produces slightly different results. - """ - # Record max overlap value for each gt box - # Return vector of overlap values - areas = { - "all": 0, - "small": 1, - "medium": 2, - "large": 3, - "96-128": 4, - "128-256": 5, - "256-512": 6, - "512-inf": 7, - } - area_ranges = [ - [0 ** 2, 1e5 ** 2], # all - [0 ** 2, 32 ** 2], # small - [32 ** 2, 96 ** 2], # medium - [96 ** 2, 1e5 ** 2], # large - [96 ** 2, 128 ** 2], # 96-128 - [128 ** 2, 256 ** 2], # 128-256 - [256 ** 2, 512 ** 2], # 256-512 - [512 ** 2, 1e5 ** 2], - ] # 512-inf - assert area in areas, "Unknown area range: {}".format(area) - area_range = area_ranges[areas[area]] - gt_overlaps = [] - num_pos = 0 - - for prediction_dict in dataset_predictions: - predictions = prediction_dict["proposals"] - - # sort predictions in descending order - # TODO maybe remove this and make it explicit in the documentation - inds = predictions.objectness_logits.sort(descending=True)[1] - predictions = predictions[inds] - - ann_ids = lvis_api.get_ann_ids(img_ids=[prediction_dict["image_id"]]) - anno = lvis_api.load_anns(ann_ids) - gt_boxes = [ - BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) for obj in anno - ] - gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4) # guard against no boxes - gt_boxes = Boxes(gt_boxes) - gt_areas = torch.as_tensor([obj["area"] for obj in anno]) - - if len(gt_boxes) == 0 or len(predictions) == 0: - continue - - valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]) - gt_boxes = gt_boxes[valid_gt_inds] - - num_pos += len(gt_boxes) - - if len(gt_boxes) == 0: - continue - - if limit is not None and len(predictions) > limit: - predictions = predictions[:limit] - - overlaps = pairwise_iou(predictions.proposal_boxes, gt_boxes) - - _gt_overlaps = torch.zeros(len(gt_boxes)) - for j in range(min(len(predictions), len(gt_boxes))): - # find which proposal box maximally covers each gt box - # and get the iou amount of coverage for each gt box - 
max_overlaps, argmax_overlaps = overlaps.max(dim=0) - - # find which gt box is 'best' covered (i.e. 'best' = most iou) - gt_ovr, gt_ind = max_overlaps.max(dim=0) - assert gt_ovr >= 0 - # find the proposal box that covers the best covered gt box - box_ind = argmax_overlaps[gt_ind] - # record the iou coverage of this gt box - _gt_overlaps[j] = overlaps[box_ind, gt_ind] - assert _gt_overlaps[j] == gt_ovr - # mark the proposal box and the gt box as used - overlaps[box_ind, :] = -1 - overlaps[:, gt_ind] = -1 - - # append recorded iou coverage level - gt_overlaps.append(_gt_overlaps) - gt_overlaps = ( - torch.cat(gt_overlaps, dim=0) if len(gt_overlaps) else torch.zeros(0, dtype=torch.float32) - ) - gt_overlaps, _ = torch.sort(gt_overlaps) - - if thresholds is None: - step = 0.05 - thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32) - recalls = torch.zeros_like(thresholds) - # compute recall for each iou threshold - for i, t in enumerate(thresholds): - recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos) - # ar = 2 * np.trapz(recalls, thresholds) - ar = recalls.mean() - return { - "ar": ar, - "recalls": recalls, - "thresholds": thresholds, - "gt_overlaps": gt_overlaps, - "num_pos": num_pos, - } - - -def _evaluate_predictions_on_lvis(lvis_gt, lvis_results, iou_type, class_names=None): - """ - Args: - iou_type (str): - kpt_oks_sigmas (list[float]): - class_names (None or list[str]): if provided, will use it to predict - per-category AP. - - Returns: - a dict of {metric name: score} - """ - metrics = { - "bbox": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], - "segm": ["AP", "AP50", "AP75", "APs", "APm", "APl", "APr", "APc", "APf"], - }[iou_type] - - logger = logging.getLogger(__name__) - - if len(lvis_results) == 0: # TODO: check if needed - logger.warn("No predictions from the model!") - return {metric: float("nan") for metric in metrics} - - if iou_type == "segm": - lvis_results = copy.deepcopy(lvis_results) - # When evaluating mask AP, if the results contain bbox, LVIS API will - # use the box area as the area of the instance, instead of the mask area. - # This leads to a different definition of small/medium/large. - # We remove the bbox field to let mask AP use mask area. - for c in lvis_results: - c.pop("bbox", None) - - from lvis import LVISEval, LVISResults - - lvis_results = LVISResults(lvis_gt, lvis_results) - lvis_eval = LVISEval(lvis_gt, lvis_results, iou_type) - lvis_eval.run() - lvis_eval.print_results() - - # Pull the standard metrics from the LVIS results - results = lvis_eval.get_results() - results = {metric: float(results[metric] * 100) for metric in metrics} - logger.info("Evaluation results for {}: \n".format(iou_type) + create_small_table(results)) - return results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/panoptic_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/panoptic_evaluation.py deleted file mode 100644 index fb5e7ab..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/panoptic_evaluation.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import contextlib -import io -import itertools -import json -import logging -import os -import tempfile -from collections import OrderedDict -from fvcore.common.file_io import PathManager -from PIL import Image -from tabulate import tabulate - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm - -from .evaluator import DatasetEvaluator - -logger = logging.getLogger(__name__) - - -class COCOPanopticEvaluator(DatasetEvaluator): - """ - Evaluate Panoptic Quality metrics on COCO using PanopticAPI. - It saves panoptic segmentation prediction in `output_dir` - - It contains a synchronize call and has to be called from all workers. - """ - - def __init__(self, dataset_name, output_dir): - """ - Args: - dataset_name (str): name of the dataset - output_dir (str): output directory to save results for evaluation - """ - self._metadata = MetadataCatalog.get(dataset_name) - self._thing_contiguous_id_to_dataset_id = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - self._stuff_contiguous_id_to_dataset_id = { - v: k for k, v in self._metadata.stuff_dataset_id_to_contiguous_id.items() - } - - self._predictions_json = os.path.join(output_dir, "predictions.json") - - def reset(self): - self._predictions = [] - - def _convert_category_id(self, segment_info): - isthing = segment_info.pop("isthing", None) - if isthing is None: - # the model produces panoptic category id directly. No more conversion needed - return segment_info - if isthing is True: - segment_info["category_id"] = self._thing_contiguous_id_to_dataset_id[ - segment_info["category_id"] - ] - else: - segment_info["category_id"] = self._stuff_contiguous_id_to_dataset_id[ - segment_info["category_id"] - ] - return segment_info - - def process(self, inputs, outputs): - from panopticapi.utils import id2rgb - - for input, output in zip(inputs, outputs): - panoptic_img, segments_info = output["panoptic_seg"] - panoptic_img = panoptic_img.cpu().numpy() - - file_name = os.path.basename(input["file_name"]) - file_name_png = os.path.splitext(file_name)[0] + ".png" - with io.BytesIO() as out: - Image.fromarray(id2rgb(panoptic_img)).save(out, format="PNG") - segments_info = [self._convert_category_id(x) for x in segments_info] - self._predictions.append( - { - "image_id": input["image_id"], - "file_name": file_name_png, - "png_string": out.getvalue(), - "segments_info": segments_info, - } - ) - - def evaluate(self): - comm.synchronize() - - self._predictions = comm.gather(self._predictions) - self._predictions = list(itertools.chain(*self._predictions)) - if not comm.is_main_process(): - return - - # PanopticApi requires local files - gt_json = PathManager.get_local_path(self._metadata.panoptic_json) - gt_folder = PathManager.get_local_path(self._metadata.panoptic_root) - - with tempfile.TemporaryDirectory(prefix="panoptic_eval") as pred_dir: - logger.info("Writing all panoptic predictions to {} ...".format(pred_dir)) - for p in self._predictions: - with open(os.path.join(pred_dir, p["file_name"]), "wb") as f: - f.write(p.pop("png_string")) - - with open(gt_json, "r") as f: - json_data = json.load(f) - json_data["annotations"] = self._predictions - with PathManager.open(self._predictions_json, "w") as f: - f.write(json.dumps(json_data)) - - from panopticapi.evaluation import pq_compute - - with contextlib.redirect_stdout(io.StringIO()): - pq_res = pq_compute( - gt_json, - PathManager.get_local_path(self._predictions_json), - gt_folder=gt_folder, - pred_folder=pred_dir, - ) - - 
res = {} - res["PQ"] = 100 * pq_res["All"]["pq"] - res["SQ"] = 100 * pq_res["All"]["sq"] - res["RQ"] = 100 * pq_res["All"]["rq"] - res["PQ_th"] = 100 * pq_res["Things"]["pq"] - res["SQ_th"] = 100 * pq_res["Things"]["sq"] - res["RQ_th"] = 100 * pq_res["Things"]["rq"] - res["PQ_st"] = 100 * pq_res["Stuff"]["pq"] - res["SQ_st"] = 100 * pq_res["Stuff"]["sq"] - res["RQ_st"] = 100 * pq_res["Stuff"]["rq"] - - results = OrderedDict({"panoptic_seg": res}) - _print_panoptic_results(pq_res) - - return results - - -def _print_panoptic_results(pq_res): - headers = ["", "PQ", "SQ", "RQ", "#categories"] - data = [] - for name in ["All", "Things", "Stuff"]: - row = [name] + [pq_res[name][k] * 100 for k in ["pq", "sq", "rq"]] + [pq_res[name]["n"]] - data.append(row) - table = tabulate( - data, headers=headers, tablefmt="pipe", floatfmt=".3f", stralign="center", numalign="center" - ) - logger.info("Panoptic Evaluation Results:\n" + table) - - -if __name__ == "__main__": - from detectron2.utils.logger import setup_logger - - logger = setup_logger() - import argparse - - parser = argparse.ArgumentParser() - parser.add_argument("--gt-json") - parser.add_argument("--gt-dir") - parser.add_argument("--pred-json") - parser.add_argument("--pred-dir") - args = parser.parse_args() - - from panopticapi.evaluation import pq_compute - - with contextlib.redirect_stdout(io.StringIO()): - pq_res = pq_compute( - args.gt_json, args.pred_json, gt_folder=args.gt_dir, pred_folder=args.pred_dir - ) - _print_panoptic_results(pq_res) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/pascal_voc_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/pascal_voc_evaluation.py deleted file mode 100644 index 22d2e52..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/pascal_voc_evaluation.py +++ /dev/null @@ -1,294 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import logging -import numpy as np -import os -import tempfile -import xml.etree.ElementTree as ET -from collections import OrderedDict, defaultdict -from functools import lru_cache -import torch -from fvcore.common.file_io import PathManager - -from detectron2.data import MetadataCatalog -from detectron2.utils import comm - -from .evaluator import DatasetEvaluator - - -class PascalVOCDetectionEvaluator(DatasetEvaluator): - """ - Evaluate Pascal VOC AP. - It contains a synchronization, therefore has to be called from all ranks. - - Note that this is a rewrite of the official Matlab API. - The results should be similar, but not identical to the one produced by - the official API. 
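Unlike the COCO and LVIS evaluators above, the Pascal VOC evaluator introduced here is constructed from a dataset name alone, as its __init__ below shows; the metadata must provide dirname, split, year, and thing_classes. A short usage sketch, with an illustrative builtin dataset name and assuming model and test_loader exist:

.. code-block:: python

    from detectron2.evaluation import PascalVOCDetectionEvaluator, inference_on_dataset

    evaluator = PascalVOCDetectionEvaluator("voc_2007_test")  # illustrative dataset name
    results = inference_on_dataset(model, test_loader, evaluator)
    # main process: results["bbox"] == {"AP": mean over IoU 0.50:0.95, "AP50": ..., "AP75": ...}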
- """ - - def __init__(self, dataset_name): - """ - Args: - dataset_name (str): name of the dataset, e.g., "voc_2007_test" - """ - self._dataset_name = dataset_name - meta = MetadataCatalog.get(dataset_name) - self._anno_file_template = os.path.join(meta.dirname, "Annotations", "{}.xml") - self._image_set_path = os.path.join(meta.dirname, "ImageSets", "Main", meta.split + ".txt") - self._class_names = meta.thing_classes - assert meta.year in [2007, 2012], meta.year - self._is_2007 = meta.year == 2007 - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - def reset(self): - self._predictions = defaultdict(list) # class name -> list of prediction strings - - def process(self, inputs, outputs): - for input, output in zip(inputs, outputs): - image_id = input["image_id"] - instances = output["instances"].to(self._cpu_device) - boxes = instances.pred_boxes.tensor.numpy() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - for box, score, cls in zip(boxes, scores, classes): - xmin, ymin, xmax, ymax = box - # The inverse of data loading logic in `data/pascal_voc.py` - xmin += 1 - ymin += 1 - self._predictions[cls].append( - f"{image_id} {score:.3f} {xmin:.1f} {ymin:.1f} {xmax:.1f} {ymax:.1f}" - ) - - def evaluate(self): - """ - Returns: - dict: has a key "segm", whose value is a dict of "AP", "AP50", and "AP75". - """ - all_predictions = comm.gather(self._predictions, dst=0) - if not comm.is_main_process(): - return - predictions = defaultdict(list) - for predictions_per_rank in all_predictions: - for clsid, lines in predictions_per_rank.items(): - predictions[clsid].extend(lines) - del all_predictions - - self._logger.info( - "Evaluating {} using {} metric. " - "Note that results do not use the official Matlab API.".format( - self._dataset_name, 2007 if self._is_2007 else 2012 - ) - ) - - with tempfile.TemporaryDirectory(prefix="pascal_voc_eval_") as dirname: - res_file_template = os.path.join(dirname, "{}.txt") - - aps = defaultdict(list) # iou -> ap per class - for cls_id, cls_name in enumerate(self._class_names): - lines = predictions.get(cls_id, [""]) - - with open(res_file_template.format(cls_name), "w") as f: - f.write("\n".join(lines)) - - for thresh in range(50, 100, 5): - rec, prec, ap = voc_eval( - res_file_template, - self._anno_file_template, - self._image_set_path, - cls_name, - ovthresh=thresh / 100.0, - use_07_metric=self._is_2007, - ) - aps[thresh].append(ap * 100) - - ret = OrderedDict() - mAP = {iou: np.mean(x) for iou, x in aps.items()} - ret["bbox"] = {"AP": np.mean(list(mAP.values())), "AP50": mAP[50], "AP75": mAP[75]} - return ret - - -############################################################################## -# -# Below code is modified from -# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/voc_eval.py -# -------------------------------------------------------- -# Fast/er R-CNN -# Licensed under The MIT License [see LICENSE for details] -# Written by Bharath Hariharan -# -------------------------------------------------------- - -"""Python implementation of the PASCAL VOC devkit's AP evaluation code.""" - - -@lru_cache(maxsize=None) -def parse_rec(filename): - """Parse a PASCAL VOC xml file.""" - with PathManager.open(filename) as f: - tree = ET.parse(f) - objects = [] - for obj in tree.findall("object"): - obj_struct = {} - obj_struct["name"] = obj.find("name").text - obj_struct["pose"] = obj.find("pose").text - obj_struct["truncated"] = int(obj.find("truncated").text) - 
obj_struct["difficult"] = int(obj.find("difficult").text) - bbox = obj.find("bndbox") - obj_struct["bbox"] = [ - int(bbox.find("xmin").text), - int(bbox.find("ymin").text), - int(bbox.find("xmax").text), - int(bbox.find("ymax").text), - ] - objects.append(obj_struct) - - return objects - - -def voc_ap(rec, prec, use_07_metric=False): - """Compute VOC AP given precision and recall. If use_07_metric is true, uses - the VOC 07 11-point method (default:False). - """ - if use_07_metric: - # 11 point metric - ap = 0.0 - for t in np.arange(0.0, 1.1, 0.1): - if np.sum(rec >= t) == 0: - p = 0 - else: - p = np.max(prec[rec >= t]) - ap = ap + p / 11.0 - else: - # correct AP calculation - # first append sentinel values at the end - mrec = np.concatenate(([0.0], rec, [1.0])) - mpre = np.concatenate(([0.0], prec, [0.0])) - - # compute the precision envelope - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - i = np.where(mrec[1:] != mrec[:-1])[0] - - # and sum (\Delta recall) * prec - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - return ap - - -def voc_eval(detpath, annopath, imagesetfile, classname, ovthresh=0.5, use_07_metric=False): - """rec, prec, ap = voc_eval(detpath, - annopath, - imagesetfile, - classname, - [ovthresh], - [use_07_metric]) - - Top level function that does the PASCAL VOC evaluation. - - detpath: Path to detections - detpath.format(classname) should produce the detection results file. - annopath: Path to annotations - annopath.format(imagename) should be the xml annotations file. - imagesetfile: Text file containing the list of images, one image per line. - classname: Category name (duh) - [ovthresh]: Overlap threshold (default = 0.5) - [use_07_metric]: Whether to use VOC07's 11 point AP computation - (default False) - """ - # assumes detections are in detpath.format(classname) - # assumes annotations are in annopath.format(imagename) - # assumes imagesetfile is a text file with each line an image name - - # first load gt - # read list of images - with PathManager.open(imagesetfile, "r") as f: - lines = f.readlines() - imagenames = [x.strip() for x in lines] - - # load annots - recs = {} - for imagename in imagenames: - recs[imagename] = parse_rec(annopath.format(imagename)) - - # extract gt objects for this class - class_recs = {} - npos = 0 - for imagename in imagenames: - R = [obj for obj in recs[imagename] if obj["name"] == classname] - bbox = np.array([x["bbox"] for x in R]) - difficult = np.array([x["difficult"] for x in R]).astype(np.bool) - # difficult = np.array([False for x in R]).astype(np.bool) # treat all "difficult" as GT - det = [False] * len(R) - npos = npos + sum(~difficult) - class_recs[imagename] = {"bbox": bbox, "difficult": difficult, "det": det} - - # read dets - detfile = detpath.format(classname) - with open(detfile, "r") as f: - lines = f.readlines() - - splitlines = [x.strip().split(" ") for x in lines] - image_ids = [x[0] for x in splitlines] - confidence = np.array([float(x[1]) for x in splitlines]) - BB = np.array([[float(z) for z in x[2:]] for x in splitlines]).reshape(-1, 4) - - # sort by confidence - sorted_ind = np.argsort(-confidence) - BB = BB[sorted_ind, :] - image_ids = [image_ids[x] for x in sorted_ind] - - # go down dets and mark TPs and FPs - nd = len(image_ids) - tp = np.zeros(nd) - fp = np.zeros(nd) - for d in range(nd): - R = class_recs[image_ids[d]] - bb = BB[d, :].astype(float) - ovmax = -np.inf - 
BBGT = R["bbox"].astype(float) - - if BBGT.size > 0: - # compute overlaps - # intersection - ixmin = np.maximum(BBGT[:, 0], bb[0]) - iymin = np.maximum(BBGT[:, 1], bb[1]) - ixmax = np.minimum(BBGT[:, 2], bb[2]) - iymax = np.minimum(BBGT[:, 3], bb[3]) - iw = np.maximum(ixmax - ixmin + 1.0, 0.0) - ih = np.maximum(iymax - iymin + 1.0, 0.0) - inters = iw * ih - - # union - uni = ( - (bb[2] - bb[0] + 1.0) * (bb[3] - bb[1] + 1.0) - + (BBGT[:, 2] - BBGT[:, 0] + 1.0) * (BBGT[:, 3] - BBGT[:, 1] + 1.0) - - inters - ) - - overlaps = inters / uni - ovmax = np.max(overlaps) - jmax = np.argmax(overlaps) - - if ovmax > ovthresh: - if not R["difficult"][jmax]: - if not R["det"][jmax]: - tp[d] = 1.0 - R["det"][jmax] = 1 - else: - fp[d] = 1.0 - else: - fp[d] = 1.0 - - # compute precision recall - fp = np.cumsum(fp) - tp = np.cumsum(tp) - rec = tp / float(npos) - # avoid divide by zero in case the first detection matches a difficult - # ground truth - prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) - ap = voc_ap(rec, prec, use_07_metric) - - return rec, prec, ap diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/rotated_coco_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/rotated_coco_evaluation.py deleted file mode 100644 index 30746e1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/rotated_coco_evaluation.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import json -import numpy as np -import os -import torch -from fvcore.common.file_io import PathManager -from pycocotools.cocoeval import COCOeval, maskUtils - -from detectron2.structures import BoxMode, RotatedBoxes, pairwise_iou_rotated - -from .coco_evaluation import COCOEvaluator - - -class RotatedCOCOeval(COCOeval): - @staticmethod - def is_rotated(box_list): - if type(box_list) == np.ndarray: - return box_list.shape[1] == 5 - elif type(box_list) == list: - if box_list == []: # cannot decide the box_dim - return False - return np.all( - np.array( - [ - (len(obj) == 5) and ((type(obj) == list) or (type(obj) == np.ndarray)) - for obj in box_list - ] - ) - ) - return False - - @staticmethod - def boxlist_to_tensor(boxlist, output_box_dim): - if type(boxlist) == np.ndarray: - box_tensor = torch.from_numpy(boxlist) - elif type(boxlist) == list: - if boxlist == []: - return torch.zeros((0, output_box_dim), dtype=torch.float32) - else: - box_tensor = torch.FloatTensor(boxlist) - else: - raise Exception("Unrecognized boxlist type") - - input_box_dim = box_tensor.shape[1] - if input_box_dim != output_box_dim: - if input_box_dim == 4 and output_box_dim == 5: - box_tensor = BoxMode.convert(box_tensor, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) - else: - raise Exception( - "Unable to convert from {}-dim box to {}-dim box".format( - input_box_dim, output_box_dim - ) - ) - return box_tensor - - def compute_iou_dt_gt(self, dt, gt, is_crowd): - if self.is_rotated(dt) or self.is_rotated(gt): - # TODO: take is_crowd into consideration - assert all(c == 0 for c in is_crowd) - dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5)) - gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5)) - return pairwise_iou_rotated(dt, gt) - else: - # This is the same as the classical COCO evaluation - return maskUtils.iou(dt, gt, is_crowd) - - def computeIoU(self, imgId, catId): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: 
- gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return [] - inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = dt[0 : p.maxDets[-1]] - - assert p.iouType == "bbox", "unsupported iouType for iou computation" - - g = [g["bbox"] for g in gt] - d = [d["bbox"] for d in dt] - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in gt] - - # Note: this function is copied from cocoeval.py in cocoapi - # and the major difference is here. - ious = self.compute_iou_dt_gt(d, g, iscrowd) - return ious - - -class RotatedCOCOEvaluator(COCOEvaluator): - """ - Evaluate object proposal/instance detection outputs using COCO-like metrics and APIs, - with rotated boxes support. - Note: this uses IOU only and does not consider angle differences. - """ - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - """ - for input, output in zip(inputs, outputs): - prediction = {"image_id": input["image_id"]} - - if "instances" in output: - instances = output["instances"].to(self._cpu_device) - - prediction["instances"] = self.instances_to_json(instances, input["image_id"]) - if "proposals" in output: - prediction["proposals"] = output["proposals"].to(self._cpu_device) - self._predictions.append(prediction) - - def instances_to_json(self, instances, img_id): - num_instance = len(instances) - if num_instance == 0: - return [] - - boxes = instances.pred_boxes.tensor.numpy() - if boxes.shape[1] == 4: - boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - boxes = boxes.tolist() - scores = instances.scores.tolist() - classes = instances.pred_classes.tolist() - - results = [] - for k in range(num_instance): - result = { - "image_id": img_id, - "category_id": classes[k], - "bbox": boxes[k], - "score": scores[k], - } - - results.append(result) - return results - - def _eval_predictions(self, tasks, predictions): - """ - Evaluate predictions on the given tasks. - Fill self._results with the metrics of the tasks. 
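# A minimal sketch (plain numpy, no detectron2) of the box conversion that
# instances_to_json above performs via BoxMode.convert: COCO-style JSON results
# store boxes as (x0, y0, width, height) instead of two corners.
import numpy as np

def xyxy_to_xywh(boxes_xyxy: np.ndarray) -> np.ndarray:
    out = boxes_xyxy.astype(float)
    out[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0]  # width  = x1 - x0
    out[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1]  # height = y1 - y0
    return out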
- """ - self._logger.info("Preparing results for COCO format ...") - coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) - - # unmap the category ids for COCO - if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): - reverse_id_mapping = { - v: k for k, v in self._metadata.thing_dataset_id_to_contiguous_id.items() - } - for result in coco_results: - result["category_id"] = reverse_id_mapping[result["category_id"]] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "coco_instances_results.json") - self._logger.info("Saving results to {}".format(file_path)) - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(coco_results)) - f.flush() - - if not self._do_evaluation: - self._logger.info("Annotations are not available for evaluation.") - return - - self._logger.info("Evaluating predictions ...") - for task in sorted(tasks): - assert task == "bbox", "Task {} is not supported".format(task) - coco_eval = ( - self._evaluate_predictions_on_coco(self._coco_api, coco_results) - if len(coco_results) > 0 - else None # cocoapi does not handle empty results very well - ) - - res = self._derive_coco_results( - coco_eval, task, class_names=self._metadata.get("thing_classes") - ) - self._results[task] = res - - def _evaluate_predictions_on_coco(self, coco_gt, coco_results): - """ - Evaluate the coco results using COCOEval API. - """ - assert len(coco_results) > 0 - - coco_dt = coco_gt.loadRes(coco_results) - - # Only bbox is supported for now - coco_eval = RotatedCOCOeval(coco_gt, coco_dt, iouType="bbox") - - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - - return coco_eval diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/sem_seg_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/sem_seg_evaluation.py deleted file mode 100644 index fb3b28d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/sem_seg_evaluation.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import json -import logging -import numpy as np -import os -from collections import OrderedDict -import PIL.Image as Image -import pycocotools.mask as mask_util -import torch -from fvcore.common.file_io import PathManager - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.utils.comm import all_gather, is_main_process, synchronize - -from .evaluator import DatasetEvaluator - - -class SemSegEvaluator(DatasetEvaluator): - """ - Evaluate semantic segmentation - """ - - def __init__(self, dataset_name, distributed, num_classes, ignore_label=255, output_dir=None): - """ - Args: - dataset_name (str): name of the dataset to be evaluated. - distributed (True): if True, will collect results from all ranks for evaluation. - Otherwise, will evaluate the results in the current process. - num_classes (int): number of classes - ignore_label (int): value in semantic segmentation ground truth. Predictions for the - corresponding pixels should be ignored. - output_dir (str): an output directory to dump results. 
- """ - self._dataset_name = dataset_name - self._distributed = distributed - self._output_dir = output_dir - self._num_classes = num_classes - self._ignore_label = ignore_label - self._N = num_classes + 1 - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self.input_file_to_gt_file = { - dataset_record["file_name"]: dataset_record["sem_seg_file_name"] - for dataset_record in DatasetCatalog.get(dataset_name) - } - - meta = MetadataCatalog.get(dataset_name) - # Dict that maps contiguous training ids to COCO category ids - try: - c2d = meta.stuff_dataset_id_to_contiguous_id - self._contiguous_id_to_dataset_id = {v: k for k, v in c2d.items()} - except AttributeError: - self._contiguous_id_to_dataset_id = None - self._class_names = meta.stuff_classes - - def reset(self): - self._conf_matrix = np.zeros((self._N, self._N), dtype=np.int64) - self._predictions = [] - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a model. - It is a list of dicts. Each dict corresponds to an image and - contains keys like "height", "width", "file_name". - outputs: the outputs of a model. It is either list of semantic segmentation predictions - (Tensor [H, W]) or list of dicts with key "sem_seg" that contains semantic - segmentation prediction in the same format. - """ - for input, output in zip(inputs, outputs): - output = output["sem_seg"].argmax(dim=0).to(self._cpu_device) - pred = np.array(output, dtype=np.int) - with PathManager.open(self.input_file_to_gt_file[input["file_name"]], "rb") as f: - gt = np.array(Image.open(f), dtype=np.int) - - gt[gt == self._ignore_label] = self._num_classes - - self._conf_matrix += np.bincount( - self._N * pred.reshape(-1) + gt.reshape(-1), minlength=self._N ** 2 - ).reshape(self._N, self._N) - - self._predictions.extend(self.encode_json_sem_seg(pred, input["file_name"])) - - def evaluate(self): - """ - Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): - - * Mean intersection-over-union averaged across classes (mIoU) - * Frequency Weighted IoU (fwIoU) - * Mean pixel accuracy averaged across classes (mACC) - * Pixel Accuracy (pACC) - """ - if self._distributed: - synchronize() - conf_matrix_list = all_gather(self._conf_matrix) - self._predictions = all_gather(self._predictions) - self._predictions = list(itertools.chain(*self._predictions)) - if not is_main_process(): - return - - self._conf_matrix = np.zeros_like(self._conf_matrix) - for conf_matrix in conf_matrix_list: - self._conf_matrix += conf_matrix - - if self._output_dir: - PathManager.mkdirs(self._output_dir) - file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") - with PathManager.open(file_path, "w") as f: - f.write(json.dumps(self._predictions)) - - acc = np.full(self._num_classes, np.nan, dtype=np.float) - iou = np.full(self._num_classes, np.nan, dtype=np.float) - tp = self._conf_matrix.diagonal()[:-1].astype(np.float) - pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) - class_weights = pos_gt / np.sum(pos_gt) - pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) - acc_valid = pos_gt > 0 - acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] - iou_valid = (pos_gt + pos_pred) > 0 - union = pos_gt + pos_pred - tp - iou[acc_valid] = tp[acc_valid] / union[acc_valid] - macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) - miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) - fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) - pacc = np.sum(tp) / 
np.sum(pos_gt) - - res = {} - res["mIoU"] = 100 * miou - res["fwIoU"] = 100 * fiou - for i, name in enumerate(self._class_names): - res["IoU-{}".format(name)] = 100 * iou[i] - res["mACC"] = 100 * macc - res["pACC"] = 100 * pacc - for i, name in enumerate(self._class_names): - res["ACC-{}".format(name)] = 100 * acc[i] - - if self._output_dir: - file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") - with PathManager.open(file_path, "wb") as f: - torch.save(res, f) - results = OrderedDict({"sem_seg": res}) - self._logger.info(results) - return results - - def encode_json_sem_seg(self, sem_seg, input_file_name): - """ - Convert semantic segmentation to COCO stuff format with segments encoded as RLEs. - See http://cocodataset.org/#format-results - """ - json_list = [] - for label in np.unique(sem_seg): - if self._contiguous_id_to_dataset_id is not None: - assert ( - label in self._contiguous_id_to_dataset_id - ), "Label {} is not in the metadata info for {}".format(label, self._dataset_name) - dataset_id = self._contiguous_id_to_dataset_id[label] - else: - dataset_id = int(label) - mask = (sem_seg == label).astype(np.uint8) - mask_rle = mask_util.encode(np.array(mask[:, :, None], order="F"))[0] - mask_rle["counts"] = mask_rle["counts"].decode("utf-8") - json_list.append( - {"file_name": input_file_name, "category_id": dataset_id, "segmentation": mask_rle} - ) - return json_list diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/testing.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/testing.py deleted file mode 100644 index 95addeb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/evaluation/testing.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import numpy as np -import pprint -import sys -from collections import OrderedDict -from collections.abc import Mapping - - -def print_csv_format(results): - """ - Print main metrics in a format similar to Detectron, - so that they are easy to copypaste into a spreadsheet. - - Args: - results (OrderedDict[dict]): task_name -> {metric -> score} - """ - assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed - logger = logging.getLogger(__name__) - for task, res in results.items(): - # Don't print "AP-category" metrics since they are usually not tracked. 
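# A minimal sketch of the one-call confusion-matrix update used by
# SemSegEvaluator.process above: with N = num_classes + 1, the flattened index
# N * pred + gt counts every (prediction, ground-truth) pixel pair via bincount.
# The label arrays are made up; the extra last row/column absorbs ignored pixels.
import numpy as np

num_classes = 3
N = num_classes + 1
pred = np.array([0, 1, 2, 2, 1])
gt = np.array([0, 1, 1, 2, 3])  # 3 stands in for the remapped ignore label
conf = np.bincount(N * pred + gt, minlength=N * N).reshape(N, N)
per_class_tp = conf.diagonal()[:-1]  # correctly classified pixels per real class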
- important_res = [(k, v) for k, v in res.items() if "-" not in k] - logger.info("copypaste: Task: {}".format(task)) - logger.info("copypaste: " + ",".join([k[0] for k in important_res])) - logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) - - -def verify_results(cfg, results): - """ - Args: - results (OrderedDict[dict]): task_name -> {metric -> score} - - Returns: - bool: whether the verification succeeds or not - """ - expected_results = cfg.TEST.EXPECTED_RESULTS - if not len(expected_results): - return True - - ok = True - for task, metric, expected, tolerance in expected_results: - actual = results[task][metric] - if not np.isfinite(actual): - ok = False - diff = abs(actual - expected) - if diff > tolerance: - ok = False - - logger = logging.getLogger(__name__) - if not ok: - logger.error("Result verification failed!") - logger.error("Expected Results: " + str(expected_results)) - logger.error("Actual Results: " + pprint.pformat(results)) - - sys.exit(1) - else: - logger.info("Results verification passed.") - return ok - - -def flatten_results_dict(results): - """ - Expand a hierarchical dict of scalars into a flat dict of scalars. - If results[k1][k2][k3] = v, the returned dict will have the entry - {"k1/k2/k3": v}. - - Args: - results (dict): - """ - r = {} - for k, v in results.items(): - if isinstance(v, Mapping): - v = flatten_results_dict(v) - for kk, vv in v.items(): - r[k + "/" + kk] = vv - else: - r[k] = v - return r diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/README.md b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/README.md deleted file mode 100644 index 9bd8b57..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/README.md +++ /dev/null @@ -1,10 +0,0 @@ - -This directory contains code to prepare a detectron2 model for deployment. -Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. - -Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. - - -### Acknowledgements - -Thanks to Mobile Vision team at Facebook for developing the conversion tools. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/__init__.py deleted file mode 100644 index 1e2bf4d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from .api import * - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/api.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/api.py deleted file mode 100644 index a760071..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/api.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
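# A minimal usage sketch for the flatten_results_dict defined above in
# testing.py (assumed importable); the nested results dict is hypothetical.
results = {"bbox": {"AP": 39.4, "AP50": 61.0}, "segm": {"AP": 35.2}}
flat = flatten_results_dict(results)
# flat == {"bbox/AP": 39.4, "bbox/AP50": 61.0, "segm/AP": 35.2}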
-import copy -import logging -import os -import torch -from caffe2.proto import caffe2_pb2 -from torch import nn - -from detectron2.config import CfgNode as CN - -from .caffe2_export import export_caffe2_detection_model -from .caffe2_export import export_onnx_model as export_onnx_model_impl -from .caffe2_export import run_and_save_graph -from .caffe2_inference import ProtobufDetectionModel -from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format -from .shared import get_pb_arg_vali, get_pb_arg_vals, save_graph - -__all__ = [ - "add_export_config", - "export_caffe2_model", - "Caffe2Model", - "export_onnx_model", - "Caffe2Tracer", -] - - -def add_export_config(cfg): - """ - Args: - cfg (CfgNode): a detectron2 config - - Returns: - CfgNode: an updated config with new options that will be used - by :class:`Caffe2Tracer`. - """ - is_frozen = cfg.is_frozen() - cfg.defrost() - cfg.EXPORT_CAFFE2 = CN() - cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT = False - if is_frozen: - cfg.freeze() - return cfg - - -class Caffe2Tracer: - """ - Make a detectron2 model traceable with caffe2 style. - - An original detectron2 model may not be traceable, or - cannot be deployed directly after being traced, due to some reasons: - 1. control flow in some ops - 2. custom ops - 3. complicated pre/post processing - - This class provides a traceable version of a detectron2 model by: - 1. Rewrite parts of the model using ops in caffe2. Note that some ops do - not have GPU implementation. - 2. Define the inputs "after pre-processing" as inputs to the model - 3. Remove post-processing and produce raw layer outputs - - More specifically about inputs: all builtin models take two input tensors. - (1) NCHW float "data" which is an image (usually in [0, 255]) - (2) Nx3 float "im_info", each row of which is (height, width, 1.0) - - After making a traceable model, the class provide methods to export such a - model to different deployment formats. - - The class currently only supports models using builtin meta architectures. - """ - - def __init__(self, cfg, model, inputs): - """ - Args: - cfg (CfgNode): a detectron2 config, with extra export-related options - added by :func:`add_export_config`. - model (nn.Module): a model built by - :func:`detectron2.modeling.build_model`. - inputs: sample inputs that the given model takes for inference. - Will be used to trace the model. - """ - assert isinstance(cfg, CN), cfg - assert isinstance(model, torch.nn.Module), type(model) - if "EXPORT_CAFFE2" not in cfg: - cfg = add_export_config(cfg) # will just the defaults - - self.cfg = cfg - self.model = model - self.inputs = inputs - - def _get_traceable(self): - # TODO how to make it extensible to support custom models - C2MetaArch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[self.cfg.MODEL.META_ARCHITECTURE] - traceable_model = C2MetaArch(self.cfg, copy.deepcopy(self.model)) - traceable_inputs = traceable_model.get_caffe2_inputs(self.inputs) - return traceable_model, traceable_inputs - - def export_caffe2(self): - """ - Export the model to Caffe2's protobuf format. - The returned object can be saved with `.save_protobuf()` method. - The result can be loaded and executed using Caffe2 runtime. - - Returns: - Caffe2Model - """ - model, inputs = self._get_traceable() - predict_net, init_net = export_caffe2_detection_model(model, inputs) - return Caffe2Model(predict_net, init_net) - - def export_onnx(self): - """ - Export the model to ONNX format. 
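# A minimal usage sketch for the Caffe2Tracer / add_export_config API defined in
# this file; `cfg`, `model` and `sample_inputs` are placeholders for a real
# detectron2 config, a model from detectron2.modeling.build_model, and one batch
# of inference inputs, so this is an outline rather than a runnable snippet.
cfg = add_export_config(cfg)                   # add the EXPORT_CAFFE2 options
tracer = Caffe2Tracer(cfg, model, sample_inputs)
caffe2_model = tracer.export_caffe2()          # returns a Caffe2Model
caffe2_model.save_protobuf("./caffe2_export")  # writes model.pb / model_init.pb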
- Note that the exported model contains custom ops only available in caffe2, therefore it - cannot be directly executed by other runtime. Post-processing or transformation passes - may be applied on the model to accommodate different runtimes. - - Returns: - onnx.ModelProto: an onnx model. - """ - model, inputs = self._get_traceable() - return export_onnx_model_impl(model, (inputs,)) - - def export_torchscript(self): - """ - Export the model to a `torch.jit.TracedModule` by tracing. - The returned object can be saved to a file by ".save()". - - Returns: - torch.jit.TracedModule: a torch TracedModule - """ - model, inputs = self._get_traceable() - logger = logging.getLogger(__name__) - logger.info("Tracing the model with torch.jit.trace ...") - with torch.no_grad(): - return torch.jit.trace(model, (inputs,), optimize=True) - - -def export_caffe2_model(cfg, model, inputs): - """ - Export a detectron2 model to caffe2 format. - - Args: - cfg (CfgNode): a detectron2 config, with extra export-related options - added by :func:`add_export_config`. - model (nn.Module): a model built by - :func:`detectron2.modeling.build_model`. - It will be modified by this function. - inputs: sample inputs that the given model takes for inference. - Will be used to trace the model. - - Returns: - Caffe2Model - """ - return Caffe2Tracer(cfg, model, inputs).export_caffe2() - - -def export_onnx_model(cfg, model, inputs): - """ - Export a detectron2 model to ONNX format. - Note that the exported model contains custom ops only available in caffe2, therefore it - cannot be directly executed by other runtime. Post-processing or transformation passes - may be applied on the model to accommodate different runtimes. - Args: - cfg (CfgNode): a detectron2 config, with extra export-related options - added by :func:`add_export_config`. - model (nn.Module): a model built by - :func:`detectron2.modeling.build_model`. - It will be modified by this function. - inputs: sample inputs that the given model takes for inference. - Will be used to trace the model. - Returns: - onnx.ModelProto: an onnx model. - """ - return Caffe2Tracer(cfg, model, inputs).export_onnx() - - -class Caffe2Model(nn.Module): - """ - A wrapper around the traced model in caffe2's pb format. - """ - - def __init__(self, predict_net, init_net): - super().__init__() - self.eval() # always in eval mode - self._predict_net = predict_net - self._init_net = init_net - self._predictor = None - - @property - def predict_net(self): - """ - Returns: - core.Net: the underlying caffe2 predict net - """ - return self._predict_net - - @property - def init_net(self): - """ - Returns: - core.Net: the underlying caffe2 init net - """ - return self._init_net - - __init__.__HIDE_SPHINX_DOC__ = True - - def save_protobuf(self, output_dir): - """ - Save the model as caffe2's protobuf format. - - Args: - output_dir (str): the output directory to save protobuf files. - """ - logger = logging.getLogger(__name__) - logger.info("Saving model to {} ...".format(output_dir)) - os.makedirs(output_dir, exist_ok=True) - - with open(os.path.join(output_dir, "model.pb"), "wb") as f: - f.write(self._predict_net.SerializeToString()) - with open(os.path.join(output_dir, "model.pbtxt"), "w") as f: - f.write(str(self._predict_net)) - with open(os.path.join(output_dir, "model_init.pb"), "wb") as f: - f.write(self._init_net.SerializeToString()) - - def save_graph(self, output_file, inputs=None): - """ - Save the graph as SVG format. 
- - Args: - output_file (str): a SVG file - inputs: optional inputs given to the model. - If given, the inputs will be used to run the graph to record - shape of every tensor. The shape information will be - saved together with the graph. - """ - if inputs is None: - save_graph(self._predict_net, output_file, op_only=False) - else: - size_divisibility = get_pb_arg_vali(self._predict_net, "size_divisibility", 0) - device = get_pb_arg_vals(self._predict_net, "device", b"cpu").decode("ascii") - inputs = convert_batched_inputs_to_c2_format(inputs, size_divisibility, device) - inputs = [x.cpu().numpy() for x in inputs] - run_and_save_graph(self._predict_net, self._init_net, inputs, output_file) - - @staticmethod - def load_protobuf(dir): - """ - Args: - dir (str): a directory used to save Caffe2Model with - :meth:`save_protobuf`. - The files "model.pb" and "model_init.pb" are needed. - - Returns: - Caffe2Model: the caffe2 model loaded from this directory. - """ - predict_net = caffe2_pb2.NetDef() - with open(os.path.join(dir, "model.pb"), "rb") as f: - predict_net.ParseFromString(f.read()) - - init_net = caffe2_pb2.NetDef() - with open(os.path.join(dir, "model_init.pb"), "rb") as f: - init_net.ParseFromString(f.read()) - - return Caffe2Model(predict_net, init_net) - - def __call__(self, inputs): - """ - An interface that wraps around a caffe2 model and mimics detectron2's models' - input & output format. This is used to compare the outputs of caffe2 model - with its original torch model. - - Due to the extra conversion between torch/caffe2, - this method is not meant for benchmark. - """ - if self._predictor is None: - self._predictor = ProtobufDetectionModel(self._predict_net, self._init_net) - return self._predictor(inputs) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/c10.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/c10.py deleted file mode 100644 index 6e3cbe3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/c10.py +++ /dev/null @@ -1,503 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -import math -import torch -import torch.nn.functional as F - -from detectron2.layers import cat -from detectron2.layers.roi_align_rotated import ROIAlignRotated -from detectron2.modeling import poolers -from detectron2.modeling.proposal_generator import rpn -from detectron2.modeling.roi_heads.mask_head import mask_rcnn_inference -from detectron2.structures import Boxes, ImageList, Instances, Keypoints - -from .shared import alias, to_device - - -""" -This file contains caffe2-compatible implementation of several detectrno2 components. -""" - - -class Caffe2Boxes(Boxes): - """ - Representing a list of detectron2.structures.Boxes from minibatch, each box - is represented by a 5d vector (batch index + 4 coordinates), or a 6d vector - (batch index + 5 coordinates) for RotatedBoxes. - """ - - def __init__(self, tensor): - assert isinstance(tensor, torch.Tensor) - assert tensor.dim() == 2 and tensor.size(-1) in [4, 5, 6], tensor.size() - # TODO: make tensor immutable when dim is Nx5 for Boxes, - # and Nx6 for RotatedBoxes? - self.tensor = tensor - - -# TODO clean up this class, maybe just extend Instances -class InstancesList(object): - """ - Tensor representation of a list of Instances object for a batch of images. 
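# A minimal sketch pairing with save_protobuf above: reload the exported nets
# with Caffe2Model.load_protobuf and run them through __call__, which mimics the
# original detectron2 model's input/output format. The directory name and
# `batched_inputs` (detectron2's list-of-dict format) are placeholders.
reloaded = Caffe2Model.load_protobuf("./caffe2_export")
outputs = reloaded(batched_inputs)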
- - When dealing with a batch of images with Caffe2 ops, a list of bboxes - (instances) are usually represented by single Tensor with size - (sigma(Ni), 5) or (sigma(Ni), 4) plus a batch split Tensor. This class is - for providing common functions to convert between these two representations. - """ - - def __init__(self, im_info, indices, extra_fields=None): - # [N, 3] -> (H, W, Scale) - self.im_info = im_info - # [N,] -> indice of batch to which the instance belongs - self.indices = indices - # [N, ...] - self.batch_extra_fields = extra_fields or {} - - self.image_size = self.im_info - - def get_fields(self): - """ like `get_fields` in the Instances object, - but return each field in tensor representations """ - ret = {} - for k, v in self.batch_extra_fields.items(): - # if isinstance(v, torch.Tensor): - # tensor_rep = v - # elif isinstance(v, (Boxes, Keypoints)): - # tensor_rep = v.tensor - # else: - # raise ValueError("Can't find tensor representation for: {}".format()) - ret[k] = v - return ret - - def has(self, name): - return name in self.batch_extra_fields - - def set(self, name, value): - data_len = len(value) - if len(self.batch_extra_fields): - assert ( - len(self) == data_len - ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) - self.batch_extra_fields[name] = value - - def __setattr__(self, name, val): - if name in ["im_info", "indices", "batch_extra_fields", "image_size"]: - super().__setattr__(name, val) - else: - self.set(name, val) - - def __getattr__(self, name): - if name not in self.batch_extra_fields: - raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) - return self.batch_extra_fields[name] - - def __len__(self): - return len(self.indices) - - def flatten(self): - ret = [] - for _, v in self.batch_extra_fields.items(): - if isinstance(v, (Boxes, Keypoints)): - ret.append(v.tensor) - else: - ret.append(v) - return ret - - @staticmethod - def to_d2_instances_list(instances_list): - """ - Convert InstancesList to List[Instances]. The input `instances_list` can - also be a List[Instances], in this case this method is a non-op. - """ - if not isinstance(instances_list, InstancesList): - assert all(isinstance(x, Instances) for x in instances_list) - return instances_list - - ret = [] - for i, info in enumerate(instances_list.im_info): - instances = Instances(torch.Size([int(info[0].item()), int(info[1].item())])) - - ids = instances_list.indices == i - for k, v in instances_list.batch_extra_fields.items(): - if isinstance(v, torch.Tensor): - instances.set(k, v[ids]) - continue - elif isinstance(v, Boxes): - instances.set(k, v[ids, -4:]) - continue - - target_type, tensor_source = v - assert isinstance(tensor_source, torch.Tensor) - assert tensor_source.shape[0] == instances_list.indices.shape[0] - tensor_source = tensor_source[ids] - - if issubclass(target_type, Boxes): - instances.set(k, Boxes(tensor_source[:, -4:])) - elif issubclass(target_type, Keypoints): - instances.set(k, Keypoints(tensor_source)) - elif issubclass(target_type, torch.Tensor): - instances.set(k, tensor_source) - else: - raise ValueError("Can't handle targe type: {}".format(target_type)) - - ret.append(instances) - return ret - - -class Caffe2Compatible(object): - def _get_tensor_mode(self): - return self._tensor_mode - - def _set_tensor_mode(self, v): - self._tensor_mode = v - - tensor_mode = property(_get_tensor_mode, _set_tensor_mode) - """ - If true, the model expects C2-style tensor only inputs/outputs format. 
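# A minimal sketch (plain torch, made-up values) of the batched representation
# that InstancesList above wraps: boxes from all images live in one
# (sum_i Ni, 4) tensor, and a parallel `indices` tensor records which image each
# row belongs to, so per-image instances are recovered with a boolean mask.
import torch

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [5.0, 5.0, 20.0, 20.0],
                      [1.0, 2.0, 3.0, 4.0]])
indices = torch.tensor([0, 0, 1])   # first two boxes belong to image 0
boxes_image0 = boxes[indices == 0]  # what to_d2_instances_list does per image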
- """ - - -class Caffe2RPN(Caffe2Compatible, rpn.RPN): - def forward(self, images, features, gt_instances=None): - assert not self.training - - features = [features[f] for f in self.in_features] - objectness_logits_pred, anchor_deltas_pred = self.rpn_head(features) - - assert isinstance(images, ImageList) - if self.tensor_mode: - im_info = images.image_sizes - else: - im_info = torch.Tensor( - [[im_sz[0], im_sz[1], torch.Tensor([1.0])] for im_sz in images.image_sizes] - ).to(images.tensor.device) - assert isinstance(im_info, torch.Tensor) - - rpn_rois_list = [] - rpn_roi_probs_list = [] - for scores, bbox_deltas, cell_anchors_tensor, feat_stride in zip( - objectness_logits_pred, - anchor_deltas_pred, - iter(self.anchor_generator.cell_anchors), - self.anchor_generator.strides, - ): - scores = scores.detach() - bbox_deltas = bbox_deltas.detach() - - rpn_rois, rpn_roi_probs = torch.ops._caffe2.GenerateProposals( - scores, - bbox_deltas, - im_info, - cell_anchors_tensor, - spatial_scale=1.0 / feat_stride, - pre_nms_topN=self.pre_nms_topk[self.training], - post_nms_topN=self.post_nms_topk[self.training], - nms_thresh=self.nms_thresh, - min_size=self.min_box_side_len, - # correct_transform_coords=True, # deprecated argument - angle_bound_on=True, # Default - angle_bound_lo=-180, - angle_bound_hi=180, - clip_angle_thresh=1.0, # Default - legacy_plus_one=False, - ) - rpn_rois_list.append(rpn_rois) - rpn_roi_probs_list.append(rpn_roi_probs) - - # For FPN in D2, in RPN all proposals from different levels are concated - # together, ranked and picked by top post_nms_topk. Then in ROIPooler - # it calculates level_assignments and calls the RoIAlign from - # the corresponding level. - - if len(objectness_logits_pred) == 1: - rpn_rois = rpn_rois_list[0] - rpn_roi_probs = rpn_roi_probs_list[0] - else: - assert len(rpn_rois_list) == len(rpn_roi_probs_list) - rpn_post_nms_topN = self.post_nms_topk[self.training] - - device = rpn_rois_list[0].device - input_list = [to_device(x, "cpu") for x in (rpn_rois_list + rpn_roi_probs_list)] - - # TODO remove this after confirming rpn_max_level/rpn_min_level - # is not needed in CollectRpnProposals. - feature_strides = list(self.anchor_generator.strides) - rpn_min_level = int(math.log2(feature_strides[0])) - rpn_max_level = int(math.log2(feature_strides[-1])) - assert (rpn_max_level - rpn_min_level + 1) == len( - rpn_rois_list - ), "CollectRpnProposals requires continuous levels" - - rpn_rois = torch.ops._caffe2.CollectRpnProposals( - input_list, - # NOTE: in current implementation, rpn_max_level and rpn_min_level - # are not needed, only the subtraction of two matters and it - # can be infer from the number of inputs. Keep them now for - # consistency. 
- rpn_max_level=2 + len(rpn_rois_list) - 1, - rpn_min_level=2, - rpn_post_nms_topN=rpn_post_nms_topN, - ) - rpn_rois = to_device(rpn_rois, device) - rpn_roi_probs = [] - - proposals = self.c2_postprocess(im_info, rpn_rois, rpn_roi_probs, self.tensor_mode) - return proposals, {} - - @staticmethod - def c2_postprocess(im_info, rpn_rois, rpn_roi_probs, tensor_mode): - proposals = InstancesList( - im_info=im_info, - indices=rpn_rois[:, 0], - extra_fields={ - "proposal_boxes": Caffe2Boxes(rpn_rois), - "objectness_logits": (torch.Tensor, rpn_roi_probs), - }, - ) - if not tensor_mode: - proposals = InstancesList.to_d2_instances_list(proposals) - else: - proposals = [proposals] - return proposals - - -class Caffe2ROIPooler(Caffe2Compatible, poolers.ROIPooler): - @staticmethod - def c2_preprocess(box_lists): - assert all(isinstance(x, Boxes) for x in box_lists) - if all(isinstance(x, Caffe2Boxes) for x in box_lists): - # input is pure-tensor based - assert len(box_lists) == 1 - pooler_fmt_boxes = box_lists[0].tensor - else: - pooler_fmt_boxes = poolers.convert_boxes_to_pooler_format(box_lists) - return pooler_fmt_boxes - - def forward(self, x, box_lists): - assert not self.training - - pooler_fmt_boxes = self.c2_preprocess(box_lists) - num_level_assignments = len(self.level_poolers) - - if num_level_assignments == 1: - if isinstance(self.level_poolers[0], ROIAlignRotated): - c2_roi_align = torch.ops._caffe2.RoIAlignRotated - aligned = True - else: - c2_roi_align = torch.ops._caffe2.RoIAlign - aligned = self.level_poolers[0].aligned - - out = c2_roi_align( - x[0], - pooler_fmt_boxes, - order="NCHW", - spatial_scale=float(self.level_poolers[0].spatial_scale), - pooled_h=int(self.output_size[0]), - pooled_w=int(self.output_size[1]), - sampling_ratio=int(self.level_poolers[0].sampling_ratio), - aligned=aligned, - ) - return out - - device = pooler_fmt_boxes.device - assert ( - self.max_level - self.min_level + 1 == 4 - ), "Currently DistributeFpnProposals only support 4 levels" - fpn_outputs = torch.ops._caffe2.DistributeFpnProposals( - to_device(pooler_fmt_boxes, "cpu"), - roi_canonical_scale=self.canonical_box_size, - roi_canonical_level=self.canonical_level, - roi_max_level=self.max_level, - roi_min_level=self.min_level, - legacy_plus_one=False, - ) - fpn_outputs = [to_device(x, device) for x in fpn_outputs] - - rois_fpn_list = fpn_outputs[:-1] - rois_idx_restore_int32 = fpn_outputs[-1] - - roi_feat_fpn_list = [] - for roi_fpn, x_level, pooler in zip(rois_fpn_list, x, self.level_poolers): - if isinstance(pooler, ROIAlignRotated): - c2_roi_align = torch.ops._caffe2.RoIAlignRotated - aligned = True - else: - c2_roi_align = torch.ops._caffe2.RoIAlign - aligned = bool(pooler.aligned) - - roi_feat_fpn = c2_roi_align( - x_level, - roi_fpn, - order="NCHW", - spatial_scale=float(pooler.spatial_scale), - pooled_h=int(self.output_size[0]), - pooled_w=int(self.output_size[1]), - sampling_ratio=int(pooler.sampling_ratio), - aligned=aligned, - ) - roi_feat_fpn_list.append(roi_feat_fpn) - - roi_feat_shuffled = cat(roi_feat_fpn_list, dim=0) - roi_feat = torch.ops._caffe2.BatchPermutation(roi_feat_shuffled, rois_idx_restore_int32) - return roi_feat - - -class Caffe2FastRCNNOutputsInference: - def __init__(self, tensor_mode): - self.tensor_mode = tensor_mode # whether the output is caffe2 tensor mode - - def __call__(self, box_predictor, predictions, proposals): - """ equivalent to FastRCNNOutputLayers.inference """ - score_thresh = box_predictor.test_score_thresh - nms_thresh = box_predictor.test_nms_thresh - 
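# A minimal sketch of the FPN level-assignment heuristic that
# DistributeFpnProposals (used by Caffe2ROIPooler above) implements; the
# canonical level/scale defaults here are typical detectron2 values and are
# assumptions, not read from any config.
import math

def fpn_level(box_area, min_level=2, max_level=5, canonical_level=4, canonical_scale=224):
    level = canonical_level + math.log2(math.sqrt(box_area) / canonical_scale + 1e-8)
    return int(min(max(math.floor(level), min_level), max_level))

fpn_level(224 * 224)  # -> 4: a canonically sized box pools from the canonical level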
topk_per_image = box_predictor.test_topk_per_image - is_rotated = len(box_predictor.box2box_transform.weights) == 5 - - if is_rotated: - box_dim = 5 - assert box_predictor.box2box_transform.weights[4] == 1, ( - "The weights for Rotated BBoxTransform in C2 have only 4 dimensions," - + " thus enforcing the angle weight to be 1 for now" - ) - box2box_transform_weights = box_predictor.box2box_transform.weights[:4] - else: - box_dim = 4 - box2box_transform_weights = box_predictor.box2box_transform.weights - - class_logits, box_regression = predictions - class_prob = F.softmax(class_logits, -1) - - assert box_regression.shape[1] % box_dim == 0 - cls_agnostic_bbox_reg = box_regression.shape[1] // box_dim == 1 - - input_tensor_mode = proposals[0].proposal_boxes.tensor.shape[1] == box_dim + 1 - - rois = type(proposals[0].proposal_boxes).cat([p.proposal_boxes for p in proposals]) - device, dtype = rois.tensor.device, rois.tensor.dtype - if input_tensor_mode: - im_info = proposals[0].image_size - rois = rois.tensor - else: - im_info = torch.Tensor( - [[sz[0], sz[1], 1.0] for sz in [x.image_size for x in proposals]] - ) - batch_ids = cat( - [ - torch.full((b, 1), i, dtype=dtype, device=device) - for i, b in enumerate(len(p) for p in proposals) - ], - dim=0, - ) - rois = torch.cat([batch_ids, rois.tensor], dim=1) - - roi_pred_bbox, roi_batch_splits = torch.ops._caffe2.BBoxTransform( - to_device(rois, "cpu"), - to_device(box_regression, "cpu"), - to_device(im_info, "cpu"), - weights=box2box_transform_weights, - apply_scale=True, - rotated=is_rotated, - angle_bound_on=True, - angle_bound_lo=-180, - angle_bound_hi=180, - clip_angle_thresh=1.0, - legacy_plus_one=False, - ) - roi_pred_bbox = to_device(roi_pred_bbox, device) - roi_batch_splits = to_device(roi_batch_splits, device) - - nms_outputs = torch.ops._caffe2.BoxWithNMSLimit( - to_device(class_prob, "cpu"), - to_device(roi_pred_bbox, "cpu"), - to_device(roi_batch_splits, "cpu"), - score_thresh=float(score_thresh), - nms=float(nms_thresh), - detections_per_im=int(topk_per_image), - soft_nms_enabled=False, - soft_nms_method="linear", - soft_nms_sigma=0.5, - soft_nms_min_score_thres=0.001, - rotated=is_rotated, - cls_agnostic_bbox_reg=cls_agnostic_bbox_reg, - input_boxes_include_bg_cls=False, - output_classes_include_bg_cls=False, - legacy_plus_one=False, - ) - roi_score_nms = to_device(nms_outputs[0], device) - roi_bbox_nms = to_device(nms_outputs[1], device) - roi_class_nms = to_device(nms_outputs[2], device) - roi_batch_splits_nms = to_device(nms_outputs[3], device) - roi_keeps_nms = to_device(nms_outputs[4], device) - roi_keeps_size_nms = to_device(nms_outputs[5], device) - if not self.tensor_mode: - roi_class_nms = roi_class_nms.to(torch.int64) - - roi_batch_ids = cat( - [ - torch.full((b, 1), i, dtype=dtype, device=device) - for i, b in enumerate(int(x.item()) for x in roi_batch_splits_nms) - ], - dim=0, - ) - - roi_class_nms = alias(roi_class_nms, "class_nms") - roi_score_nms = alias(roi_score_nms, "score_nms") - roi_bbox_nms = alias(roi_bbox_nms, "bbox_nms") - roi_batch_splits_nms = alias(roi_batch_splits_nms, "batch_splits_nms") - roi_keeps_nms = alias(roi_keeps_nms, "keeps_nms") - roi_keeps_size_nms = alias(roi_keeps_size_nms, "keeps_size_nms") - - results = InstancesList( - im_info=im_info, - indices=roi_batch_ids[:, 0], - extra_fields={ - "pred_boxes": Caffe2Boxes(roi_bbox_nms), - "scores": roi_score_nms, - "pred_classes": roi_class_nms, - }, - ) - - if not self.tensor_mode: - results = InstancesList.to_d2_instances_list(results) - 
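# A minimal sketch (made-up values) of the batch-splits bookkeeping used above:
# BoxWithNMSLimit returns flat tensors over the whole batch plus the number of
# kept detections per image, and torch.split recovers the per-image chunks.
import torch

kept_scores = torch.tensor([0.9, 0.8, 0.7, 0.6, 0.5])
batch_splits = [3, 2]  # image 0 kept 3 detections, image 1 kept 2
per_image_scores = list(kept_scores.split(batch_splits))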
batch_splits = roi_batch_splits_nms.int().tolist() - kept_indices = list(roi_keeps_nms.to(torch.int64).split(batch_splits)) - else: - results = [results] - kept_indices = [roi_keeps_nms] - - return results, kept_indices - - -class Caffe2MaskRCNNInference: - def __call__(self, pred_mask_logits, pred_instances): - """ equivalent to mask_head.mask_rcnn_inference """ - if all(isinstance(x, InstancesList) for x in pred_instances): - assert len(pred_instances) == 1 - mask_probs_pred = pred_mask_logits.sigmoid() - mask_probs_pred = alias(mask_probs_pred, "mask_fcn_probs") - pred_instances[0].pred_masks = mask_probs_pred - else: - mask_rcnn_inference(pred_mask_logits, pred_instances) - - -class Caffe2KeypointRCNNInference: - def __init__(self, use_heatmap_max_keypoint): - self.use_heatmap_max_keypoint = use_heatmap_max_keypoint - - def __call__(self, pred_keypoint_logits, pred_instances): - # just return the keypoint heatmap for now, - # there will be option to call HeatmapMaxKeypointOp - output = alias(pred_keypoint_logits, "kps_score") - if all(isinstance(x, InstancesList) for x in pred_instances): - assert len(pred_instances) == 1 - if self.use_heatmap_max_keypoint: - device = output.device - output = torch.ops._caffe2.HeatmapMaxKeypoint( - to_device(output, "cpu"), - pred_instances[0].pred_boxes.tensor, - should_output_softmax=True, # worth make it configerable? - ) - output = to_device(output, device) - output = alias(output, "keypoints_out") - pred_instances[0].pred_keypoints = output - return pred_keypoint_logits diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_export.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_export.py deleted file mode 100644 index ccac809..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_export.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import copy -import io -import logging -import numpy as np -from typing import List -import onnx -import torch -from caffe2.proto import caffe2_pb2 -from caffe2.python import core -from caffe2.python.onnx.backend import Caffe2Backend -from tabulate import tabulate -from termcolor import colored -from torch.onnx import OperatorExportTypes - -from .shared import ( - ScopedWS, - construct_init_net_from_params, - fuse_alias_placeholder, - fuse_copy_between_cpu_and_gpu, - get_params_from_init_net, - group_norm_replace_aten_with_caffe2, - infer_device_type, - remove_dead_end_ops, - remove_reshape_for_fc, - save_graph, -) - -logger = logging.getLogger(__name__) - - -def export_onnx_model(model, inputs): - """ - Trace and export a model to onnx format. 
- - Args: - model (nn.Module): - inputs (tuple[args]): the model will be called by `model(*inputs)` - - Returns: - an onnx model - """ - assert isinstance(model, torch.nn.Module) - - # make sure all modules are in eval mode, onnx may change the training state - # of the module if the states are not consistent - def _check_eval(module): - assert not module.training - - model.apply(_check_eval) - - # Export the model to ONNX - with torch.no_grad(): - with io.BytesIO() as f: - torch.onnx.export( - model, - inputs, - f, - operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK, - # verbose=True, # NOTE: uncomment this for debugging - # export_params=True, - ) - onnx_model = onnx.load_from_string(f.getvalue()) - - # Apply ONNX's Optimization - all_passes = onnx.optimizer.get_available_passes() - passes = ["fuse_bn_into_conv"] - assert all(p in all_passes for p in passes) - onnx_model = onnx.optimizer.optimize(onnx_model, passes) - return onnx_model - - -def _op_stats(net_def): - type_count = {} - for t in [op.type for op in net_def.op]: - type_count[t] = type_count.get(t, 0) + 1 - type_count_list = sorted(type_count.items(), key=lambda kv: kv[0]) # alphabet - type_count_list = sorted(type_count_list, key=lambda kv: -kv[1]) # count - return "\n".join("{:>4}x {}".format(count, name) for name, count in type_count_list) - - -def _assign_device_option( - predict_net: caffe2_pb2.NetDef, init_net: caffe2_pb2.NetDef, tensor_inputs: List[torch.Tensor] -): - """ - ONNX exported network doesn't have concept of device, assign necessary - device option for each op in order to make it runable on GPU runtime. - """ - - def _get_device_type(torch_tensor): - assert torch_tensor.device.type in ["cpu", "cuda"] - assert torch_tensor.device.index == 0 - return torch_tensor.device.type - - def _assign_op_device_option(net_proto, net_ssa, blob_device_types): - for op, ssa_i in zip(net_proto.op, net_ssa): - if op.type in ["CopyCPUToGPU", "CopyGPUToCPU"]: - op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) - else: - devices = [blob_device_types[b] for b in ssa_i[0] + ssa_i[1]] - assert all(d == devices[0] for d in devices) - if devices[0] == "cuda": - op.device_option.CopyFrom(core.DeviceOption(caffe2_pb2.CUDA, 0)) - - # update ops in predict_net - predict_net_input_device_types = { - (name, 0): _get_device_type(tensor) - for name, tensor in zip(predict_net.external_input, tensor_inputs) - } - predict_net_device_types = infer_device_type( - predict_net, known_status=predict_net_input_device_types, device_name_style="pytorch" - ) - predict_net_ssa, _ = core.get_ssa(predict_net) - _assign_op_device_option(predict_net, predict_net_ssa, predict_net_device_types) - - # update ops in init_net - init_net_ssa, versions = core.get_ssa(init_net) - init_net_output_device_types = { - (name, versions[name]): predict_net_device_types[(name, 0)] - for name in init_net.external_output - } - init_net_device_types = infer_device_type( - init_net, known_status=init_net_output_device_types, device_name_style="pytorch" - ) - _assign_op_device_option(init_net, init_net_ssa, init_net_device_types) - - -def export_caffe2_detection_model(model: torch.nn.Module, tensor_inputs: List[torch.Tensor]): - """ - Export a caffe2-compatible Detectron2 model to caffe2 format via ONNX. - - Arg: - model: a caffe2-compatible version of detectron2 model, defined in caffe2_modeling.py - tensor_inputs: a list of tensors that caffe2 model takes as input. 
- """ - model = copy.deepcopy(model) - assert isinstance(model, torch.nn.Module) - assert hasattr(model, "encode_additional_info") - - # Export via ONNX - logger.info("Exporting a {} model via ONNX ...".format(type(model).__name__)) - onnx_model = export_onnx_model(model, (tensor_inputs,)) - # Convert ONNX model to Caffe2 protobuf - init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model) - ops_table = [[op.type, op.input, op.output] for op in predict_net.op] - table = tabulate(ops_table, headers=["type", "input", "output"], tablefmt="pipe") - logger.info( - "ONNX export Done. Exported predict_net (before optimizations):\n" + colored(table, "cyan") - ) - - # Apply protobuf optimization - fuse_alias_placeholder(predict_net, init_net) - if any(t.device.type != "cpu" for t in tensor_inputs): - fuse_copy_between_cpu_and_gpu(predict_net) - remove_dead_end_ops(init_net) - _assign_device_option(predict_net, init_net, tensor_inputs) - params, device_options = get_params_from_init_net(init_net) - predict_net, params = remove_reshape_for_fc(predict_net, params) - init_net = construct_init_net_from_params(params, device_options) - group_norm_replace_aten_with_caffe2(predict_net) - - # Record necessary information for running the pb model in Detectron2 system. - model.encode_additional_info(predict_net, init_net) - - logger.info("Operators used in predict_net: \n{}".format(_op_stats(predict_net))) - logger.info("Operators used in init_net: \n{}".format(_op_stats(init_net))) - - return predict_net, init_net - - -def run_and_save_graph(predict_net, init_net, tensor_inputs, graph_save_path): - """ - Run the caffe2 model on given inputs, recording the shape and draw the graph. - - predict_net/init_net: caffe2 model. - tensor_inputs: a list of tensors that caffe2 model takes as input. - graph_save_path: path for saving graph of exported model. - """ - - logger.info("Saving graph of ONNX exported model to {} ...".format(graph_save_path)) - save_graph(predict_net, graph_save_path, op_only=False) - - # Run the exported Caffe2 net - logger.info("Running ONNX exported model ...") - with ScopedWS("__ws_tmp__", True) as ws: - ws.RunNetOnce(init_net) - initialized_blobs = set(ws.Blobs()) - uninitialized = [inp for inp in predict_net.external_input if inp not in initialized_blobs] - for name, blob in zip(uninitialized, tensor_inputs): - ws.FeedBlob(name, blob) - - try: - ws.RunNetOnce(predict_net) - except RuntimeError as e: - logger.warning("Encountered RuntimeError: \n{}".format(str(e))) - - ws_blobs = {b: ws.FetchBlob(b) for b in ws.Blobs()} - blob_sizes = {b: ws_blobs[b].shape for b in ws_blobs if isinstance(ws_blobs[b], np.ndarray)} - - logger.info("Saving graph with blob shapes to {} ...".format(graph_save_path)) - save_graph(predict_net, graph_save_path, op_only=False, blob_sizes=blob_sizes) - - return ws_blobs diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_inference.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_inference.py deleted file mode 100644 index 92718d0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_inference.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import collections -import logging -import numpy as np -import torch -from caffe2.proto import caffe2_pb2 -from caffe2.python import core - -from .caffe2_modeling import META_ARCH_CAFFE2_EXPORT_TYPE_MAP, convert_batched_inputs_to_c2_format -from .shared import ScopedWS, get_pb_arg_vali, get_pb_arg_vals, infer_device_type - -logger = logging.getLogger(__name__) - - -class ProtobufModel(torch.nn.Module): - """ - A class works just like nn.Module in terms of inference, but running - caffe2 model under the hood. Input/Output are Dict[str, tensor] whose keys - are in external_input/output. - """ - - def __init__(self, predict_net, init_net): - logger.info("Initializing ProtobufModel ...") - super().__init__() - assert isinstance(predict_net, caffe2_pb2.NetDef) - assert isinstance(init_net, caffe2_pb2.NetDef) - self.ws_name = "__ws_tmp__" - self.net = core.Net(predict_net) - - with ScopedWS(self.ws_name, is_reset=True, is_cleanup=False) as ws: - ws.RunNetOnce(init_net) - for blob in self.net.Proto().external_input: - if blob not in ws.Blobs(): - ws.CreateBlob(blob) - ws.CreateNet(self.net) - - self._error_msgs = set() - - def forward(self, inputs_dict): - assert all(inp in self.net.Proto().external_input for inp in inputs_dict) - with ScopedWS(self.ws_name, is_reset=False, is_cleanup=False) as ws: - for b, tensor in inputs_dict.items(): - ws.FeedBlob(b, tensor) - try: - ws.RunNet(self.net.Proto().name) - except RuntimeError as e: - if not str(e) in self._error_msgs: - self._error_msgs.add(str(e)) - logger.warning("Encountered new RuntimeError: \n{}".format(str(e))) - logger.warning("Catch the error and use partial results.") - - outputs_dict = collections.OrderedDict( - [(b, ws.FetchBlob(b)) for b in self.net.Proto().external_output] - ) - # Remove outputs of current run, this is necessary in order to - # prevent fetching the result from previous run if the model fails - # in the middle. - for b in self.net.Proto().external_output: - # Needs to create uninitialized blob to make the net runable. - # This is "equivalent" to: ws.RemoveBlob(b) then ws.CreateBlob(b), - # but there'no such API. - ws.FeedBlob(b, "{}, a C++ native class of type nullptr (uninitialized).".format(b)) - - return outputs_dict - - -class ProtobufDetectionModel(torch.nn.Module): - """ - A class works just like a pytorch meta arch in terms of inference, but running - caffe2 model under the hood. - """ - - def __init__(self, predict_net, init_net, *, convert_outputs=None): - """ - Args: - predict_net, init_net (core.Net): caffe2 nets - convert_outptus (callable): a function that converts caffe2 - outputs to the same format of the original pytorch model. - By default, use the one defined in the caffe2 meta_arch. 
- """ - super().__init__() - self.protobuf_model = ProtobufModel(predict_net, init_net) - self.size_divisibility = get_pb_arg_vali(predict_net, "size_divisibility", 0) - self.device = get_pb_arg_vals(predict_net, "device", b"cpu").decode("ascii") - - if convert_outputs is None: - meta_arch = get_pb_arg_vals(predict_net, "meta_architecture", b"GeneralizedRCNN") - meta_arch = META_ARCH_CAFFE2_EXPORT_TYPE_MAP[meta_arch.decode("ascii")] - self._convert_outputs = meta_arch.get_outputs_converter(predict_net, init_net) - else: - self._convert_outputs = convert_outputs - - def _infer_output_devices(self, inputs_dict): - def _get_device_type(torch_tensor): - assert torch_tensor.device.type in ["cpu", "cuda"] - assert torch_tensor.device.index == 0 - return torch_tensor.device.type - - predict_net = self.protobuf_model.net.Proto() - input_device_types = { - (name, 0): _get_device_type(tensor) for name, tensor in inputs_dict.items() - } - device_type_map = infer_device_type( - predict_net, known_status=input_device_types, device_name_style="pytorch" - ) - ssa, versions = core.get_ssa(predict_net) - versioned_outputs = [(name, versions[name]) for name in predict_net.external_output] - output_devices = [device_type_map[outp] for outp in versioned_outputs] - return output_devices - - def _convert_inputs(self, batched_inputs): - # currently all models convert inputs in the same way - data, im_info = convert_batched_inputs_to_c2_format( - batched_inputs, self.size_divisibility, self.device - ) - return {"data": data, "im_info": im_info} - - def forward(self, batched_inputs): - c2_inputs = self._convert_inputs(batched_inputs) - c2_results = self.protobuf_model(c2_inputs) - - if any(t.device.type != "cpu" for _, t in c2_inputs.items()): - output_devices = self._infer_output_devices(c2_inputs) - else: - output_devices = ["cpu" for _ in self.protobuf_model.net.Proto().external_output] - - def _cast_caffe2_blob_to_torch_tensor(blob, device): - return torch.Tensor(blob).to(device) if isinstance(blob, np.ndarray) else None - - c2_results = { - name: _cast_caffe2_blob_to_torch_tensor(c2_results[name], device) - for name, device in zip(self.protobuf_model.net.Proto().external_output, output_devices) - } - - return self._convert_outputs(batched_inputs, c2_inputs, c2_results) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_modeling.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_modeling.py deleted file mode 100644 index 1732b32..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/caffe2_modeling.py +++ /dev/null @@ -1,493 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import functools -import io -import struct -import types -import torch - -from detectron2.modeling import meta_arch -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.meta_arch.panoptic_fpn import combine_semantic_and_instance_outputs -from detectron2.modeling.postprocessing import detector_postprocess, sem_seg_postprocess -from detectron2.modeling.roi_heads import keypoint_head -from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes - -from .c10 import Caffe2Compatible -from .patcher import ROIHeadsPatcher, patch_generalized_rcnn -from .shared import ( - alias, - check_set_pb_arg, - get_pb_arg_floats, - get_pb_arg_valf, - get_pb_arg_vali, - get_pb_arg_vals, - mock_torch_nn_functional_interpolate, -) - - -def assemble_rcnn_outputs_by_name(image_sizes, tensor_outputs, force_mask_on=False): - """ - A function to assemble caffe2 model's outputs (i.e. Dict[str, Tensor]) - to detectron2's format (i.e. list of Instances instance). - This only works when the model follows the Caffe2 detectron's naming convention. - - Args: - image_sizes (List[List[int, int]]): [H, W] of every image. - tensor_outputs (Dict[str, Tensor]): external_output to its tensor. - - force_mask_on (Bool): if true, the it make sure there'll be pred_masks even - if the mask is not found from tensor_outputs (usually due to model crash) - """ - - results = [Instances(image_size) for image_size in image_sizes] - - batch_splits = tensor_outputs.get("batch_splits", None) - if batch_splits: - raise NotImplementedError() - assert len(image_sizes) == 1 - result = results[0] - - bbox_nms = tensor_outputs["bbox_nms"] - score_nms = tensor_outputs["score_nms"] - class_nms = tensor_outputs["class_nms"] - # Detection will always success because Conv support 0-batch - assert bbox_nms is not None - assert score_nms is not None - assert class_nms is not None - if bbox_nms.shape[1] == 5: - result.pred_boxes = RotatedBoxes(bbox_nms) - else: - result.pred_boxes = Boxes(bbox_nms) - result.scores = score_nms - result.pred_classes = class_nms.to(torch.int64) - - mask_fcn_probs = tensor_outputs.get("mask_fcn_probs", None) - if mask_fcn_probs is not None: - # finish the mask pred - mask_probs_pred = mask_fcn_probs - num_masks = mask_probs_pred.shape[0] - class_pred = result.pred_classes - indices = torch.arange(num_masks, device=class_pred.device) - mask_probs_pred = mask_probs_pred[indices, class_pred][:, None] - result.pred_masks = mask_probs_pred - elif force_mask_on: - # NOTE: there's no way to know the height/width of mask here, it won't be - # used anyway when batch size is 0, so just set them to 0. - result.pred_masks = torch.zeros([0, 1, 0, 0], dtype=torch.uint8) - - keypoints_out = tensor_outputs.get("keypoints_out", None) - kps_score = tensor_outputs.get("kps_score", None) - if keypoints_out is not None: - # keypoints_out: [N, 4, #kypoints], where 4 is in order of (x, y, score, prob) - keypoints_tensor = keypoints_out - # NOTE: it's possible that prob is not calculated if "should_output_softmax" - # is set to False in HeatmapMaxKeypoint, so just using raw score, seems - # it doesn't affect mAP. TODO: check more carefully. 
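# A minimal sketch (made-up shapes) of the per-class mask selection performed in
# assemble_rcnn_outputs_by_name above: the mask head predicts one probability
# map per class, and advanced indexing picks out the map of each instance's
# predicted class.
import torch

num_masks, num_classes, H, W = 2, 5, 4, 4
mask_probs = torch.rand(num_masks, num_classes, H, W)
pred_classes = torch.tensor([3, 1])
idx = torch.arange(num_masks)
per_instance_masks = mask_probs[idx, pred_classes][:, None]  # (num_masks, 1, H, W)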
- keypoint_xyp = keypoints_tensor.transpose(1, 2)[:, :, [0, 1, 2]] - result.pred_keypoints = keypoint_xyp - elif kps_score is not None: - # keypoint heatmap to sparse data structure - pred_keypoint_logits = kps_score - keypoint_head.keypoint_rcnn_inference(pred_keypoint_logits, [result]) - - return results - - -def _cast_to_f32(f64): - return struct.unpack("f", struct.pack("f", f64))[0] - - -def set_caffe2_compatible_tensor_mode(model, enable=True): - def _fn(m): - if isinstance(m, Caffe2Compatible): - m.tensor_mode = enable - - model.apply(_fn) - - -def convert_batched_inputs_to_c2_format(batched_inputs, size_divisibility, device): - """ - See get_caffe2_inputs() below. - """ - assert all(isinstance(x, dict) for x in batched_inputs) - assert all(x["image"].dim() == 3 for x in batched_inputs) - - images = [x["image"] for x in batched_inputs] - images = ImageList.from_tensors(images, size_divisibility) - - im_info = [] - for input_per_image, image_size in zip(batched_inputs, images.image_sizes): - target_height = input_per_image.get("height", image_size[0]) - target_width = input_per_image.get("width", image_size[1]) # noqa - # NOTE: The scale inside im_info is kept as convention and for providing - # post-processing information if further processing is needed. For - # current Caffe2 model definitions that don't include post-processing inside - # the model, this number is not used. - # NOTE: There can be a slight difference between width and height - # scales, using a single number can results in numerical difference - # compared with D2's post-processing. - scale = target_height / image_size[0] - im_info.append([image_size[0], image_size[1], scale]) - im_info = torch.Tensor(im_info) - - return images.tensor.to(device), im_info.to(device) - - -class Caffe2MetaArch(Caffe2Compatible, torch.nn.Module): - """ - Base class for caffe2-compatible implementation of a meta architecture. - The forward is traceable and its traced graph can be converted to caffe2 - graph through ONNX. - """ - - def __init__(self, cfg, torch_model): - """ - Args: - cfg (CfgNode): - torch_model (nn.Module): the detectron2 model (meta_arch) to be - converted. - """ - super().__init__() - self._wrapped_model = torch_model - self.eval() - set_caffe2_compatible_tensor_mode(self, True) - - def get_caffe2_inputs(self, batched_inputs): - """ - Convert pytorch-style structured inputs to caffe2-style inputs that - are tuples of tensors. - - Args: - batched_inputs (list[dict]): inputs to a detectron2 model - in its standard format. Each dict has "image" (CHW tensor), and optionally - "height" and "width". - - Returns: - tuple[Tensor]: - tuple of tensors that will be the inputs to the - :meth:`forward` method. For existing models, the first - is an NCHW tensor (padded and batched); the second is - a im_info Nx3 tensor, where the rows are - (height, width, unused legacy parameter) - """ - return convert_batched_inputs_to_c2_format( - batched_inputs, - self._wrapped_model.backbone.size_divisibility, - self._wrapped_model.device, - ) - - def encode_additional_info(self, predict_net, init_net): - """ - Save extra metadata that will be used by inference in the output protobuf. - """ - pass - - def forward(self, inputs): - """ - Run the forward in caffe2-style. It has to use caffe2-compatible ops - and the method will be used for tracing. - - Args: - inputs (tuple[Tensor]): inputs defined by :meth:`get_caffe2_input`. - They will be the inputs of the converted caffe2 graph. - - Returns: - tuple[Tensor]: output tensors. 
They will be the outputs of the - converted caffe2 graph. - """ - raise NotImplementedError - - def _caffe2_preprocess_image(self, inputs): - """ - Caffe2 implementation of preprocess_image, which is called inside each MetaArch's forward. - It normalizes the input images, and the final caffe2 graph assumes the - inputs have been batched already. - """ - data, im_info = inputs - data = alias(data, "data") - im_info = alias(im_info, "im_info") - mean, std = self._wrapped_model.pixel_mean, self._wrapped_model.pixel_std - normalized_data = (data - mean) / std - normalized_data = alias(normalized_data, "normalized_data") - - # Pack (data, im_info) into ImageList which is recognized by self.inference. - images = ImageList(tensor=normalized_data, image_sizes=im_info) - return images - - @staticmethod - def get_outputs_converter(predict_net, init_net): - """ - Creates a function that converts outputs of the caffe2 model to - detectron2's standard format. - The function uses information in `predict_net` and `init_net` that are - available at inferene time. Therefore the function logic can be used in inference. - - The returned function has the following signature: - - def convert(batched_inputs, c2_inputs, c2_results) -> detectron2_outputs - - Where - - * batched_inputs (list[dict]): the original input format of the meta arch - * c2_inputs (dict[str, Tensor]): the caffe2 inputs. - * c2_results (dict[str, Tensor]): the caffe2 output format, - corresponding to the outputs of the :meth:`forward` function. - * detectron2_outputs: the original output format of the meta arch. - - This function can be used to compare the outputs of the original meta arch and - the converted caffe2 graph. - - Returns: - callable: a callable of the above signature. - """ - raise NotImplementedError - - -class Caffe2GeneralizedRCNN(Caffe2MetaArch): - def __init__(self, cfg, torch_model): - assert isinstance(torch_model, meta_arch.GeneralizedRCNN) - torch_model = patch_generalized_rcnn(torch_model) - super().__init__(cfg, torch_model) - - self.roi_heads_patcher = ROIHeadsPatcher(cfg, self._wrapped_model.roi_heads) - - def encode_additional_info(self, predict_net, init_net): - size_divisibility = self._wrapped_model.backbone.size_divisibility - check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) - check_set_pb_arg( - predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") - ) - check_set_pb_arg(predict_net, "meta_architecture", "s", b"GeneralizedRCNN") - - @mock_torch_nn_functional_interpolate() - def forward(self, inputs): - if not self.tensor_mode: - return self._wrapped_model.inference(inputs) - images = self._caffe2_preprocess_image(inputs) - features = self._wrapped_model.backbone(images.tensor) - proposals, _ = self._wrapped_model.proposal_generator(images, features) - with self.roi_heads_patcher.mock_roi_heads(): - detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) - return tuple(detector_results[0].flatten()) - - @staticmethod - def get_outputs_converter(predict_net, init_net): - def f(batched_inputs, c2_inputs, c2_results): - image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] - results = assemble_rcnn_outputs_by_name(image_sizes, c2_results) - return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) - - return f - - -class Caffe2PanopticFPN(Caffe2MetaArch): - def __init__(self, cfg, torch_model): - assert isinstance(torch_model, meta_arch.PanopticFPN) - torch_model = 
patch_generalized_rcnn(torch_model) - super().__init__(cfg, torch_model) - - self.roi_heads_patcher = ROIHeadsPatcher(cfg, self._wrapped_model.roi_heads) - - @mock_torch_nn_functional_interpolate() - def forward(self, inputs): - assert self.tensor_mode - images = self._caffe2_preprocess_image(inputs) - features = self._wrapped_model.backbone(images.tensor) - - sem_seg_results, _ = self._wrapped_model.sem_seg_head(features) - sem_seg_results = alias(sem_seg_results, "sem_seg") - - proposals, _ = self._wrapped_model.proposal_generator(images, features) - - with self.roi_heads_patcher.mock_roi_heads(self.tensor_mode): - detector_results, _ = self._wrapped_model.roi_heads(images, features, proposals) - - return tuple(detector_results[0].flatten()) + (sem_seg_results,) - - def encode_additional_info(self, predict_net, init_net): - size_divisibility = self._wrapped_model.backbone.size_divisibility - check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) - check_set_pb_arg( - predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") - ) - check_set_pb_arg(predict_net, "meta_architecture", "s", b"PanopticFPN") - - # Inference parameters: - check_set_pb_arg(predict_net, "combine_on", "i", self._wrapped_model.combine_on) - check_set_pb_arg( - predict_net, - "combine_overlap_threshold", - "f", - _cast_to_f32(self._wrapped_model.combine_overlap_threshold), - ) - check_set_pb_arg( - predict_net, - "combine_stuff_area_limit", - "i", - self._wrapped_model.combine_stuff_area_limit, - ) - check_set_pb_arg( - predict_net, - "combine_instances_confidence_threshold", - "f", - _cast_to_f32(self._wrapped_model.combine_instances_confidence_threshold), - ) - - @staticmethod - def get_outputs_converter(predict_net, init_net): - combine_on = get_pb_arg_vali(predict_net, "combine_on", None) - combine_overlap_threshold = get_pb_arg_valf(predict_net, "combine_overlap_threshold", None) - combine_stuff_area_limit = get_pb_arg_vali(predict_net, "combine_stuff_area_limit", None) - combine_instances_confidence_threshold = get_pb_arg_valf( - predict_net, "combine_instances_confidence_threshold", None - ) - - def f(batched_inputs, c2_inputs, c2_results): - image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] - detector_results = assemble_rcnn_outputs_by_name( - image_sizes, c2_results, force_mask_on=True - ) - sem_seg_results = c2_results["sem_seg"] - - # copied from meta_arch/panoptic_fpn.py ... 
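The `combine_*` thresholds above are written through `_cast_to_f32` so the Python float carries exactly the precision a float32 protobuf field will store; a tiny standalone illustration of that round-trip:

```python
import struct

def cast_to_f32(f64: float) -> float:
    # Pack as a 4-byte IEEE-754 float and unpack again, so the returned Python
    # float carries exactly float32 precision (what a protobuf "f" arg stores).
    return struct.unpack("f", struct.pack("f", f64))[0]

print(cast_to_f32(0.1))  # 0.10000000149011612 -- the float32-rounded value
```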
- processed_results = [] - for sem_seg_result, detector_result, input_per_image, image_size in zip( - sem_seg_results, detector_results, batched_inputs, image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) - detector_r = detector_postprocess(detector_result, height, width) - - processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) - - if combine_on: - panoptic_r = combine_semantic_and_instance_outputs( - detector_r, - sem_seg_r.argmax(dim=0), - combine_overlap_threshold, - combine_stuff_area_limit, - combine_instances_confidence_threshold, - ) - processed_results[-1]["panoptic_seg"] = panoptic_r - return processed_results - - return f - - -class Caffe2RetinaNet(Caffe2MetaArch): - def __init__(self, cfg, torch_model): - assert isinstance(torch_model, meta_arch.RetinaNet) - super().__init__(cfg, torch_model) - - @mock_torch_nn_functional_interpolate() - def forward(self, inputs): - assert self.tensor_mode - images = self._caffe2_preprocess_image(inputs) - - # explicitly return the images sizes to avoid removing "im_info" by ONNX - # since it's not used in the forward path - return_tensors = [images.image_sizes] - - features = self._wrapped_model.backbone(images.tensor) - features = [features[f] for f in self._wrapped_model.in_features] - for i, feature_i in enumerate(features): - features[i] = alias(feature_i, "feature_{}".format(i), is_backward=True) - return_tensors.append(features[i]) - - box_cls, box_delta = self._wrapped_model.head(features) - for i, (box_cls_i, box_delta_i) in enumerate(zip(box_cls, box_delta)): - return_tensors.append(alias(box_cls_i, "box_cls_{}".format(i))) - return_tensors.append(alias(box_delta_i, "box_delta_{}".format(i))) - - return tuple(return_tensors) - - def encode_additional_info(self, predict_net, init_net): - size_divisibility = self._wrapped_model.backbone.size_divisibility - check_set_pb_arg(predict_net, "size_divisibility", "i", size_divisibility) - check_set_pb_arg( - predict_net, "device", "s", str.encode(str(self._wrapped_model.device), "ascii") - ) - check_set_pb_arg(predict_net, "meta_architecture", "s", b"RetinaNet") - - # Inference parameters: - check_set_pb_arg( - predict_net, "score_threshold", "f", _cast_to_f32(self._wrapped_model.score_threshold) - ) - check_set_pb_arg(predict_net, "topk_candidates", "i", self._wrapped_model.topk_candidates) - check_set_pb_arg( - predict_net, "nms_threshold", "f", _cast_to_f32(self._wrapped_model.nms_threshold) - ) - check_set_pb_arg( - predict_net, - "max_detections_per_image", - "i", - self._wrapped_model.max_detections_per_image, - ) - - check_set_pb_arg( - predict_net, - "bbox_reg_weights", - "floats", - [_cast_to_f32(w) for w in self._wrapped_model.box2box_transform.weights], - ) - self._encode_anchor_generator_cfg(predict_net) - - def _encode_anchor_generator_cfg(self, predict_net): - # serialize anchor_generator for future use - serialized_anchor_generator = io.BytesIO() - torch.save(self._wrapped_model.anchor_generator, serialized_anchor_generator) - # Ideally we can put anchor generating inside the model, then we don't - # need to store this information. 
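The serialization below goes through an in-memory buffer; a short sketch of the same `torch.save`/`torch.load` round-trip, with a made-up picklable dict standing in for the real anchor generator:

```python
import io
import torch

# Any picklable object can be round-tripped this way; the dict below is just a
# placeholder for the real anchor generator module.
obj = {"sizes": [32, 64, 128], "aspect_ratios": [0.5, 1.0, 2.0]}

buf = io.BytesIO()
torch.save(obj, buf)
raw = buf.getvalue()            # bytes suitable for a protobuf "s" argument

restored = torch.load(io.BytesIO(raw))
assert restored == obj
```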
- bytes = serialized_anchor_generator.getvalue() - check_set_pb_arg(predict_net, "serialized_anchor_generator", "s", bytes) - - @staticmethod - def get_outputs_converter(predict_net, init_net): - self = types.SimpleNamespace() - serialized_anchor_generator = io.BytesIO( - get_pb_arg_vals(predict_net, "serialized_anchor_generator", None) - ) - self.anchor_generator = torch.load(serialized_anchor_generator) - bbox_reg_weights = get_pb_arg_floats(predict_net, "bbox_reg_weights", None) - self.box2box_transform = Box2BoxTransform(weights=tuple(bbox_reg_weights)) - self.score_threshold = get_pb_arg_valf(predict_net, "score_threshold", None) - self.topk_candidates = get_pb_arg_vali(predict_net, "topk_candidates", None) - self.nms_threshold = get_pb_arg_valf(predict_net, "nms_threshold", None) - self.max_detections_per_image = get_pb_arg_vali( - predict_net, "max_detections_per_image", None - ) - - # hack to reuse inference code from RetinaNet - self.inference = functools.partial(meta_arch.RetinaNet.inference, self) - self.inference_single_image = functools.partial( - meta_arch.RetinaNet.inference_single_image, self - ) - - def f(batched_inputs, c2_inputs, c2_results): - image_sizes = [[int(im[0]), int(im[1])] for im in c2_inputs["im_info"]] - - num_features = len([x for x in c2_results.keys() if x.startswith("box_cls_")]) - box_cls = [c2_results["box_cls_{}".format(i)] for i in range(num_features)] - box_delta = [c2_results["box_delta_{}".format(i)] for i in range(num_features)] - - # For each feature level, feature should have the same batch size and - # spatial dimension as the box_cls and box_delta. - dummy_features = [box_delta[i].clone()[:, 0:0, :, :] for i in range(num_features)] - anchors = self.anchor_generator(dummy_features) - - # self.num_classess can be inferred - self.num_classes = box_cls[0].shape[1] // (box_delta[0].shape[1] // 4) - - results = self.inference(box_cls, box_delta, anchors, image_sizes) - return meta_arch.GeneralizedRCNN._postprocess(results, batched_inputs, image_sizes) - - return f - - -META_ARCH_CAFFE2_EXPORT_TYPE_MAP = { - "GeneralizedRCNN": Caffe2GeneralizedRCNN, - "PanopticFPN": Caffe2PanopticFPN, - "RetinaNet": Caffe2RetinaNet, -} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/patcher.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/patcher.py deleted file mode 100644 index 3f0b0fd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/patcher.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import contextlib -import mock -import torch - -from detectron2.modeling import poolers -from detectron2.modeling.proposal_generator import rpn -from detectron2.modeling.roi_heads import keypoint_head, mask_head -from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers - -from .c10 import ( - Caffe2Compatible, - Caffe2FastRCNNOutputsInference, - Caffe2KeypointRCNNInference, - Caffe2MaskRCNNInference, - Caffe2ROIPooler, - Caffe2RPN, -) - - -class GenericMixin(object): - pass - - -class Caffe2CompatibleConverter(object): - """ - A GenericUpdater which implements the `create_from` interface, by modifying - module object and assign it with another class replaceCls. 
- """ - - def __init__(self, replaceCls): - self.replaceCls = replaceCls - - def create_from(self, module): - # update module's class to the new class - assert isinstance(module, torch.nn.Module) - if issubclass(self.replaceCls, GenericMixin): - # replaceCls should act as mixin, create a new class on-the-fly - new_class = type( - "{}MixedWith{}".format(self.replaceCls.__name__, module.__class__.__name__), - (self.replaceCls, module.__class__), - {}, # {"new_method": lambda self: ...}, - ) - module.__class__ = new_class - else: - # replaceCls is complete class, this allow arbitrary class swap - module.__class__ = self.replaceCls - - # initialize Caffe2Compatible - if isinstance(module, Caffe2Compatible): - module.tensor_mode = False - - return module - - -def patch(model, target, updater, *args, **kwargs): - """ - recursively (post-order) update all modules with the target type and its - subclasses, make a initialization/composition/inheritance/... via the - updater.create_from. - """ - for name, module in model.named_children(): - model._modules[name] = patch(module, target, updater, *args, **kwargs) - if isinstance(model, target): - return updater.create_from(model, *args, **kwargs) - return model - - -def patch_generalized_rcnn(model): - ccc = Caffe2CompatibleConverter - model = patch(model, rpn.RPN, ccc(Caffe2RPN)) - model = patch(model, poolers.ROIPooler, ccc(Caffe2ROIPooler)) - - return model - - -@contextlib.contextmanager -def mock_fastrcnn_outputs_inference( - tensor_mode, check=True, box_predictor_type=FastRCNNOutputLayers -): - with mock.patch.object( - box_predictor_type, - "inference", - autospec=True, - side_effect=Caffe2FastRCNNOutputsInference(tensor_mode), - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -@contextlib.contextmanager -def mock_mask_rcnn_inference(tensor_mode, patched_module, check=True): - with mock.patch( - "{}.mask_rcnn_inference".format(patched_module), side_effect=Caffe2MaskRCNNInference() - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -@contextlib.contextmanager -def mock_keypoint_rcnn_inference(tensor_mode, patched_module, use_heatmap_max_keypoint, check=True): - with mock.patch( - "{}.keypoint_rcnn_inference".format(patched_module), - side_effect=Caffe2KeypointRCNNInference(use_heatmap_max_keypoint), - ) as mocked_func: - yield - if check: - assert mocked_func.call_count > 0 - - -class ROIHeadsPatcher: - def __init__(self, cfg, heads): - self.heads = heads - - self.use_heatmap_max_keypoint = cfg.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT - - @contextlib.contextmanager - def mock_roi_heads(self, tensor_mode=True): - """ - Patching several inference functions inside ROIHeads and its subclasses - - Args: - tensor_mode (bool): whether the inputs/outputs are caffe2's tensor - format or not. Default to True. - """ - # NOTE: this requries the `keypoint_rcnn_inference` and `mask_rcnn_inference` - # are called inside the same file as BaseXxxHead due to using mock.patch. 
- kpt_heads_mod = keypoint_head.BaseKeypointRCNNHead.__module__ - mask_head_mod = mask_head.BaseMaskRCNNHead.__module__ - - mock_ctx_managers = [ - mock_fastrcnn_outputs_inference( - tensor_mode=tensor_mode, - check=True, - box_predictor_type=type(self.heads.box_predictor), - ) - ] - if getattr(self.heads, "keypoint_on", False): - mock_ctx_managers += [ - mock_keypoint_rcnn_inference( - tensor_mode, kpt_heads_mod, self.use_heatmap_max_keypoint - ) - ] - if getattr(self.heads, "mask_on", False): - mock_ctx_managers += [mock_mask_rcnn_inference(tensor_mode, mask_head_mod)] - - with contextlib.ExitStack() as stack: # python 3.3+ - for mgr in mock_ctx_managers: - stack.enter_context(mgr) - yield diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/shared.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/shared.py deleted file mode 100644 index cb7ffeb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/export/shared.py +++ /dev/null @@ -1,1034 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import collections -import contextlib -import copy -import functools -import logging -import mock -import numpy as np -import os -from typing import Any, Callable, Dict, List, Optional, Tuple, Union -import caffe2.python.utils as putils -import torch -import torch.nn.functional as F -from caffe2.proto import caffe2_pb2 -from caffe2.python import core, net_drawer, workspace -from torch.nn.functional import interpolate as interp - -logger = logging.getLogger(__name__) - - -# ==== torch/utils_toffee/cast.py ======================================= - - -def to_device(t, device_str): - """ - This function is a replacement of .to(another_device) such that it allows the - casting to be traced properly by explicitly calling the underlying copy ops. - It also avoids introducing unncessary op when casting to the same device. - """ - src = t.device - dst = torch.device(device_str) - - if src == dst: - return t - elif src.type == "cuda" and dst.type == "cpu": - return torch.ops._caffe2.CopyGPUToCPU(t) - elif src.type == "cpu" and dst.type == "cuda": - return torch.ops._caffe2.CopyCPUToGPU(t) - else: - raise RuntimeError("Can't cast tensor from device {} to device {}".format(src, dst)) - - -# ==== torch/utils_toffee/interpolate.py ======================================= - - -# Note: borrowed from vision/detection/fair/detectron/detectron/modeling/detector.py -def BilinearInterpolation(tensor_in, up_scale): - assert up_scale % 2 == 0, "Scale should be even" - - def upsample_filt(size): - factor = (size + 1) // 2 - if size % 2 == 1: - center = factor - 1 - else: - center = factor - 0.5 - - og = np.ogrid[:size, :size] - return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) - - kernel_size = int(up_scale) * 2 - bil_filt = upsample_filt(kernel_size) - - dim = int(tensor_in.shape[1]) - kernel = np.zeros((dim, dim, kernel_size, kernel_size), dtype=np.float32) - kernel[range(dim), range(dim), :, :] = bil_filt - - tensor_out = F.conv_transpose2d( - tensor_in, - weight=to_device(torch.Tensor(kernel), tensor_in.device), - bias=None, - stride=int(up_scale), - padding=int(up_scale / 2), - ) - - return tensor_out - - -# NOTE: ONNX is incompatible with traced torch.nn.functional.interpolate if -# using dynamic `scale_factor` rather than static `size`. (T43166860) -# NOTE: Caffe2 Int8 conversion might not be able to quantize `size` properly. 
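`BilinearInterpolation` above builds a fixed transposed-convolution kernel; a numpy-only peek at that separable kernel for a 2x upscale (kernel size 4), assuming nothing beyond the formula shown above:

```python
import numpy as np

def upsample_filt(size):
    # Same construction as above: a separable "tent" kernel that performs
    # bilinear interpolation when used as conv_transpose2d weights.
    factor = (size + 1) // 2
    center = factor - 1 if size % 2 == 1 else factor - 0.5
    og = np.ogrid[:size, :size]
    return (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor)

print(upsample_filt(4))  # 4x4 kernel peaking at the centre; corners taper to 0.0625
```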
-def onnx_compatibale_interpolate( - input, size=None, scale_factor=None, mode="nearest", align_corners=None -): - # NOTE: The input dimensions are interpreted in the form: - # `mini-batch x channels x [optional depth] x [optional height] x width`. - if size is None and scale_factor is not None: - if input.dim() == 4: - if isinstance(scale_factor, (int, float)): - height_scale, width_scale = (scale_factor, scale_factor) - else: - assert isinstance(scale_factor, (tuple, list)) - assert len(scale_factor) == 2 - height_scale, width_scale = scale_factor - - assert not align_corners, "No matching C2 op for align_corners == True" - if mode == "nearest": - return torch.ops._caffe2.ResizeNearest( - input, order="NCHW", width_scale=width_scale, height_scale=height_scale - ) - elif mode == "bilinear": - logger.warning( - "Use F.conv_transpose2d for bilinear interpolate" - " because there's no such C2 op, this may cause significant" - " slowdown and the boundary pixels won't be as same as" - " using F.interpolate due to padding." - ) - assert height_scale == width_scale - return BilinearInterpolation(input, up_scale=height_scale) - logger.warning("Output size is not static, it might cause ONNX conversion issue") - - return interp(input, size, scale_factor, mode, align_corners) - - -@contextlib.contextmanager -def mock_torch_nn_functional_interpolate(): - if torch.onnx.is_in_onnx_export(): - with mock.patch( - "torch.nn.functional.interpolate", side_effect=onnx_compatibale_interpolate - ): - yield - else: - yield - - -# ==== torch/utils_caffe2/ws_utils.py ========================================== - - -class ScopedWS(object): - def __init__(self, ws_name, is_reset, is_cleanup=False): - self.ws_name = ws_name - self.is_reset = is_reset - self.is_cleanup = is_cleanup - self.org_ws = "" - - def __enter__(self): - self.org_ws = workspace.CurrentWorkspace() - if self.ws_name is not None: - workspace.SwitchWorkspace(self.ws_name, True) - if self.is_reset: - workspace.ResetWorkspace() - - return workspace - - def __exit__(self, *args): - if self.is_cleanup: - workspace.ResetWorkspace() - if self.ws_name is not None: - workspace.SwitchWorkspace(self.org_ws) - - -def fetch_any_blob(name): - bb = None - try: - bb = workspace.FetchBlob(name) - except TypeError: - bb = workspace.FetchInt8Blob(name) - except Exception as e: - logger.error("Get blob {} error: {}".format(name, e)) - - return bb - - -# ==== torch/utils_caffe2/protobuf.py ========================================== - - -def get_pb_arg(pb, arg_name): - for x in pb.arg: - if x.name == arg_name: - return x - return None - - -def get_pb_arg_valf(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.f if arg is not None else default_val - - -def get_pb_arg_floats(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(map(float, arg.floats)) if arg is not None else default_val - - -def get_pb_arg_ints(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(map(int, arg.ints)) if arg is not None else default_val - - -def get_pb_arg_vali(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.i if arg is not None else default_val - - -def get_pb_arg_vals(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return arg.s if arg is not None else default_val - - -def get_pb_arg_valstrings(pb, arg_name, default_val): - arg = get_pb_arg(pb, arg_name) - return list(arg.strings) if arg is not None else default_val - - -def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, 
allow_override=False): - arg = get_pb_arg(pb, arg_name) - if arg is None: - arg = putils.MakeArgument(arg_name, arg_value) - assert hasattr(arg, arg_attr) - pb.arg.extend([arg]) - if allow_override and getattr(arg, arg_attr) != arg_value: - logger.warning( - "Override argument {}: {} -> {}".format(arg_name, getattr(arg, arg_attr), arg_value) - ) - setattr(arg, arg_attr, arg_value) - else: - assert arg is not None - assert getattr(arg, arg_attr) == arg_value, "Existing value {}, new value {}".format( - getattr(arg, arg_attr), arg_value - ) - - -def _create_const_fill_op_from_numpy(name, tensor, device_option=None): - assert type(tensor) == np.ndarray - kTypeNameMapper = { - np.dtype("float32"): "GivenTensorFill", - np.dtype("int32"): "GivenTensorIntFill", - np.dtype("int64"): "GivenTensorInt64Fill", - np.dtype("uint8"): "GivenTensorStringFill", - } - - args_dict = {} - if tensor.dtype == np.dtype("uint8"): - args_dict.update({"values": [str(tensor.data)], "shape": [1]}) - else: - args_dict.update({"values": tensor, "shape": tensor.shape}) - - if device_option is not None: - args_dict["device_option"] = device_option - - return core.CreateOperator(kTypeNameMapper[tensor.dtype], [], [name], **args_dict) - - -def _create_const_fill_op_from_c2_int8_tensor(name, int8_tensor): - assert type(int8_tensor) == workspace.Int8Tensor - kTypeNameMapper = { - np.dtype("int32"): "Int8GivenIntTensorFill", - np.dtype("uint8"): "Int8GivenTensorFill", - } - - tensor = int8_tensor.data - assert tensor.dtype in [np.dtype("uint8"), np.dtype("int32")] - values = tensor.tobytes() if tensor.dtype == np.dtype("uint8") else tensor - - return core.CreateOperator( - kTypeNameMapper[tensor.dtype], - [], - [name], - values=values, - shape=tensor.shape, - Y_scale=int8_tensor.scale, - Y_zero_point=int8_tensor.zero_point, - ) - - -def create_const_fill_op( - name: str, - blob: Union[np.ndarray, workspace.Int8Tensor], - device_option: Optional[caffe2_pb2.DeviceOption] = None, -) -> caffe2_pb2.OperatorDef: - """ - Given a blob object, return the Caffe2 operator that creates this blob - as constant. Currently support NumPy tensor and Caffe2 Int8Tensor. - """ - - tensor_type = type(blob) - assert tensor_type in [ - np.ndarray, - workspace.Int8Tensor, - ], 'Error when creating const fill op for "{}", unsupported blob type: {}'.format( - name, type(blob) - ) - - if tensor_type == np.ndarray: - return _create_const_fill_op_from_numpy(name, blob, device_option) - elif tensor_type == workspace.Int8Tensor: - assert device_option is None - return _create_const_fill_op_from_c2_int8_tensor(name, blob) - - -def construct_init_net_from_params( - params: Dict[str, Any], device_options: Optional[Dict[str, caffe2_pb2.DeviceOption]] = None -) -> caffe2_pb2.NetDef: - """ - Construct the init_net from params dictionary - """ - init_net = caffe2_pb2.NetDef() - device_options = device_options or {} - for name, blob in params.items(): - if isinstance(blob, str): - logger.warning( - ( - "Blob {} with type {} is not supported in generating init net," - " skipped.".format(name, type(blob)) - ) - ) - continue - init_net.op.extend( - [create_const_fill_op(name, blob, device_option=device_options.get(name, None))] - ) - init_net.external_output.append(name) - return init_net - - -def get_producer_map(ssa): - """ - Return dict from versioned blob to (i, j), - where i is index of producer op, j is the index of output of that op. 
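`get_producer_map` and `get_consumer_map` simply invert the SSA structure that `core.get_ssa` returns; a pure-Python sketch on a hand-written toy SSA (blob names invented for illustration):

```python
import collections

# Toy SSA in the shape core.get_ssa() returns: one (inputs, outputs) pair per op,
# where every blob is a (name, version) tuple.
ssa = [
    ([("data", 0)], [("conv1", 0)]),
    ([("conv1", 0)], [("relu1", 0)]),
    ([("relu1", 0), ("conv1", 0)], [("sum1", 0)]),
]

producer_map = {}                              # versioned blob -> (op index, output slot)
for i, (_, outputs) in enumerate(ssa):
    for j, outp in enumerate(outputs):
        producer_map[outp] = (i, j)

consumer_map = collections.defaultdict(list)   # versioned blob -> [(op index, input slot)]
for i, (inputs, _) in enumerate(ssa):
    for j, inp in enumerate(inputs):
        consumer_map[inp].append((i, j))

assert producer_map[("relu1", 0)] == (1, 0)
assert consumer_map[("conv1", 0)] == [(1, 0), (2, 1)]
```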
- """ - producer_map = {} - for i in range(len(ssa)): - outputs = ssa[i][1] - for j, outp in enumerate(outputs): - producer_map[outp] = (i, j) - return producer_map - - -def get_consumer_map(ssa): - """ - Return dict from versioned blob to list of (i, j), - where i is index of consumer op, j is the index of input of that op. - """ - consumer_map = collections.defaultdict(list) - for i in range(len(ssa)): - inputs = ssa[i][0] - for j, inp in enumerate(inputs): - consumer_map[inp].append((i, j)) - return consumer_map - - -def get_params_from_init_net( - init_net: caffe2_pb2.NetDef, -) -> [Dict[str, Any], Dict[str, caffe2_pb2.DeviceOption]]: - """ - Take the output blobs from init_net by running it. - Outputs: - params: dict from blob name to numpy array - device_options: dict from blob name to the device option of its creating op - """ - # NOTE: this assumes that the params is determined by producer op with the - # only exception be CopyGPUToCPU which is CUDA op but returns CPU tensor. - def _get_device_option(producer_op): - if producer_op.type == "CopyGPUToCPU": - return caffe2_pb2.DeviceOption() - else: - return producer_op.device_option - - with ScopedWS("__get_params_from_init_net__", is_reset=True, is_cleanup=True) as ws: - ws.RunNetOnce(init_net) - params = {b: fetch_any_blob(b) for b in init_net.external_output} - ssa, versions = core.get_ssa(init_net) - producer_map = get_producer_map(ssa) - device_options = { - b: _get_device_option(init_net.op[producer_map[(b, versions[b])][0]]) - for b in init_net.external_output - } - return params, device_options - - -def _updater_raise(op, input_types, output_types): - raise RuntimeError( - "Failed to apply updater for op {} given input_types {} and" - " output_types {}".format(op, input_types, output_types) - ) - - -def _generic_status_identifier( - predict_net: caffe2_pb2.NetDef, - status_updater: Callable, - known_status: Dict[Tuple[str, int], Any], -) -> Dict[Tuple[str, int], Any]: - """ - Statically infer the status of each blob, the status can be such as device type - (CPU/GPU), layout (NCHW/NHWC), data type (float32/int8), etc. "Blob" here - is versioned blob (Tuple[str, int]) in the format compatible with ssa. - Inputs: - predict_net: the caffe2 network - status_updater: a callable, given an op and the status of its input/output, - it returns the updated status of input/output. `None` is used for - representing unknown status. - known_status: a dict containing known status, used as initialization. 
- Outputs: - A dict mapping from versioned blob to its status - """ - ssa, versions = core.get_ssa(predict_net) - versioned_ext_input = [(b, 0) for b in predict_net.external_input] - versioned_ext_output = [(b, versions[b]) for b in predict_net.external_output] - all_versioned_blobs = set().union(*[set(x[0] + x[1]) for x in ssa]) - - allowed_vbs = all_versioned_blobs.union(versioned_ext_input).union(versioned_ext_output) - assert all(k in allowed_vbs for k in known_status) - assert all(v is not None for v in known_status.values()) - _known_status = copy.deepcopy(known_status) - - def _check_and_update(key, value): - assert value is not None - if key in _known_status: - if not _known_status[key] == value: - raise RuntimeError( - "Conflict status for {}, existing status {}, new status {}".format( - key, _known_status[key], value - ) - ) - _known_status[key] = value - - def _update_i(op, ssa_i): - versioned_inputs = ssa_i[0] - versioned_outputs = ssa_i[1] - - inputs_status = [_known_status.get(b, None) for b in versioned_inputs] - outputs_status = [_known_status.get(b, None) for b in versioned_outputs] - - new_inputs_status, new_outputs_status = status_updater(op, inputs_status, outputs_status) - - for versioned_blob, status in zip( - versioned_inputs + versioned_outputs, new_inputs_status + new_outputs_status - ): - if status is not None: - _check_and_update(versioned_blob, status) - - for op, ssa_i in zip(predict_net.op, ssa): - _update_i(op, ssa_i) - for op, ssa_i in zip(reversed(predict_net.op), reversed(ssa)): - _update_i(op, ssa_i) - - # NOTE: This strictly checks that every blob from predict_net is assigned - # a known status. However sometimes that's impossible (e.g. a dead-end op), - # so we may relax this constraint if needed. - for k in all_versioned_blobs: - if k not in _known_status: - raise NotImplementedError( - "Can not infer the status for {}. 
Currently only support the case where" - " a single forward and backward pass can identify status for all blobs.".format(k) - ) - - return _known_status - - -def infer_device_type( - predict_net: caffe2_pb2.NetDef, - known_status: Dict[Tuple[str, int], Any], - device_name_style: str = "caffe2", -) -> Dict[Tuple[str, int], str]: - """ Return the device type ("cpu" or "gpu"/"cuda") of each (versioned) blob """ - - assert device_name_style in ["caffe2", "pytorch"] - _CPU_STR = "cpu" - _GPU_STR = "gpu" if device_name_style == "caffe2" else "cuda" - - def _copy_cpu_to_gpu_updater(op, input_types, output_types): - if input_types[0] == _GPU_STR or output_types[0] == _CPU_STR: - _updater_raise(op, input_types, output_types) - return ([_CPU_STR], [_GPU_STR]) - - def _copy_gpu_to_cpu_updater(op, input_types, output_types): - if input_types[0] == _CPU_STR or output_types[0] == _GPU_STR: - _updater_raise(op, input_types, output_types) - return ([_GPU_STR], [_CPU_STR]) - - def _other_ops_updater(op, input_types, output_types): - non_none_types = [x for x in input_types + output_types if x is not None] - if len(non_none_types) > 0: - the_type = non_none_types[0] - if not all(x == the_type for x in non_none_types): - _updater_raise(op, input_types, output_types) - else: - the_type = None - return ([the_type for _ in op.input], [the_type for _ in op.output]) - - def _device_updater(op, *args, **kwargs): - return { - "CopyCPUToGPU": _copy_cpu_to_gpu_updater, - "CopyGPUToCPU": _copy_gpu_to_cpu_updater, - }.get(op.type, _other_ops_updater)(op, *args, **kwargs) - - return _generic_status_identifier(predict_net, _device_updater, known_status) - - -# ==== torch/utils_caffe2/vis.py =============================================== - - -def _modify_blob_names(ops, blob_rename_f): - ret = [] - - def _replace_list(blob_list, replaced_list): - del blob_list[:] - blob_list.extend(replaced_list) - - for x in ops: - cur = copy.deepcopy(x) - _replace_list(cur.input, list(map(blob_rename_f, cur.input))) - _replace_list(cur.output, list(map(blob_rename_f, cur.output))) - ret.append(cur) - - return ret - - -def _rename_blob(name, blob_sizes, blob_ranges): - def _list_to_str(bsize): - ret = ", ".join([str(x) for x in bsize]) - ret = "[" + ret + "]" - return ret - - ret = name - if blob_sizes is not None and name in blob_sizes: - ret += "\n" + _list_to_str(blob_sizes[name]) - if blob_ranges is not None and name in blob_ranges: - ret += "\n" + _list_to_str(blob_ranges[name]) - - return ret - - -# graph_name could not contain word 'graph' -def save_graph(net, file_name, graph_name="net", op_only=True, blob_sizes=None, blob_ranges=None): - blob_rename_f = functools.partial(_rename_blob, blob_sizes=blob_sizes, blob_ranges=blob_ranges) - return save_graph_base(net, file_name, graph_name, op_only, blob_rename_f) - - -def save_graph_base(net, file_name, graph_name="net", op_only=True, blob_rename_func=None): - graph = None - ops = net.op - if blob_rename_func is not None: - ops = _modify_blob_names(ops, blob_rename_func) - if not op_only: - graph = net_drawer.GetPydotGraph(ops, graph_name, rankdir="TB") - else: - graph = net_drawer.GetPydotGraphMinimal( - ops, graph_name, rankdir="TB", minimal_dependency=True - ) - - try: - par_dir = os.path.dirname(file_name) - if not os.path.exists(par_dir): - os.makedirs(par_dir) - - format = os.path.splitext(os.path.basename(file_name))[-1] - if format == ".png": - graph.write_png(file_name) - elif format == ".pdf": - graph.write_pdf(file_name) - elif format == ".svg": - 
graph.write_svg(file_name) - else: - print("Incorrect format {}".format(format)) - except Exception as e: - print("Error when writing graph to image {}".format(e)) - - return graph - - -# ==== torch/utils_toffee/aten_to_caffe2.py ==================================== - - -def group_norm_replace_aten_with_caffe2(predict_net: caffe2_pb2.NetDef): - """ - For ONNX exported model, GroupNorm will be represented as ATen op, - this can be a drop in replacement from ATen to GroupNorm - """ - count = 0 - for op in predict_net.op: - if op.type == "ATen": - op_name = get_pb_arg_vals(op, "operator", None) # return byte in py3 - if op_name and op_name.decode() == "group_norm": - op.arg.remove(get_pb_arg(op, "operator")) - - if get_pb_arg_vali(op, "cudnn_enabled", None): - op.arg.remove(get_pb_arg(op, "cudnn_enabled")) - - num_groups = get_pb_arg_vali(op, "num_groups", None) - if num_groups is not None: - op.arg.remove(get_pb_arg(op, "num_groups")) - check_set_pb_arg(op, "group", "i", num_groups) - - op.type = "GroupNorm" - count += 1 - if count > 1: - logger.info("Replaced {} ATen operator to GroupNormOp".format(count)) - - -# ==== torch/utils_toffee/alias.py ============================================= - - -def alias(x, name, is_backward=False): - if not torch.onnx.is_in_onnx_export(): - return x - assert isinstance(x, torch.Tensor) - return torch.ops._caffe2.AliasWithName(x, name, is_backward=is_backward) - - -def fuse_alias_placeholder(predict_net, init_net): - """ Remove AliasWithName placeholder and rename the input/output of it """ - # First we finish all the re-naming - for i, op in enumerate(predict_net.op): - if op.type == "AliasWithName": - assert len(op.input) == 1 - assert len(op.output) == 1 - name = get_pb_arg_vals(op, "name", None).decode() - is_backward = bool(get_pb_arg_vali(op, "is_backward", 0)) - rename_op_input(predict_net, init_net, i, 0, name, from_producer=is_backward) - rename_op_output(predict_net, i, 0, name) - - # Remove AliasWithName, should be very safe since it's a non-op - new_ops = [] - for op in predict_net.op: - if op.type != "AliasWithName": - new_ops.append(op) - else: - # safety check - assert op.input == op.output - assert op.input[0] == op.arg[0].s.decode() - del predict_net.op[:] - predict_net.op.extend(new_ops) - - -# ==== torch/utils_caffe2/graph_transform.py =================================== - - -class IllegalGraphTransformError(ValueError): - """ When a graph transform function call can't be executed. 
""" - - -def _rename_versioned_blob_in_proto( - proto: caffe2_pb2.NetDef, - old_name: str, - new_name: str, - version: int, - ssa: List[Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]], - start_versions: Dict[str, int], - end_versions: Dict[str, int], -): - """ In given proto, rename all blobs with matched version """ - # Operater list - for op, i_th_ssa in zip(proto.op, ssa): - versioned_inputs, versioned_outputs = i_th_ssa - for i in range(len(op.input)): - if versioned_inputs[i] == (old_name, version): - op.input[i] = new_name - for i in range(len(op.output)): - if versioned_outputs[i] == (old_name, version): - op.output[i] = new_name - # external_input - if start_versions.get(old_name, 0) == version: - for i in range(len(proto.external_input)): - if proto.external_input[i] == old_name: - proto.external_input[i] = new_name - # external_output - if end_versions.get(old_name, 0) == version: - for i in range(len(proto.external_output)): - if proto.external_output[i] == old_name: - proto.external_output[i] = new_name - - -def rename_op_input( - predict_net: caffe2_pb2.NetDef, - init_net: caffe2_pb2.NetDef, - op_id: int, - input_id: int, - new_name: str, - from_producer: bool = False, -): - """ - Rename the op_id-th operator in predict_net, change it's input_id-th input's - name to the new_name. It also does automatic re-route and change - external_input and init_net if necessary. - - It requires the input is only consumed by this op. - - This function modifies predict_net and init_net in-place. - - When from_producer is enable, this also updates other operators that consumes - the same input. Be cautious because may trigger unintended behavior. - """ - assert isinstance(predict_net, caffe2_pb2.NetDef) - assert isinstance(init_net, caffe2_pb2.NetDef) - - init_net_ssa, init_net_versions = core.get_ssa(init_net) - predict_net_ssa, predict_net_versions = core.get_ssa( - predict_net, copy.deepcopy(init_net_versions) - ) - - versioned_inputs, versioned_outputs = predict_net_ssa[op_id] - old_name, version = versioned_inputs[input_id] - - if from_producer: - producer_map = get_producer_map(predict_net_ssa) - if not (old_name, version) in producer_map: - raise NotImplementedError( - "Can't find producer, the input {} is probably from" - " init_net, this is not supported yet.".format(old_name) - ) - producer = producer_map[(old_name, version)] - rename_op_output(predict_net, producer[0], producer[1], new_name) - return - - def contain_targets(op_ssa): - return (old_name, version) in op_ssa[0] - - is_consumer = [contain_targets(op_ssa) for op_ssa in predict_net_ssa] - if sum(is_consumer) > 1: - raise IllegalGraphTransformError( - ( - "Input '{}' of operator(#{}) are consumed by other ops, please use" - + " rename_op_output on the producer instead. Offending op: \n{}" - ).format(old_name, op_id, predict_net.op[op_id]) - ) - - # update init_net - _rename_versioned_blob_in_proto( - init_net, old_name, new_name, version, init_net_ssa, {}, init_net_versions - ) - # update predict_net - _rename_versioned_blob_in_proto( - predict_net, - old_name, - new_name, - version, - predict_net_ssa, - init_net_versions, - predict_net_versions, - ) - - -def rename_op_output(predict_net: caffe2_pb2.NetDef, op_id: int, output_id: int, new_name: str): - """ - Rename the op_id-th operator in predict_net, change it's output_id-th input's - name to the new_name. It also does automatic re-route and change - external_output and if necessary. - - It allows multiple consumers of its output. 
- - This function modifies predict_net in-place, doesn't need init_net. - """ - assert isinstance(predict_net, caffe2_pb2.NetDef) - - ssa, blob_versions = core.get_ssa(predict_net) - - versioned_inputs, versioned_outputs = ssa[op_id] - old_name, version = versioned_outputs[output_id] - - # update predict_net - _rename_versioned_blob_in_proto( - predict_net, old_name, new_name, version, ssa, {}, blob_versions - ) - - -def get_sub_graph_external_input_output( - predict_net: caffe2_pb2.NetDef, sub_graph_op_indices: List[int] -) -> Tuple[List[Tuple[str, int]], List[Tuple[str, int]]]: - """ - Return the list of external input/output of sub-graph, - each element is tuple of the name and corresponding version in predict_net. - - external input/output is defined the same way as caffe2 NetDef. - """ - ssa, versions = core.get_ssa(predict_net) - - all_inputs = [] - all_outputs = [] - for op_id in sub_graph_op_indices: - all_inputs += [inp for inp in ssa[op_id][0] if inp not in all_inputs] - all_outputs += list(ssa[op_id][1]) # ssa output won't repeat - - # for versioned blobs, external inputs are just those blob in all_inputs - # but not in all_outputs - ext_inputs = [inp for inp in all_inputs if inp not in all_outputs] - - # external outputs are essentially outputs of this subgraph that are used - # outside of this sub-graph (including predict_net.external_output) - all_other_inputs = sum( - (ssa[i][0] for i in range(len(ssa)) if i not in sub_graph_op_indices), - [(outp, versions[outp]) for outp in predict_net.external_output], - ) - ext_outputs = [outp for outp in all_outputs if outp in set(all_other_inputs)] - - return ext_inputs, ext_outputs - - -class DiGraph: - """ A DAG representation of caffe2 graph, each vertice is a versioned blob. """ - - def __init__(self): - self.vertices = set() - self.graph = collections.defaultdict(list) - - def add_edge(self, u, v): - self.graph[u].append(v) - self.vertices.add(u) - self.vertices.add(v) - - # grab from https://www.geeksforgeeks.org/find-paths-given-source-destination/ - def get_all_paths(self, s, d): - visited = {k: False for k in self.vertices} - path = [] - all_paths = [] - - def _get_all_paths_util(graph, u, d, visited, path): - visited[u] = True - path.append(u) - if u == d: - all_paths.append(copy.deepcopy(path)) - else: - for i in graph[u]: - if not visited[i]: - _get_all_paths_util(graph, i, d, visited, path) - path.pop() - visited[u] = False - - _get_all_paths_util(self.graph, s, d, visited, path) - return all_paths - - @staticmethod - def from_ssa(ssa): - graph = DiGraph() - for op_id in range(len(ssa)): - for inp in ssa[op_id][0]: - for outp in ssa[op_id][1]: - graph.add_edge(inp, outp) - return graph - - -def _get_dependency_chain(ssa, versioned_target, versioned_source): - """ - Return the index list of relevant operator to produce target blob from source blob, - if there's no dependency, return empty list. - """ - - # finding all paths between nodes can be O(N!), thus we can only search - # in the subgraph using the op starting from the first consumer of source blob - # to the producer of the target blob. 
- consumer_map = get_consumer_map(ssa) - producer_map = get_producer_map(ssa) - start_op = min(x[0] for x in consumer_map[versioned_source]) - 15 - end_op = ( - producer_map[versioned_target][0] + 15 if versioned_target in producer_map else start_op - ) - sub_graph_ssa = ssa[start_op : end_op + 1] - if len(sub_graph_ssa) > 30: - logger.warning( - "Subgraph bebetween {} and {} is large (from op#{} to op#{}), it" - " might take non-trival time to find all paths between them.".format( - versioned_source, versioned_target, start_op, end_op - ) - ) - - dag = DiGraph.from_ssa(sub_graph_ssa) - paths = dag.get_all_paths(versioned_source, versioned_target) # include two ends - ops_in_paths = [[producer_map[blob][0] for blob in path[1:]] for path in paths] - return sorted(set().union(*[set(ops) for ops in ops_in_paths])) - - -def identify_reshape_sub_graph(predict_net: caffe2_pb2.NetDef) -> List[List[int]]: - """ - Idenfity the reshape sub-graph in a protobuf. - The reshape sub-graph is defined as matching the following pattern: - - (input_blob) -> Op_1 -> ... -> Op_N -> (new_shape) -─┐ - └-------------------------------------------> Reshape -> (output_blob) - - Return: - List of sub-graphs, each sub-graph is represented as a list of indices - of the relavent ops, [Op_1, Op_2, ..., Op_N, Reshape] - """ - - ssa, _ = core.get_ssa(predict_net) - - ret = [] - for i, op in enumerate(predict_net.op): - if op.type == "Reshape": - assert len(op.input) == 2 - input_ssa = ssa[i][0] - data_source = input_ssa[0] - shape_source = input_ssa[1] - op_indices = _get_dependency_chain(ssa, shape_source, data_source) - ret.append(op_indices + [i]) - return ret - - -def remove_reshape_for_fc(predict_net, params): - """ - In PyTorch nn.Linear has to take 2D tensor, this often leads to reshape - a 4D tensor to 2D by calling .view(). However this (dynamic) reshaping - doesn't work well with ONNX and Int8 tools, and cause using extra - ops (eg. ExpandDims) that might not be available on mobile. - Luckily Caffe2 supports 4D tensor for FC, so we can remove those reshape - after exporting ONNX model. - """ - from caffe2.python import core - - # find all reshape sub-graph that can be removed, which is now all Reshape - # sub-graph whose output is only consumed by FC. - # TODO: to make it safer, we may need the actually value to better determine - # if a Reshape before FC is removable. - reshape_sub_graphs = identify_reshape_sub_graph(predict_net) - sub_graphs_to_remove = [] - for reshape_sub_graph in reshape_sub_graphs: - reshape_op_id = reshape_sub_graph[-1] - assert predict_net.op[reshape_op_id].type == "Reshape" - ssa, _ = core.get_ssa(predict_net) - reshape_output = ssa[reshape_op_id][1][0] - consumers = [i for i in range(len(ssa)) if reshape_output in ssa[i][0]] - if all(predict_net.op[consumer].type == "FC" for consumer in consumers): - # safety check if the sub-graph is isolated, for this reshape sub-graph, - # it means it has one non-param external input and one external output. - ext_inputs, ext_outputs = get_sub_graph_external_input_output( - predict_net, reshape_sub_graph - ) - non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] - if len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1: - sub_graphs_to_remove.append(reshape_sub_graph) - - # perform removing subgraph by: - # 1: rename the Reshape's output to its input, then the graph can be - # seen as in-place itentify, meaning whose external input/output are the same. - # 2: simply remove those ops. 
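The Reshape-before-FC pattern targeted here usually originates from the common PyTorch idiom of flattening a 4D feature map before `nn.Linear`; a toy module (not from this repo) showing where such a Reshape comes from:

```python
import torch
from torch import nn

class ToyHead(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(256 * 7 * 7, 10)

    def forward(self, x):                # x: [N, 256, 7, 7]
        x = x.flatten(start_dim=1)       # exported as a (dynamic) Reshape before the FC
        return self.fc(x)

out = ToyHead()(torch.rand(2, 256, 7, 7))
assert out.shape == (2, 10)
```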
- remove_op_ids = [] - params_to_remove = [] - for sub_graph in sub_graphs_to_remove: - logger.info( - "Remove Reshape sub-graph:\n{}".format( - "".join(["(#{:>4})\n{}".format(i, predict_net.op[i]) for i in sub_graph]) - ) - ) - reshape_op_id = sub_graph[-1] - new_reshap_output = predict_net.op[reshape_op_id].input[0] - rename_op_output(predict_net, reshape_op_id, 0, new_reshap_output) - ext_inputs, ext_outputs = get_sub_graph_external_input_output(predict_net, sub_graph) - non_params_ext_inputs = [inp for inp in ext_inputs if inp[1] != 0] - params_ext_inputs = [inp for inp in ext_inputs if inp[1] == 0] - assert len(non_params_ext_inputs) == 1 and len(ext_outputs) == 1 - assert ext_outputs[0][0] == non_params_ext_inputs[0][0] - assert ext_outputs[0][1] == non_params_ext_inputs[0][1] + 1 - remove_op_ids.extend(sub_graph) - params_to_remove.extend(params_ext_inputs) - - predict_net = copy.deepcopy(predict_net) - new_ops = [op for i, op in enumerate(predict_net.op) if i not in remove_op_ids] - del predict_net.op[:] - predict_net.op.extend(new_ops) - for versioned_params in params_to_remove: - name = versioned_params[0] - logger.info("Remove params: {} from init_net and predict_net.external_input".format(name)) - del params[name] - predict_net.external_input.remove(name) - - return predict_net, params - - -def fuse_copy_between_cpu_and_gpu(predict_net: caffe2_pb2.NetDef): - """ - In-place fuse extra copy ops between cpu/gpu for the following case: - a -CopyAToB-> b -CopyBToA> c1 -NextOp1-> d1 - -CopyBToA> c2 -NextOp2-> d2 - The fused network will look like: - a -NextOp1-> d1 - -NextOp2-> d2 - """ - - _COPY_OPS = ["CopyCPUToGPU", "CopyGPUToCPU"] - - def _fuse_once(predict_net): - ssa, blob_versions = core.get_ssa(predict_net) - consumer_map = get_consumer_map(ssa) - versioned_external_output = [ - (name, blob_versions[name]) for name in predict_net.external_output - ] - - for op_id, op in enumerate(predict_net.op): - if op.type in _COPY_OPS: - fw_copy_versioned_output = ssa[op_id][1][0] - consumer_ids = [x[0] for x in consumer_map[fw_copy_versioned_output]] - reverse_op_type = _COPY_OPS[1 - _COPY_OPS.index(op.type)] - - is_fusable = ( - len(consumer_ids) > 0 - and fw_copy_versioned_output not in versioned_external_output - and all( - predict_net.op[_op_id].type == reverse_op_type - and ssa[_op_id][1][0] not in versioned_external_output - for _op_id in consumer_ids - ) - ) - - if is_fusable: - for rv_copy_op_id in consumer_ids: - # making each NextOp uses "a" directly and removing Copy ops - rs_copy_versioned_output = ssa[rv_copy_op_id][1][0] - next_op_id, inp_id = consumer_map[rs_copy_versioned_output][0] - predict_net.op[next_op_id].input[inp_id] = op.input[0] - # remove CopyOps - new_ops = [ - op - for i, op in enumerate(predict_net.op) - if i != op_id and i not in consumer_ids - ] - del predict_net.op[:] - predict_net.op.extend(new_ops) - return True - - return False - - # _fuse_once returns False is nothing can be fused - while _fuse_once(predict_net): - pass - - -def remove_dead_end_ops(net_def: caffe2_pb2.NetDef): - """ remove ops if its output is not used or not in external_output """ - ssa, versions = core.get_ssa(net_def) - versioned_external_output = [(name, versions[name]) for name in net_def.external_output] - consumer_map = get_consumer_map(ssa) - removed_op_ids = set() - - def _is_dead_end(versioned_blob): - return not ( - versioned_blob in versioned_external_output - or ( - len(consumer_map[versioned_blob]) > 0 - and all(x[0] not in removed_op_ids for x in 
consumer_map[versioned_blob]) - ) - ) - - for i, ssa_i in reversed(list(enumerate(ssa))): - versioned_outputs = ssa_i[1] - if all(_is_dead_end(outp) for outp in versioned_outputs): - removed_op_ids.add(i) - - # simply removing those deadend ops should have no effect to external_output - new_ops = [op for i, op in enumerate(net_def.op) if i not in removed_op_ids] - del net_def.op[:] - net_def.op.extend(new_ops) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/__init__.py deleted file mode 100644 index 2753739..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm -from .deform_conv import DeformConv, ModulatedDeformConv -from .mask_ops import paste_masks_in_image -from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated -from .roi_align import ROIAlign, roi_align -from .roi_align_rotated import ROIAlignRotated, roi_align_rotated -from .shape_spec import ShapeSpec -from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate, Linear -from .blocks import CNNBlockBase - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/batch_norm.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/batch_norm.py deleted file mode 100644 index 1339c6e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/batch_norm.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import torch -import torch.distributed as dist -from torch import nn -from torch.autograd.function import Function -from torch.nn import functional as F - -from detectron2.utils import comm - -from .wrappers import BatchNorm2d - -TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) - - -class FrozenBatchNorm2d(nn.Module): - """ - BatchNorm2d where the batch statistics and the affine parameters are fixed. - - It contains non-trainable buffers called - "weight" and "bias", "running_mean", "running_var", - initialized to perform identity transformation. - - The pre-trained backbone models from Caffe2 only contain "weight" and "bias", - which are computed from the original four parameters of BN. - The affine transform `x * weight + bias` will perform the equivalent - computation of `(x - running_mean) / sqrt(running_var) * weight + bias`. - When loading a backbone model from Caffe2, "running_mean" and "running_var" - will be left unchanged as identity transformation. - - Other pre-trained backbone models may contain all 4 parameters. - - The forward is implemented by `F.batch_norm(..., training=False)`. - """ - - _version = 3 - - def __init__(self, num_features, eps=1e-5): - super().__init__() - self.num_features = num_features - self.eps = eps - self.register_buffer("weight", torch.ones(num_features)) - self.register_buffer("bias", torch.zeros(num_features)) - self.register_buffer("running_mean", torch.zeros(num_features)) - self.register_buffer("running_var", torch.ones(num_features) - eps) - - def forward(self, x): - if x.requires_grad: - # When gradients are needed, F.batch_norm will use extra memory - # because its backward op computes gradients for weight/bias as well. 
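The branch below folds the frozen statistics into a per-channel scale and bias; a quick self-contained check, on random toy values, that this folding matches `F.batch_norm` in eval mode:

```python
import torch
from torch.nn import functional as F

C, eps = 8, 1e-5
x = torch.randn(2, C, 4, 4)
weight, bias = torch.rand(C), torch.rand(C)
running_mean, running_var = torch.randn(C), torch.rand(C) + 0.5

# Fold (mean, var, weight, bias) into one affine transform per channel.
scale = weight * (running_var + eps).rsqrt()
shift = bias - running_mean * scale
folded = x * scale.reshape(1, -1, 1, 1) + shift.reshape(1, -1, 1, 1)

fused = F.batch_norm(x, running_mean, running_var, weight, bias, training=False, eps=eps)
assert torch.allclose(folded, fused, atol=1e-5)
```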
- scale = self.weight * (self.running_var + self.eps).rsqrt() - bias = self.bias - self.running_mean * scale - scale = scale.reshape(1, -1, 1, 1) - bias = bias.reshape(1, -1, 1, 1) - return x * scale + bias - else: - # When gradients are not needed, F.batch_norm is a single fused op - # and provide more optimization opportunities. - return F.batch_norm( - x, - self.running_mean, - self.running_var, - self.weight, - self.bias, - training=False, - eps=self.eps, - ) - - def _load_from_state_dict( - self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ): - version = local_metadata.get("version", None) - - if version is None or version < 2: - # No running_mean/var in early versions - # This will silent the warnings - if prefix + "running_mean" not in state_dict: - state_dict[prefix + "running_mean"] = torch.zeros_like(self.running_mean) - if prefix + "running_var" not in state_dict: - state_dict[prefix + "running_var"] = torch.ones_like(self.running_var) - - if version is not None and version < 3: - logger = logging.getLogger(__name__) - logger.info("FrozenBatchNorm {} is upgraded to version 3.".format(prefix.rstrip("."))) - # In version < 3, running_var are used without +eps. - state_dict[prefix + "running_var"] -= self.eps - - super()._load_from_state_dict( - state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ) - - def __repr__(self): - return "FrozenBatchNorm2d(num_features={}, eps={})".format(self.num_features, self.eps) - - @classmethod - def convert_frozen_batchnorm(cls, module): - """ - Convert BatchNorm/SyncBatchNorm in module into FrozenBatchNorm. - - Args: - module (torch.nn.Module): - - Returns: - If module is BatchNorm/SyncBatchNorm, returns a new module. - Otherwise, in-place convert module and return it. - - Similar to convert_sync_batchnorm in - https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py - """ - bn_module = nn.modules.batchnorm - bn_module = (bn_module.BatchNorm2d, bn_module.SyncBatchNorm) - res = module - if isinstance(module, bn_module): - res = cls(module.num_features) - if module.affine: - res.weight.data = module.weight.data.clone().detach() - res.bias.data = module.bias.data.clone().detach() - res.running_mean.data = module.running_mean.data - res.running_var.data = module.running_var.data - res.eps = module.eps - else: - for name, child in module.named_children(): - new_child = cls.convert_frozen_batchnorm(child) - if new_child is not child: - res.add_module(name, new_child) - return res - - -def get_norm(norm, out_channels): - """ - Args: - norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; - or a callable that takes a channel number and returns - the normalization layer as a nn.Module. - - Returns: - nn.Module or None: the normalization layer - """ - if isinstance(norm, str): - if len(norm) == 0: - return None - norm = { - "BN": BatchNorm2d, - # Fixed in https://github.com/pytorch/pytorch/pull/36382 - "SyncBN": NaiveSyncBatchNorm if TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, - "FrozenBN": FrozenBatchNorm2d, - "GN": lambda channels: nn.GroupNorm(32, channels), - # for debugging: - "nnSyncBN": nn.SyncBatchNorm, - "naiveSyncBN": NaiveSyncBatchNorm, - }[norm] - return norm(out_channels) - - -class AllReduce(Function): - @staticmethod - def forward(ctx, input): - input_list = [torch.zeros_like(input) for k in range(dist.get_world_size())] - # Use allgather instead of allreduce since I don't trust in-place operations .. 
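As a quick illustration of the get_norm factory defined above, the sketch below exercises the string-to-module mapping. It assumes an environment where the detectron2 package (and the layers/__init__.py shown earlier) is importable, which is precisely what this diff removes, so treat it as documentation of the deleted behaviour rather than working code for this repository:

from torch import nn
from detectron2.layers import FrozenBatchNorm2d, get_norm

gn = get_norm("GN", 64)             # GroupNorm with 32 groups over 64 channels
assert isinstance(gn, nn.GroupNorm) and gn.num_groups == 32

frozen = get_norm("FrozenBN", 64)   # fixed-statistics BN, buffers only
assert isinstance(frozen, FrozenBatchNorm2d)
assert len(list(frozen.parameters())) == 0

assert get_norm("", 64) is None     # empty string means "no normalization"

# Per the docstring, a callable taking the channel count is also accepted.
ln_like = get_norm(lambda c: nn.GroupNorm(1, c), 64)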
- dist.all_gather(input_list, input, async_op=False) - inputs = torch.stack(input_list, dim=0) - return torch.sum(inputs, dim=0) - - @staticmethod - def backward(ctx, grad_output): - dist.all_reduce(grad_output, async_op=False) - return grad_output - - -class NaiveSyncBatchNorm(BatchNorm2d): - """ - In PyTorch<=1.5, `nn.SyncBatchNorm` has incorrect gradient - when the batch size on each worker is different. - (e.g., when scale augmentation is used, or when it is applied to mask head). - - This is a slower but correct alternative to `nn.SyncBatchNorm`. - - Note: - There isn't a single definition of Sync BatchNorm. - - When ``stats_mode==""``, this module computes overall statistics by using - statistics of each worker with equal weight. The result is true statistics - of all samples (as if they are all on one worker) only when all workers - have the same (N, H, W). This mode does not support inputs with zero batch size. - - When ``stats_mode=="N"``, this module computes overall statistics by weighting - the statistics of each worker by their ``N``. The result is true statistics - of all samples (as if they are all on one worker) only when all workers - have the same (H, W). It is slower than ``stats_mode==""``. - - Even though the result of this module may not be the true statistics of all samples, - it may still be reasonable because it might be preferrable to assign equal weights - to all workers, regardless of their (H, W) dimension, instead of putting larger weight - on larger images. From preliminary experiments, little difference is found between such - a simplified implementation and an accurate computation of overall mean & variance. - """ - - def __init__(self, *args, stats_mode="", **kwargs): - super().__init__(*args, **kwargs) - assert stats_mode in ["", "N"] - self._stats_mode = stats_mode - - def forward(self, input): - if comm.get_world_size() == 1 or not self.training: - return super().forward(input) - - B, C = input.shape[0], input.shape[1] - - mean = torch.mean(input, dim=[0, 2, 3]) - meansqr = torch.mean(input * input, dim=[0, 2, 3]) - - if self._stats_mode == "": - assert B > 0, 'SyncBatchNorm(stats_mode="") does not support zero batch size.' 
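The NaiveSyncBatchNorm docstring above says that with stats_mode=="" each worker's statistics are averaged with equal weight, which equals the true global statistics only when every worker sees the same (N, H, W). A standalone sketch of that arithmetic (no torch.distributed; workers are simulated as a plain list), using the same mean / mean-of-squares decomposition as the branch that follows:

import torch

C = 8
workers = [torch.randn(4, C, 16, 16) for _ in range(3)]   # identical shape everywhere

means = torch.stack([w.mean(dim=[0, 2, 3]) for w in workers])
meansqrs = torch.stack([(w * w).mean(dim=[0, 2, 3]) for w in workers])

mean = means.mean(dim=0)        # what AllReduce.apply(vec) / world_size yields
meansqr = meansqrs.mean(dim=0)
var = meansqr - mean * mean     # biased variance, as used in the forward pass

everything = torch.cat(workers, dim=0)
assert torch.allclose(mean, everything.mean(dim=[0, 2, 3]), atol=1e-5)
assert torch.allclose(var, everything.var(dim=[0, 2, 3], unbiased=False), atol=1e-5)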
- vec = torch.cat([mean, meansqr], dim=0) - vec = AllReduce.apply(vec) * (1.0 / dist.get_world_size()) - mean, meansqr = torch.split(vec, C) - momentum = self.momentum - else: - if B == 0: - vec = torch.zeros([2 * C + 1], device=mean.device, dtype=mean.dtype) - vec = vec + input.sum() # make sure there is gradient w.r.t input - else: - vec = torch.cat( - [mean, meansqr, torch.ones([1], device=mean.device, dtype=mean.dtype)], dim=0 - ) - vec = AllReduce.apply(vec * B) - - total_batch = vec[-1].detach() - momentum = total_batch.clamp(max=1) * self.momentum # no update if total_batch is 0 - total_batch = torch.max(total_batch, torch.ones_like(total_batch)) # avoid div-by-zero - mean, meansqr, _ = torch.split(vec / total_batch, C) - - var = meansqr - mean * mean - invstd = torch.rsqrt(var + self.eps) - scale = self.weight * invstd - bias = self.bias - mean * scale - scale = scale.reshape(1, -1, 1, 1) - bias = bias.reshape(1, -1, 1, 1) - - self.running_mean += momentum * (mean.detach() - self.running_mean) - self.running_var += momentum * (var.detach() - self.running_var) - return input * scale + bias diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/blocks.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/blocks.py deleted file mode 100644 index 1d06fec..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/blocks.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from torch import nn - -from .batch_norm import FrozenBatchNorm2d - - -class CNNBlockBase(nn.Module): - """ - A CNN block is assumed to have input channels, output channels and a stride. - The input and output of `forward()` method must be NCHW tensors. - The method can perform arbitrary computation but must match the given - channels and stride specification. - - Attribute: - in_channels (int): - out_channels (int): - stride (int): - """ - - def __init__(self, in_channels, out_channels, stride): - """ - The `__init__` method of any subclass should also contain these arguments. - - Args: - in_channels (int): - out_channels (int): - stride (int): - """ - super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.stride = stride - - def freeze(self): - """ - Make this block not trainable. - This method sets all parameters to `requires_grad=False`, - and convert all BatchNorm layers to FrozenBatchNorm - - Returns: - the block itself - """ - for p in self.parameters(): - p.requires_grad = False - FrozenBatchNorm2d.convert_frozen_batchnorm(self) - return self diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/README.md b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/README.md deleted file mode 100644 index 778ed3d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/README.md +++ /dev/null @@ -1,7 +0,0 @@ - - -To add a new Op: - -1. Create a new directory -2. Implement new ops there -3. Delcare its Python interface in `vision.cpp`. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h deleted file mode 100644 index 2d95eac..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign.h +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#pragma once -#include - -namespace detectron2 { - -at::Tensor ROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned); - -at::Tensor ROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned); - -#ifdef WITH_CUDA -at::Tensor ROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned); - -at::Tensor ROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned); -#endif - -// Interface for Python -inline at::Tensor ROIAlign_forward( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned) { - if (input.is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - aligned); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_forward_cpu( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - aligned); -} - -inline at::Tensor ROIAlign_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned) { - if (grad.is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_backward_cuda( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio, - aligned); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_backward_cpu( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio, - aligned); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp deleted file mode 100644 index 52fc83f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cpu.cpp +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#include -#include "ROIAlign.h" - -namespace { - -// implementation taken from Caffe2 -template -struct PreCalc { - int pos1; - int pos2; - int pos3; - int pos4; - T w1; - T w2; - T w3; - T w4; -}; - -template -void pre_calc_for_bilinear_interpolate( - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int iy_upper, - const int ix_upper, - T roi_start_h, - T roi_start_w, - T bin_size_h, - T bin_size_w, - int roi_bin_grid_h, - int roi_bin_grid_w, - std::vector>& pre_calc) { - int pre_calc_index = 0; - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - for (int iy = 0; iy < iy_upper; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < ix_upper; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T x = xx; - T y = yy; - // deal with: inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - PreCalc pc; - pc.pos1 = 0; - pc.pos2 = 0; - pc.pos3 = 0; - pc.pos4 = 0; - pc.w1 = 0; - pc.w2 = 0; - pc.w3 = 0; - pc.w4 = 0; - pre_calc[pre_calc_index] = pc; - pre_calc_index += 1; - continue; - } - - if (y <= 0) { - y = 0; - } - if (x <= 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - // save weights and indices - PreCalc pc; - pc.pos1 = y_low * width + x_low; - pc.pos2 = y_low * width + x_high; - pc.pos3 = y_high * width + x_low; - pc.pos4 = y_high * width + x_high; - pc.w1 = w1; - pc.w2 = w2; - pc.w3 = w3; - pc.w4 = w4; - pre_calc[pre_calc_index] = pc; - - pre_calc_index += 1; - } - } - } - } -} - -template -void ROIAlignForward( - const int nthreads, - const T* input, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* rois, - T* output, - bool aligned) { - int n_rois = nthreads / channels / pooled_width / pooled_height; - // (n, c, ph, pw) is an element in the pooled output - // can be parallelized using omp - // #pragma omp parallel for num_threads(32) - for (int n = 0; n < n_rois; n++) { - int index_n = n * channels * pooled_width * pooled_height; - - const T* offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? 
(T)0.5 : (T)0.0; - T roi_start_w = offset_rois[1] * spatial_scale - offset; - T roi_start_h = offset_rois[2] * spatial_scale - offset; - T roi_end_w = offset_rois[3] * spatial_scale - offset; - T roi_end_h = offset_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (aligned) { - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlign cannot have non-negative size!"); - } else { // for backward-compatibility only - roi_width = std::max(roi_width, (T)1.); - roi_height = std::max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - // When the grid is empty, output zeros == 0/1, instead of NaN. - const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 - - // we want to precalculate indices and weights shared by all channels, - // this is the key point of optimization - std::vector> pre_calc( - roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); - pre_calc_for_bilinear_interpolate( - height, - width, - pooled_height, - pooled_width, - roi_bin_grid_h, - roi_bin_grid_w, - roi_start_h, - roi_start_w, - bin_size_h, - bin_size_w, - roi_bin_grid_h, - roi_bin_grid_w, - pre_calc); - - for (int c = 0; c < channels; c++) { - int index_n_c = index_n + c * pooled_width * pooled_height; - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - int pre_calc_index = 0; - - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - int index = index_n_c + ph * pooled_width + pw; - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - PreCalc pc = pre_calc[pre_calc_index]; - output_val += pc.w1 * offset_input[pc.pos1] + - pc.w2 * offset_input[pc.pos2] + - pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; - - pre_calc_index += 1; - } - } - output_val /= count; - - output[index] = output_val; - } // for pw - } // for ph - } // for c - } // for n -} - -template -void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -inline void add(T* address, const T& val) { - *address += val; -} - -template -void ROIAlignBackward( - const int nthreads, - // may not be contiguous, and should be indexed using n_stride, etc - const T* grad_output, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* grad_input, - const T* rois, - const int n_stride, - const int c_stride, - const int h_stride, - const int w_stride, - bool aligned) { - for (int index = 0; index < nthreads; index++) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? (T)0.5 : (T)0.0; - T roi_start_w = offset_rois[1] * spatial_scale - offset; - T roi_start_h = offset_rois[2] * spatial_scale - offset; - T roi_end_w = offset_rois[3] * spatial_scale - offset; - T roi_end_h = offset_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (aligned) { - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlign do not have non-negative size!"); - } else { // for backward-compatibility only - roi_width = std::max(roi_width, (T)1.); - roi_height = std::max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_grad_input = - grad_input + ((roi_batch_ind * channels + c) * height * width); - - int output_offset = n * n_stride + c * c_stride; - const T* offset_grad_output = grad_output + output_offset; - const T grad_output_this_bin = - offset_grad_output[ph * h_stride + pw * w_stride]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, - width, - y, - x, - w1, - w2, - w3, - w4, - x_low, - x_high, - y_low, - y_high, - index); - - T g1 = grad_output_this_bin * w1 / count; - T g2 = grad_output_this_bin * w2 / count; - T g3 = grad_output_this_bin * w3 / count; - T g4 = grad_output_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - // atomic add is not needed for now since it is single threaded - add(offset_grad_input + y_low * width + x_low, static_cast(g1)); - add(offset_grad_input + y_low * width + x_high, static_cast(g2)); - add(offset_grad_input + y_high * width + x_low, static_cast(g3)); - add(offset_grad_input + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // for -} // ROIAlignBackward - -} // namespace - -namespace detectron2 { - -at::Tensor ROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned) { - AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_forward_cpu"; - at::checkAllSameType(c, {input_t, rois_t}); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - at::Tensor output = at::zeros( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - - auto output_size = num_rois * pooled_height * pooled_width * channels; - - if (output.numel() == 0) - return output; - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ROIAlign_forward", [&] { - ROIAlignForward( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr(), - aligned); - }); - return output; -} - -at::Tensor ROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned) { - AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_backward_cpu"; - at::checkAllSameType(c, {grad_t, rois_t}); - - at::Tensor grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - // handle possibly empty gradients - if (grad.numel() == 0) { - return grad_input; - } - - // get stride values to ensure indexing into gradients is correct. 
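Both CPU kernels above reduce to one primitive: a sampling point (y, x) interacts with its four integer neighbours through bilinear weights w1 = hy*hx, w2 = hy*lx, w3 = ly*hx, w4 = ly*lx, which the forward pass uses to read values and the backward pass uses to scatter gradients. A small standalone sketch of that weight computation (simplified to strictly in-bounds points; the helper name is illustrative, not from the deleted file):

import torch

def bilinear_weights(y, x, height, width):
    # Simplified bilinear_interpolate_gradient for strictly in-bounds points:
    # clamp to 0, take the low/high neighbours, weight by fractional offsets.
    y, x = max(y, 0.0), max(x, 0.0)
    y_low, x_low = int(y), int(x)
    y_high, x_high = min(y_low + 1, height - 1), min(x_low + 1, width - 1)
    ly, lx = y - y_low, x - x_low
    hy, hx = 1.0 - ly, 1.0 - lx
    return (y_low, x_low, y_high, x_high), (hy * hx, hy * lx, ly * hx, ly * lx)

feat = torch.arange(25.0).reshape(5, 5)                  # feat[y, x] = 5*y + x
(yl, xl, yh, xh), (w1, w2, w3, w4) = bilinear_weights(2.3, 1.7, 5, 5)
assert abs(w1 + w2 + w3 + w4 - 1.0) < 1e-6

# Forward: weighted read of the four neighbours (what each bin sample adds).
val = w1 * feat[yl, xl] + w2 * feat[yl, xh] + w3 * feat[yh, xl] + w4 * feat[yh, xh]
assert abs(val.item() - (5 * 2.3 + 1.7)) < 1e-4          # exact for a linear map

# Backward: the same weights scatter the incoming gradient back to the
# four neighbours, as ROIAlignBackward does with g1..g4.
grad_in = torch.zeros_like(feat)
g = 1.0  # stands in for grad_output_this_bin / count
for (yy, xx), w in zip([(yl, xl), (yl, xh), (yh, xl), (yh, xh)], (w1, w2, w3, w4)):
    grad_in[yy, xx] += g * w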
- int n_stride = grad.stride(0); - int c_stride = grad.stride(1); - int h_stride = grad.stride(2); - int w_stride = grad.stride(3); - - auto rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ROIAlign_forward", [&] { - ROIAlignBackward( - grad.numel(), - grad.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr(), - n_stride, - c_stride, - h_stride, - w_stride, - aligned); - }); - return grad_input; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu deleted file mode 100644 index 2e05953..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlign/ROIAlign_cuda.cu +++ /dev/null @@ -1,430 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -template -__device__ T bilinear_interpolate( - const T* bottom_data, - const int height, - const int width, - T y, - T x, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - return 0; - } - - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - // do bilinear interpolation - T v1 = bottom_data[y_low * width + x_low]; - T v2 = bottom_data[y_low * width + x_high]; - T v3 = bottom_data[y_high * width + x_low]; - T v4 = bottom_data[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template -__global__ void RoIAlignForward( - const int nthreads, - const T* bottom_data, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* bottom_rois, - T* top_data, - bool aligned) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? 
(T)0.5 : (T)0.0; - T roi_start_w = offset_bottom_rois[1] * spatial_scale - offset; - T roi_start_h = offset_bottom_rois[2] * spatial_scale - offset; - T roi_end_w = offset_bottom_rois[3] * spatial_scale - offset; - T roi_end_h = offset_bottom_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (!aligned) { // for backward-compatibility only - roi_width = max(roi_width, (T)1.); - roi_height = max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - const T* offset_bottom_data = - bottom_data + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - // When the grid is empty, output zeros == 0/1, instead of NaN. - const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T val = bilinear_interpolate( - offset_bottom_data, height, width, y, x, index); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -template -__device__ void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high, - const int index /* index for debug only*/) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y <= 0) - y = 0; - if (x <= 0) - x = 0; - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - - // reference in forward - // T v1 = bottom_data[y_low * width + x_low]; - // T v2 = bottom_data[y_low * width + x_high]; - // T v3 = bottom_data[y_high * width + x_low]; - // T v4 = bottom_data[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -__global__ void RoIAlignBackwardFeature( - const int nthreads, - const T* top_diff, - const int num_rois, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* bottom_diff, - const T* bottom_rois, - bool aligned) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* offset_bottom_rois = bottom_rois + n * 5; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? (T)0.5 : (T)0.0; - T roi_start_w = offset_bottom_rois[1] * spatial_scale - offset; - T roi_start_h = offset_bottom_rois[2] * spatial_scale - offset; - T roi_end_w = offset_bottom_rois[3] * spatial_scale - offset; - T roi_end_h = offset_bottom_rois[4] * spatial_scale - offset; - - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - if (!aligned) { // for backward-compatibility only - roi_width = max(roi_width, (T)1.); - roi_height = max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T y = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, - width, - y, - x, - w1, - w2, - w3, - w4, - x_low, - x_high, - y_low, - y_high, - index); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd( - offset_bottom_diff + y_low * width + x_low, static_cast(g1)); - atomicAdd( - offset_bottom_diff + y_low * width + x_high, static_cast(g2)); - atomicAdd( - offset_bottom_diff + y_high * width + x_low, static_cast(g3)); - atomicAdd( - offset_bottom_diff + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // RoIAlignBackward - -namespace detectron2 { - -at::Tensor ROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - bool aligned) { - AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_forward_cuda"; - at::checkAllSameGPU(c, {input_t, rois_t}); - at::checkAllSameType(c, {input_t, rois_t}); - at::cuda::CUDAGuard device_guard(input.device()); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(output_size), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - if (output.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return output; - } - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] { - RoIAlignForward<<>>( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr(), - aligned); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return output; -} - -// TODO remove the dependency on input and use instead its sizes -> save memory -at::Tensor ROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, - bool aligned) { - AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_backward_cuda"; - at::checkAllSameGPU(c, {grad_t, rois_t}); - 
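The launch configuration used by ROIAlign_forward_cuda above, and repeated by the backward launcher that follows, is a plain grid-stride setup: 512 threads per block, the element count ceil-divided into blocks, and the grid capped at 4096 so that CUDA_1D_KERNEL_LOOP strides over any remainder. A tiny sketch of that arithmetic in Python (the function name is illustrative):

def launch_config(num_elements, threads_per_block=512, max_blocks=4096):
    # at::cuda::ATenCeilDiv(num_elements, 512), capped at 4096 blocks
    blocks = (num_elements + threads_per_block - 1) // threads_per_block
    return min(blocks, max_blocks), threads_per_block

# e.g. 1000 RoIs pooled to 256 x 7 x 7:
grid, block = launch_config(1000 * 256 * 7 * 7)
assert (grid, block) == (4096, 512)   # capped; each thread loops over ~6 elements

Capping the grid keeps the launch bounded for very large outputs, while the grid-stride loop inside the kernel preserves correctness for any element count.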
at::checkAllSameType(c, {grad_t, rois_t}); - at::cuda::CUDAGuard device_guard(grad.device()); - - auto num_rois = rois.size(0); - auto grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(grad.numel()), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - // handle possibly empty gradients - if (grad.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; - } - - auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIAlign_backward", [&] { - RoIAlignBackwardFeature<<>>( - grad.numel(), - grad_.data_ptr(), - num_rois, - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr(), - aligned); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h deleted file mode 100644 index a99c8eb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated.h +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#pragma once -#include - -namespace detectron2 { - -at::Tensor ROIAlignRotated_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlignRotated_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); - -#ifdef WITH_CUDA -at::Tensor ROIAlignRotated_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlignRotated_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); -#endif - -// Interface for Python -inline at::Tensor ROIAlignRotated_forward( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - if (input.is_cuda()) { -#ifdef WITH_CUDA - return ROIAlignRotated_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlignRotated_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -} - -inline at::Tensor ROIAlignRotated_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - if (grad.is_cuda()) { -#ifdef WITH_CUDA - return ROIAlignRotated_backward_cuda( - grad, - 
rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlignRotated_backward_cpu( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp deleted file mode 100644 index 7e5e1ff..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cpu.cpp +++ /dev/null @@ -1,522 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include "ROIAlignRotated.h" - -// Note: this implementation originates from the Caffe2 ROIAlignRotated Op -// and PyTorch ROIAlign (non-rotated) Op implementations. -// The key difference between this implementation and those ones is -// we don't do "legacy offset" in this version, as there aren't many previous -// works, if any, using the "legacy" ROIAlignRotated Op. -// This would make the interface a bit cleaner. - -namespace detectron2 { - -namespace { -template -struct PreCalc { - int pos1; - int pos2; - int pos3; - int pos4; - T w1; - T w2; - T w3; - T w4; -}; - -template -void pre_calc_for_bilinear_interpolate( - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int iy_upper, - const int ix_upper, - T roi_start_h, - T roi_start_w, - T bin_size_h, - T bin_size_w, - int roi_bin_grid_h, - int roi_bin_grid_w, - T roi_center_h, - T roi_center_w, - T cos_theta, - T sin_theta, - std::vector>& pre_calc) { - int pre_calc_index = 0; - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - for (int iy = 0; iy < iy_upper; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < ix_upper; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - // In image space, (y, x) is the order for Right Handed System, - // and this is essentially multiplying the point by a rotation matrix - // to rotate it counterclockwise through angle theta. - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - // deal with: inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - PreCalc pc; - pc.pos1 = 0; - pc.pos2 = 0; - pc.pos3 = 0; - pc.pos4 = 0; - pc.w1 = 0; - pc.w2 = 0; - pc.w3 = 0; - pc.w4 = 0; - pre_calc[pre_calc_index] = pc; - pre_calc_index += 1; - continue; - } - - if (y < 0) { - y = 0; - } - if (x < 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - // save weights and indices - PreCalc pc; - pc.pos1 = y_low * width + x_low; - pc.pos2 = y_low * width + x_high; - pc.pos3 = y_high * width + x_low; - pc.pos4 = y_high * width + x_high; - pc.w1 = w1; - pc.w2 = w2; - pc.w3 = w3; - pc.w4 = w4; - pre_calc[pre_calc_index] = pc; - - pre_calc_index += 1; - } - } - } - } -} - -template -void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -template -inline void add(T* address, const T& val) { - *address += val; -} - -} // namespace - -template -void ROIAlignRotatedForward( - const int nthreads, - const T* input, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* rois, - T* output) { - int n_rois = nthreads / channels / pooled_width / pooled_height; - // (n, c, ph, pw) is an element in the pooled output - // can be parallelized using omp - // #pragma omp parallel for num_threads(32) - for (int n = 0; n < n_rois; n++) { - int index_n = n * channels * pooled_width * pooled_height; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlignRotated do not have non-negative size!"); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 - - // we want to precalculate indices and weights shared by all channels, - // this is the key point of optimization - std::vector> pre_calc( - roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - pre_calc_for_bilinear_interpolate( - height, - width, - pooled_height, - pooled_width, - roi_bin_grid_h, - roi_bin_grid_w, - roi_start_h, - roi_start_w, - bin_size_h, - bin_size_w, - roi_bin_grid_h, - roi_bin_grid_w, - roi_center_h, - roi_center_w, - cos_theta, - sin_theta, - pre_calc); - - for (int c = 0; c < channels; c++) { - int index_n_c = index_n + c * pooled_width * pooled_height; - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - int pre_calc_index = 0; - - for (int ph = 0; ph < pooled_height; ph++) { - for (int pw = 0; pw < pooled_width; pw++) { - int index = index_n_c + ph * pooled_width + pw; - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - PreCalc pc = pre_calc[pre_calc_index]; - output_val += pc.w1 * offset_input[pc.pos1] + - pc.w2 * offset_input[pc.pos2] + - pc.w3 * offset_input[pc.pos3] + pc.w4 * offset_input[pc.pos4]; - - pre_calc_index += 1; - } - } - output_val /= count; - - output[index] = output_val; - } // for pw - } // for ph - } // for c - } // for n -} - -template -void ROIAlignRotatedBackward( - const int nthreads, - // may not be contiguous. should index using n_stride, etc - const T* grad_output, - const T& spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* grad_input, - const T* rois, - const int n_stride, - const int c_stride, - const int h_stride, - const int w_stride) { - for (int index = 0; index < nthreads; index++) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - AT_ASSERTM( - roi_width >= 0 && roi_height >= 0, - "ROIs in ROIAlignRotated do not have non-negative size!"); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_grad_input = - grad_input + ((roi_batch_ind * channels + c) * height * width); - - int output_offset = n * n_stride + c * c_stride; - const T* offset_grad_output = grad_output + output_offset; - const T grad_output_this_bin = - offset_grad_output[ph * h_stride + pw * w_stride]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? 
sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); - - T g1 = grad_output_this_bin * w1 / count; - T g2 = grad_output_this_bin * w2 / count; - T g3 = grad_output_this_bin * w3 / count; - T g4 = grad_output_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - // atomic add is not needed for now since it is single threaded - add(offset_grad_input + y_low * width + x_low, static_cast(g1)); - add(offset_grad_input + y_low * width + x_high, static_cast(g2)); - add(offset_grad_input + y_high * width + x_low, static_cast(g3)); - add(offset_grad_input + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // for -} // ROIAlignRotatedBackward - -at::Tensor ROIAlignRotated_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlign_forward_cpu"; - at::checkAllSameType(c, {input_t, rois_t}); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - at::Tensor output = at::zeros( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - - auto output_size = num_rois * pooled_height * pooled_width * channels; - - if (output.numel() == 0) { - return output; - } - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "ROIAlignRotated_forward", [&] { - ROIAlignRotatedForward( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr()); - }); - return output; -} - -at::Tensor ROIAlignRotated_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); - AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlignRotated_backward_cpu"; - at::checkAllSameType(c, {grad_t, rois_t}); - - at::Tensor grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - // handle possibly empty gradients - if (grad.numel() == 0) { - return grad_input; - } - - // get stride values to ensure indexing into gradients is correct. - int n_stride = grad.stride(0); - int c_stride = grad.stride(1); - int h_stride = grad.stride(2); - int w_stride = grad.stride(3); - - auto rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - grad.scalar_type(), "ROIAlignRotated_forward", [&] { - ROIAlignRotatedBackward( - grad.numel(), - grad.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr(), - n_stride, - c_stride, - h_stride, - w_stride); - }); - return grad_input; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu deleted file mode 100644 index 9c376fc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/ROIAlignRotated/ROIAlignRotated_cuda.cu +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -// Note: this implementation originates from the Caffe2 ROIAlignRotated Op -// and PyTorch ROIAlign (non-rotated) Op implementations. -// The key difference between this implementation and those ones is -// we don't do "legacy offset" in this version, as there aren't many previous -// works, if any, using the "legacy" ROIAlignRotated Op. -// This would make the interface a bit cleaner. - -namespace detectron2 { - -namespace { - -template -__device__ T bilinear_interpolate( - const T* input, - const int height, - const int width, - T y, - T x) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - return 0; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - int y_low = (int)y; - int x_low = (int)x; - int y_high; - int x_high; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. 
- lx; - // do bilinear interpolation - T v1 = input[y_low * width + x_low]; - T v2 = input[y_low * width + x_high]; - T v3 = input[y_high * width + x_low]; - T v4 = input[y_high * width + x_high]; - T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - return val; -} - -template -__device__ void bilinear_interpolate_gradient( - const int height, - const int width, - T y, - T x, - T& w1, - T& w2, - T& w3, - T& w4, - int& x_low, - int& x_high, - int& y_low, - int& y_high) { - // deal with cases that inverse elements are out of feature map boundary - if (y < -1.0 || y > height || x < -1.0 || x > width) { - // empty - w1 = w2 = w3 = w4 = 0.; - x_low = x_high = y_low = y_high = -1; - return; - } - - if (y < 0) { - y = 0; - } - - if (x < 0) { - x = 0; - } - - y_low = (int)y; - x_low = (int)x; - - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = (T)y_low; - } else { - y_high = y_low + 1; - } - - if (x_low >= width - 1) { - x_high = x_low = width - 1; - x = (T)x_low; - } else { - x_high = x_low + 1; - } - - T ly = y - y_low; - T lx = x - x_low; - T hy = 1. - ly, hx = 1. - lx; - - // reference in forward - // T v1 = input[y_low * width + x_low]; - // T v2 = input[y_low * width + x_high]; - // T v3 = input[y_high * width + x_low]; - // T v4 = input[y_high * width + x_high]; - // T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - - w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; - - return; -} - -} // namespace - -template -__global__ void RoIAlignRotatedForward( - const int nthreads, - const T* input, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const T* rois, - T* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - const T* offset_input = - input + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (inte gral) pooling inside a bin - const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T val = bilinear_interpolate(offset_input, height, width, y, x); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -template -__global__ void RoIAlignRotatedBackwardFeature( - const int nthreads, - const T* top_diff, - const int num_rois, - const T spatial_scale, - const int channels, - const int height, - const int width, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - T* bottom_diff, - const T* rois) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T* current_roi = rois + n * 6; - int roi_batch_ind = current_roi[0]; - - // Do not use rounding; this implementation detail is critical - // ROIAlignRotated supports align == true, i.e., continuous coordinate - // by default, thus the 0.5 offset - T offset = (T)0.5; - T roi_center_w = current_roi[1] * spatial_scale - offset; - T roi_center_h = current_roi[2] * spatial_scale - offset; - T roi_width = current_roi[3] * spatial_scale; - T roi_height = current_roi[4] * spatial_scale; - T theta = current_roi[5] * M_PI / 180.0; - T cos_theta = cos(theta); - T sin_theta = sin(theta); - - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T* offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T* offset_top_diff = top_diff + top_offset; - const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - T roi_start_h = -roi_height / 2.0; - T roi_start_w = -roi_width / 2.0; - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T yy = roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - T y = yy * cos_theta - xx * sin_theta + roi_center_h; - T x = yy * sin_theta + xx * cos_theta + roi_center_w; - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient( - height, width, y, x, w1, w2, w3, w4, x_low, x_high, y_low, y_high); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd( - offset_bottom_diff + y_low * width + x_low, static_cast(g1)); - atomicAdd( - offset_bottom_diff + y_low * width + x_high, static_cast(g2)); - atomicAdd( - offset_bottom_diff + y_high * width + x_low, static_cast(g3)); - atomicAdd( - offset_bottom_diff + y_high * width + x_high, static_cast(g4)); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // RoIAlignRotatedBackward - -at::Tensor ROIAlignRotated_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - AT_ASSERTM(input.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; - - at::CheckedFrom c = "ROIAlignRotated_forward_cuda"; - at::checkAllSameGPU(c, {input_t, rois_t}); - at::checkAllSameType(c, {input_t, rois_t}); - at::cuda::CUDAGuard device_guard(input.device()); - - auto num_rois = rois.size(0); - auto channels = input.size(1); - auto height = input.size(2); - auto width = input.size(3); - - auto output = at::empty( - {num_rois, channels, pooled_height, pooled_width}, input.options()); - auto output_size = num_rois * pooled_height * pooled_width * channels; - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(output_size), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - if (output.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return output; - } - - auto input_ = input.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES( - input.scalar_type(), "ROIAlignRotated_forward", [&] { - RoIAlignRotatedForward<<>>( - output_size, - input_.data_ptr(), - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - rois_.data_ptr(), - output.data_ptr()); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return output; -} - -// TODO remove the dependency on input and use instead its sizes -> save memory -at::Tensor ROIAlignRotated_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - AT_ASSERTM(grad.device().is_cuda(), "grad must be a CUDA tensor"); - AT_ASSERTM(rois.device().is_cuda(), "rois must be a CUDA tensor"); - - at::TensorArg grad_t{grad, "grad", 1}, 
rois_t{rois, "rois", 2}; - at::CheckedFrom c = "ROIAlign_backward_cuda"; - at::checkAllSameGPU(c, {grad_t, rois_t}); - at::checkAllSameType(c, {grad_t, rois_t}); - at::cuda::CUDAGuard device_guard(grad.device()); - - auto num_rois = rois.size(0); - auto grad_input = - at::zeros({batch_size, channels, height, width}, grad.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min( - at::cuda::ATenCeilDiv( - static_cast(grad.numel()), static_cast(512)), - static_cast(4096))); - dim3 block(512); - - // handle possibly empty gradients - if (grad.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; - } - - auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); - AT_DISPATCH_FLOATING_TYPES( - grad.scalar_type(), "ROIAlignRotated_backward", [&] { - RoIAlignRotatedBackwardFeature<<>>( - grad.numel(), - grad_.data_ptr(), - num_rois, - spatial_scale, - channels, - height, - width, - pooled_height, - pooled_width, - sampling_ratio, - grad_input.data_ptr(), - rois_.data_ptr()); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return grad_input; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h deleted file mode 100644 index 7c389c6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#pragma once -#include - -namespace detectron2 { - -at::Tensor box_iou_rotated_cpu( - const at::Tensor& boxes1, - const at::Tensor& boxes2); - -#ifdef WITH_CUDA -at::Tensor box_iou_rotated_cuda( - const at::Tensor& boxes1, - const at::Tensor& boxes2); -#endif - -// Interface for Python -// inline is needed to prevent multiple function definitions when this header is -// included by different cpps -inline at::Tensor box_iou_rotated( - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); - if (boxes1.device().is_cuda()) { -#ifdef WITH_CUDA - return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - - return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp deleted file mode 100644 index f2b02d1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#include "box_iou_rotated.h" -#include "box_iou_rotated_utils.h" - -namespace detectron2 { - -template -void box_iou_rotated_cpu_kernel( - const at::Tensor& boxes1, - const at::Tensor& boxes2, - at::Tensor& ious) { - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - - for (int i = 0; i < num_boxes1; i++) { - for (int j = 0; j < num_boxes2; j++) { - ious[i * num_boxes2 + j] = single_box_iou_rotated( - boxes1[i].data_ptr(), boxes2[j].data_ptr()); - } - } -} - -at::Tensor box_iou_rotated_cpu( - // input must be contiguous: - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - at::Tensor ious = - at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); - - box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); - - // reshape from 1d array to 2d array - auto shape = std::vector{num_boxes1, num_boxes2}; - return ious.reshape(shape); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu deleted file mode 100644 index e3403c1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include -#include "box_iou_rotated_utils.h" - -namespace detectron2 { - -// 2D block with 32 * 16 = 512 threads per block -const int BLOCK_DIM_X = 32; -const int BLOCK_DIM_Y = 16; - -template -__global__ void box_iou_rotated_cuda_kernel( - const int n_boxes1, - const int n_boxes2, - const T* dev_boxes1, - const T* dev_boxes2, - T* dev_ious) { - const int row_start = blockIdx.x * blockDim.x; - const int col_start = blockIdx.y * blockDim.y; - - const int row_size = min(n_boxes1 - row_start, blockDim.x); - const int col_size = min(n_boxes2 - col_start, blockDim.y); - - __shared__ float block_boxes1[BLOCK_DIM_X * 5]; - __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; - - // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y - if (threadIdx.x < row_size && threadIdx.y == 0) { - block_boxes1[threadIdx.x * 5 + 0] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; - block_boxes1[threadIdx.x * 5 + 1] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; - block_boxes1[threadIdx.x * 5 + 2] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; - block_boxes1[threadIdx.x * 5 + 3] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; - block_boxes1[threadIdx.x * 5 + 4] = - dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; - } - - if (threadIdx.x < col_size && threadIdx.y == 0) { - block_boxes2[threadIdx.x * 5 + 0] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; - block_boxes2[threadIdx.x * 5 + 1] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; - block_boxes2[threadIdx.x * 5 + 2] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; - block_boxes2[threadIdx.x * 5 + 3] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; - block_boxes2[threadIdx.x * 5 + 4] = - dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size && threadIdx.y < col_size) { - int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; - dev_ious[offset] = single_box_iou_rotated( - block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); - } -} 
- -at::Tensor box_iou_rotated_cuda( - // input must be contiguous - const at::Tensor& boxes1, - const at::Tensor& boxes2) { - using scalar_t = float; - AT_ASSERTM( - boxes1.scalar_type() == at::kFloat, "boxes1 must be a float tensor"); - AT_ASSERTM( - boxes2.scalar_type() == at::kFloat, "boxes2 must be a float tensor"); - AT_ASSERTM(boxes1.is_cuda(), "boxes1 must be a CUDA tensor"); - AT_ASSERTM(boxes2.is_cuda(), "boxes2 must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(boxes1.device()); - - auto num_boxes1 = boxes1.size(0); - auto num_boxes2 = boxes2.size(0); - - at::Tensor ious = - at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); - - bool transpose = false; - if (num_boxes1 > 0 && num_boxes2 > 0) { - scalar_t *data1 = boxes1.data_ptr(), - *data2 = boxes2.data_ptr(); - - if (num_boxes2 > 65535 * BLOCK_DIM_Y) { - AT_ASSERTM( - num_boxes1 <= 65535 * BLOCK_DIM_Y, - "Too many boxes for box_iou_rotated_cuda!"); - // x dim is allowed to be large, but y dim cannot, - // so we transpose the two to avoid "invalid configuration argument" - // error. We assume one of them is small. Otherwise the result is hard to - // fit in memory anyway. - std::swap(num_boxes1, num_boxes2); - std::swap(data1, data2); - transpose = true; - } - - const int blocks_x = - at::cuda::ATenCeilDiv(static_cast(num_boxes1), BLOCK_DIM_X); - const int blocks_y = - at::cuda::ATenCeilDiv(static_cast(num_boxes2), BLOCK_DIM_Y); - - dim3 blocks(blocks_x, blocks_y); - dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - box_iou_rotated_cuda_kernel<<>>( - num_boxes1, - num_boxes2, - data1, - data2, - (scalar_t*)ious.data_ptr()); - - AT_CUDA_CHECK(cudaGetLastError()); - } - - // reshape from 1d array to 2d array - auto shape = std::vector{num_boxes1, num_boxes2}; - if (transpose) { - return ious.view(shape).t(); - } else { - return ious.view(shape); - } -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h deleted file mode 100644 index d8757ec..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_utils.h +++ /dev/null @@ -1,363 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#pragma once - -#include -#include - -#ifdef __CUDACC__ -// Designates functions callable from the host (CPU) and the device (GPU) -#define HOST_DEVICE __host__ __device__ -#define HOST_DEVICE_INLINE HOST_DEVICE __forceinline__ -#else -#include -#define HOST_DEVICE -#define HOST_DEVICE_INLINE HOST_DEVICE inline -#endif - -namespace detectron2 { - -namespace { - -template -struct RotatedBox { - T x_ctr, y_ctr, w, h, a; -}; - -template -struct Point { - T x, y; - HOST_DEVICE_INLINE Point(const T& px = 0, const T& py = 0) : x(px), y(py) {} - HOST_DEVICE_INLINE Point operator+(const Point& p) const { - return Point(x + p.x, y + p.y); - } - HOST_DEVICE_INLINE Point& operator+=(const Point& p) { - x += p.x; - y += p.y; - return *this; - } - HOST_DEVICE_INLINE Point operator-(const Point& p) const { - return Point(x - p.x, y - p.y); - } - HOST_DEVICE_INLINE Point operator*(const T coeff) const { - return Point(x * coeff, y * coeff); - } -}; - -template -HOST_DEVICE_INLINE T dot_2d(const Point& A, const Point& B) { - return A.x * B.x + A.y * B.y; -} - -// R: result type. 
can be different from input type -template -HOST_DEVICE_INLINE R cross_2d(const Point& A, const Point& B) { - return static_cast(A.x) * static_cast(B.y) - - static_cast(B.x) * static_cast(A.y); -} - -template -HOST_DEVICE_INLINE void get_rotated_vertices( - const RotatedBox& box, - Point (&pts)[4]) { - // M_PI / 180. == 0.01745329251 - double theta = box.a * 0.01745329251; - T cosTheta2 = (T)cos(theta) * 0.5f; - T sinTheta2 = (T)sin(theta) * 0.5f; - - // y: top --> down; x: left --> right - pts[0].x = box.x_ctr + sinTheta2 * box.h + cosTheta2 * box.w; - pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w; - pts[1].x = box.x_ctr - sinTheta2 * box.h + cosTheta2 * box.w; - pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w; - pts[2].x = 2 * box.x_ctr - pts[0].x; - pts[2].y = 2 * box.y_ctr - pts[0].y; - pts[3].x = 2 * box.x_ctr - pts[1].x; - pts[3].y = 2 * box.y_ctr - pts[1].y; -} - -template -HOST_DEVICE_INLINE int get_intersection_points( - const Point (&pts1)[4], - const Point (&pts2)[4], - Point (&intersections)[24]) { - // Line vector - // A line from p1 to p2 is: p1 + (p2-p1)*t, t=[0,1] - Point vec1[4], vec2[4]; - for (int i = 0; i < 4; i++) { - vec1[i] = pts1[(i + 1) % 4] - pts1[i]; - vec2[i] = pts2[(i + 1) % 4] - pts2[i]; - } - - // Line test - test all line combos for intersection - int num = 0; // number of intersections - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - // Solve for 2x2 Ax=b - T det = cross_2d(vec2[j], vec1[i]); - - // This takes care of parallel lines - if (fabs(det) <= 1e-14) { - continue; - } - - auto vec12 = pts2[j] - pts1[i]; - - T t1 = cross_2d(vec2[j], vec12) / det; - T t2 = cross_2d(vec1[i], vec12) / det; - - if (t1 >= 0.0f && t1 <= 1.0f && t2 >= 0.0f && t2 <= 1.0f) { - intersections[num++] = pts1[i] + vec1[i] * t1; - } - } - } - - // Check for vertices of rect1 inside rect2 - { - const auto& AB = vec2[0]; - const auto& DA = vec2[3]; - auto ABdotAB = dot_2d(AB, AB); - auto ADdotAD = dot_2d(DA, DA); - for (int i = 0; i < 4; i++) { - // assume ABCD is the rectangle, and P is the point to be judged - // P is inside ABCD iff. P's projection on AB lies within AB - // and P's projection on AD lies within AD - - auto AP = pts1[i] - pts2[0]; - - auto APdotAB = dot_2d(AP, AB); - auto APdotAD = -dot_2d(AP, DA); - - if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && - (APdotAD <= ADdotAD)) { - intersections[num++] = pts1[i]; - } - } - } - - // Reverse the check - check for vertices of rect2 inside rect1 - { - const auto& AB = vec1[0]; - const auto& DA = vec1[3]; - auto ABdotAB = dot_2d(AB, AB); - auto ADdotAD = dot_2d(DA, DA); - for (int i = 0; i < 4; i++) { - auto AP = pts2[i] - pts1[0]; - - auto APdotAB = dot_2d(AP, AB); - auto APdotAD = -dot_2d(AP, DA); - - if ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && - (APdotAD <= ADdotAD)) { - intersections[num++] = pts2[i]; - } - } - } - - return num; -} - -template -HOST_DEVICE_INLINE int convex_hull_graham( - const Point (&p)[24], - const int& num_in, - Point (&q)[24], - bool shift_to_zero = false) { - assert(num_in >= 2); - - // Step 1: - // Find point with minimum y - // if more than 1 points have the same minimum y, - // pick the one with the minimum x. 
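As an aside on conventions: get_rotated_vertices above takes the angle in degrees and works in image coordinates (y grows downward), so a 90-degree rotation simply swaps the box's width and height. The short standalone sketch below re-states those formulas in plain C++ to make the convention easy to check; the struct and function names are invented for the example, and the expected corners are worked out by hand from the formulas above.

#include <cmath>
#include <cstdio>

// Standalone re-statement of the vertex formulas in get_rotated_vertices.
struct Pt { double x, y; };

void rotated_vertices(double cx, double cy, double w, double h, double a_deg,
                      Pt (&pts)[4]) {
  double theta = a_deg * 0.01745329251;  // M_PI / 180, as in the header above
  double cosTheta2 = std::cos(theta) * 0.5;
  double sinTheta2 = std::sin(theta) * 0.5;
  pts[0] = {cx + sinTheta2 * h + cosTheta2 * w, cy + cosTheta2 * h - sinTheta2 * w};
  pts[1] = {cx - sinTheta2 * h + cosTheta2 * w, cy - cosTheta2 * h - sinTheta2 * w};
  pts[2] = {2 * cx - pts[0].x, 2 * cy - pts[0].y};
  pts[3] = {2 * cx - pts[1].x, 2 * cy - pts[1].y};
}

int main() {
  Pt pts[4];
  // A 2 x 1 box at the origin rotated by 90 degrees becomes a 1 x 2 box:
  // the corners land at (+-0.5, -+1.0) up to floating-point rounding.
  rotated_vertices(0.0, 0.0, 2.0, 1.0, 90.0, pts);
  for (const Pt& p : pts)
    std::printf("(%.3f, %.3f)\n", p.x, p.y);
  return 0;
}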
- int t = 0; - for (int i = 1; i < num_in; i++) { - if (p[i].y < p[t].y || (p[i].y == p[t].y && p[i].x < p[t].x)) { - t = i; - } - } - auto& start = p[t]; // starting point - - // Step 2: - // Subtract starting point from every points (for sorting in the next step) - for (int i = 0; i < num_in; i++) { - q[i] = p[i] - start; - } - - // Swap the starting point to position 0 - auto tmp = q[0]; - q[0] = q[t]; - q[t] = tmp; - - // Step 3: - // Sort point 1 ~ num_in according to their relative cross-product values - // (essentially sorting according to angles) - // If the angles are the same, sort according to their distance to origin - T dist[24]; -#ifdef __CUDACC__ - // compute distance to origin before sort, and sort them together with the - // points - for (int i = 0; i < num_in; i++) { - dist[i] = dot_2d(q[i], q[i]); - } - - // CUDA version - // In the future, we can potentially use thrust - // for sorting here to improve speed (though not guaranteed) - for (int i = 1; i < num_in - 1; i++) { - for (int j = i + 1; j < num_in; j++) { - T crossProduct = cross_2d(q[i], q[j]); - if ((crossProduct < -1e-6) || - (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) { - auto q_tmp = q[i]; - q[i] = q[j]; - q[j] = q_tmp; - auto dist_tmp = dist[i]; - dist[i] = dist[j]; - dist[j] = dist_tmp; - } - } - } -#else - // CPU version - std::sort( - q + 1, q + num_in, [](const Point& A, const Point& B) -> bool { - T temp = cross_2d(A, B); - if (fabs(temp) < 1e-6) { - return dot_2d(A, A) < dot_2d(B, B); - } else { - return temp > 0; - } - }); - // compute distance to origin after sort, since the points are now different. - for (int i = 0; i < num_in; i++) { - dist[i] = dot_2d(q[i], q[i]); - } -#endif - - // Step 4: - // Make sure there are at least 2 points (that don't overlap with each other) - // in the stack - int k; // index of the non-overlapped second point - for (k = 1; k < num_in; k++) { - if (dist[k] > 1e-8) { - break; - } - } - if (k == num_in) { - // We reach the end, which means the convex hull is just one point - q[0] = p[t]; - return 1; - } - q[1] = q[k]; - int m = 2; // 2 points in the stack - // Step 5: - // Finally we can start the scanning process. - // When a non-convex relationship between the 3 points is found - // (either concave shape or duplicated points), - // we pop the previous point from the stack - // until the 3-point relationship is convex again, or - // until the stack only contains two points - for (int i = k + 1; i < num_in; i++) { - while (m > 1) { - auto q1 = q[i] - q[m - 2], q2 = q[m - 1] - q[m - 2]; - // cross_2d() uses FMA and therefore computes round(round(q1.x*q2.y) - - // q2.x*q1.y) So it may not return 0 even when q1==q2. Therefore we - // compare round(q1.x*q2.y) and round(q2.x*q1.y) directly. (round means - // round to nearest floating point). - if (q1.x * q2.y >= q2.x * q1.y) - m--; - else - break; - } - // Using double also helps, but float can solve the issue for now. - // while (m > 1 && cross_2d(q[i] - q[m - 2], q[m - 1] - q[m - 2]) - // >= 0) { - // m--; - // } - q[m++] = q[i]; - } - - // Step 6 (Optional): - // In general sense we need the original coordinates, so we - // need to shift the points back (reverting Step 2) - // But if we're only interested in getting the area/perimeter of the shape - // We can simply return. 
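After the scan finishes, polygon_area and single_box_iou_rotated at the end of this header combine the ordered hull into an IoU value. A quick sanity check of the whole pipeline, assuming this header is included, is the angle-zero case, where the rotated IoU must agree with the ordinary axis-aligned formula: two unit squares whose centers sit 0.5 apart overlap in area 0.5, so the expected IoU is 0.5 / (1 + 1 - 0.5) = 1/3. The sketch below is illustrative only and not part of the deleted sources.

#include <cassert>
#include <cmath>

// Sanity check: at angle 0 the rotated IoU reduces to the ordinary
// axis-aligned IoU. Assumes box_iou_rotated_utils.h is included.
int main() {
  // Boxes are (cx, cy, w, h, angle_in_degrees), matching RotatedBox above.
  const float box1[5] = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f};
  const float box2[5] = {0.5f, 0.0f, 1.0f, 1.0f, 0.0f};
  float iou = detectron2::single_box_iou_rotated<float>(box1, box2);
  // Overlap area 0.5, union 1.5 -> IoU = 1/3.
  assert(std::fabs(iou - 1.0f / 3.0f) < 1e-4f);
  return 0;
}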
- if (!shift_to_zero) { - for (int i = 0; i < m; i++) { - q[i] += start; - } - } - - return m; -} - -template -HOST_DEVICE_INLINE T polygon_area(const Point (&q)[24], const int& m) { - if (m <= 2) { - return 0; - } - - T area = 0; - for (int i = 1; i < m - 1; i++) { - area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0])); - } - - return area / 2.0; -} - -template -HOST_DEVICE_INLINE T rotated_boxes_intersection( - const RotatedBox& box1, - const RotatedBox& box2) { - // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned - // from rotated_rect_intersection_pts - Point intersectPts[24], orderedPts[24]; - - Point pts1[4]; - Point pts2[4]; - get_rotated_vertices(box1, pts1); - get_rotated_vertices(box2, pts2); - - int num = get_intersection_points(pts1, pts2, intersectPts); - - if (num <= 2) { - return 0.0; - } - - // Convex Hull to order the intersection points in clockwise order and find - // the contour area. - int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true); - return polygon_area(orderedPts, num_convex); -} - -} // namespace - -template -HOST_DEVICE_INLINE T -single_box_iou_rotated(T const* const box1_raw, T const* const box2_raw) { - // shift center to the middle point to achieve higher precision in result - RotatedBox box1, box2; - auto center_shift_x = (box1_raw[0] + box2_raw[0]) / 2.0; - auto center_shift_y = (box1_raw[1] + box2_raw[1]) / 2.0; - box1.x_ctr = box1_raw[0] - center_shift_x; - box1.y_ctr = box1_raw[1] - center_shift_y; - box1.w = box1_raw[2]; - box1.h = box1_raw[3]; - box1.a = box1_raw[4]; - box2.x_ctr = box2_raw[0] - center_shift_x; - box2.y_ctr = box2_raw[1] - center_shift_y; - box2.w = box2_raw[2]; - box2.h = box2_raw[3]; - box2.a = box2_raw[4]; - - T area1 = box1.w * box1.h; - T area2 = box2.w * box2.h; - if (area1 < 1e-14 || area2 < 1e-14) { - return 0.f; - } - - T intersection = rotated_boxes_intersection(box1, box2); - T iou = intersection / (area1 + area2 - intersection); - return iou; -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/cuda_version.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/cuda_version.cu deleted file mode 100644 index af088e7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/cuda_version.cu +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -#include - -namespace detectron2 { -int get_cudart_version() { - return CUDART_VERSION; -} -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv.h deleted file mode 100644 index 49ccd86..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv.h +++ /dev/null @@ -1,377 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#pragma once -#include - -namespace detectron2 { - -#ifdef WITH_CUDA -int deform_conv_forward_cuda( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step); - -int deform_conv_backward_input_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step); - -int deform_conv_backward_parameters_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step); - -void modulated_deform_conv_cuda_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - const bool with_bias); - -void modulated_deform_conv_cuda_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias); - -#endif - -inline int deform_conv_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - if (input.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_forward_cuda( - input, - weight, - offset, - output, - columns, - ones, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - im2col_step); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline int deform_conv_backward_input( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - if (gradOutput.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_backward_input_cuda( - input, - 
offset, - gradOutput, - gradInput, - gradOffset, - weight, - columns, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - im2col_step); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline int deform_conv_backward_filter( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step) { - if (gradOutput.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return deform_conv_backward_parameters_cuda( - input, - offset, - gradOutput, - gradWeight, - columns, - ones, - kW, - kH, - dW, - dH, - padW, - padH, - dilationW, - dilationH, - group, - deformable_group, - scale, - im2col_step); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline void modulated_deform_conv_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - const bool with_bias) { - if (input.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return modulated_deform_conv_cuda_forward( - input, - weight, - bias, - ones, - offset, - mask, - output, - columns, - kernel_h, - kernel_w, - stride_h, - stride_w, - pad_h, - pad_w, - dilation_h, - dilation_w, - group, - deformable_group, - with_bias); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline void modulated_deform_conv_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias) { - if (grad_output.is_cuda()) { -#ifdef WITH_CUDA - TORCH_CHECK(input.is_cuda(), "input tensor is not on GPU!"); - TORCH_CHECK(weight.is_cuda(), "weight tensor is not on GPU!"); - TORCH_CHECK(bias.is_cuda(), "bias tensor is not on GPU!"); - TORCH_CHECK(offset.is_cuda(), "offset tensor is not on GPU!"); - return modulated_deform_conv_cuda_backward( - input, - weight, - bias, - ones, - offset, - mask, - columns, - grad_input, - grad_weight, - grad_bias, - grad_offset, - grad_mask, - grad_output, - kernel_h, - kernel_w, - stride_h, - stride_w, - pad_h, - pad_w, - dilation_h, - dilation_w, - group, - deformable_group, - with_bias); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -} // namespace detectron2 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu deleted file mode 100644 index 5376db0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda.cu +++ /dev/null @@ -1,1131 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -// modified from -// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp -// Original license: Apache 2.0 - -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c -// Original license: Apache 2.0 - -#include - -#include "deform_conv.h" - -#include -#include - -namespace detectron2 { - -void deformable_im2col( - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor data_col); - -void deformable_col2im( - const at::Tensor data_col, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_im); - -void deformable_col2im_coord( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_offset); - -void modulated_deformable_im2col_cuda( - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor data_col); - -void modulated_deformable_col2im_cuda( - const at::Tensor data_col, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_im); - -void modulated_deformable_col2im_coord_cuda( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - 
const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_offset, - at::Tensor grad_mask); - -void shape_check( - at::Tensor input, - at::Tensor offset, - at::Tensor* gradOutput, - at::Tensor weight, - int kH, - int kW, - int dH, - int dW, - int padH, - int padW, - int dilationH, - int dilationW, - int group, - int deformable_group) { - TORCH_CHECK( - weight.ndimension() == 4, - "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " - "but got: %s", - weight.ndimension()); - - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - TORCH_CHECK( - kW > 0 && kH > 0, - "kernel size should be greater than zero, but got kH: %d kW: %d", - kH, - kW); - - TORCH_CHECK( - (weight.size(2) == kH && weight.size(3) == kW), - "kernel size should be consistent with weight, ", - "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", - kH, - kW, - weight.size(2), - weight.size(3)); - - TORCH_CHECK( - dW > 0 && dH > 0, - "stride should be greater than zero, but got dH: %d dW: %d", - dH, - dW); - - TORCH_CHECK( - dilationW > 0 && dilationH > 0, - "dilation should be greater than 0, but got dilationH: %d dilationW: %d", - dilationH, - dilationW); - - int ndim = input.ndimension(); - int dimf = 0; - int dimh = 1; - int dimw = 2; - - if (ndim == 4) { - dimf++; - dimh++; - dimw++; - } - - TORCH_CHECK( - ndim == 3 || ndim == 4, - "3D or 4D input tensor expected but got: %s", - ndim); - - long nInputPlane = weight.size(1) * group; - long inputHeight = input.size(dimh); - long inputWidth = input.size(dimw); - long nOutputPlane = weight.size(0); - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - - TORCH_CHECK( - nInputPlane % deformable_group == 0, - "input channels must divide deformable group size"); - - if (outputWidth < 1 || outputHeight < 1) - AT_ERROR( - "Given input size: (%ld x %ld x %ld). " - "Calculated output size: (%ld x %ld x %ld). 
Output size is too small", - nInputPlane, - inputHeight, - inputWidth, - nOutputPlane, - outputHeight, - outputWidth); - - TORCH_CHECK( - input.size(1) == nInputPlane, - "invalid number of input planes, expected: %d, but got: %d", - nInputPlane, - input.size(1)); - - TORCH_CHECK( - (inputHeight >= kH && inputWidth >= kW), - "input image is smaller than kernel"); - - TORCH_CHECK( - (offset.size(2) == outputHeight && offset.size(3) == outputWidth), - "invalid spatial size of offset, expected height: %d width: %d, but " - "got height: %d width: %d", - outputHeight, - outputWidth, - offset.size(2), - offset.size(3)); - - TORCH_CHECK( - (offset.size(1) == deformable_group * 2 * kH * kW), - "invalid number of channels of offset"); - - if (gradOutput != NULL) { - TORCH_CHECK( - gradOutput->size(dimf) == nOutputPlane, - "invalid number of gradOutput planes, expected: %d, but got: %d", - nOutputPlane, - gradOutput->size(dimf)); - - TORCH_CHECK( - (gradOutput->size(dimh) == outputHeight && - gradOutput->size(dimw) == outputWidth), - "invalid size of gradOutput, expected height: %d width: %d , but " - "got height: %d width: %d", - outputHeight, - outputWidth, - gradOutput->size(dimh), - gradOutput->size(dimw)); - } -} - -int deform_conv_forward_cuda( - at::Tensor input, - at::Tensor weight, - at::Tensor offset, - at::Tensor output, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - // todo: resize columns to include im2col: done - // todo: add im2col_step as input - // todo: add new output buffer and transpose it to output (or directly - // transpose output) todo: possibly change data indexing because of - // parallel_imgs - - shape_check( - input, - offset, - NULL, - weight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - weight = weight.contiguous(); - - int batch = 1; - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input.unsqueeze_(0); - offset.unsqueeze_(0); - } - - // todo: assert batchsize dividable by im2col_step - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - output = output.view({batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < outputHeight * outputWidth) { - ones = at::ones({outputHeight, outputWidth}, input.options()); - } - - input = input.view({batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - offset = offset.view({batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - at::Tensor output_buffer = at::zeros( - {batchSize / im2col_step, - nOutputPlane, - im2col_step * outputHeight, - outputWidth}, - output.options()); - - output_buffer = output_buffer.view({output_buffer.size(0), - group, - output_buffer.size(1) / group, - 
output_buffer.size(2), - output_buffer.size(3)}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col( - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - - for (int g = 0; g < group; g++) { - output_buffer[elt][g] = output_buffer[elt][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output_buffer[elt][g]); - } - } - - output_buffer = - output_buffer.view({output_buffer.size(0), - output_buffer.size(1) * output_buffer.size(2), - output_buffer.size(3), - output_buffer.size(4)}); - - output_buffer = output_buffer.view({batchSize / im2col_step, - nOutputPlane, - im2col_step, - outputHeight, - outputWidth}); - output_buffer.transpose_(1, 2); - output.copy_(output_buffer); - output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - output = output.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - } - - return 1; -} - -int deform_conv_backward_input_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradInput, - at::Tensor gradOffset, - at::Tensor weight, - at::Tensor columns, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - int im2col_step) { - shape_check( - input, - offset, - &gradOutput, - weight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - gradOutput = gradOutput.contiguous(); - weight = weight.contiguous(); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view({1, input.size(0), input.size(1), input.size(2)}); - offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - // change order of grad output - gradOutput = gradOutput.view({batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - gradOutput.transpose_(1, 2); - - gradInput = gradInput.view({batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - input = input.view({batchSize / im2col_step, - im2col_step, - nInputPlane, 
- inputHeight, - inputWidth}); - gradOffset = gradOffset.view({batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - offset = offset.view({batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - // divide into groups - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - gradOutput = gradOutput.view({gradOutput.size(0), - group, - gradOutput.size(1) / group, - gradOutput.size(2), - gradOutput.size(3), - gradOutput.size(4)}); - - for (int g = 0; g < group; g++) { - columns[g] = columns[g].addmm_( - weight[g].flatten(1).transpose(0, 1), - gradOutput[elt][g].flatten(1), - 0.0f, - 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradOutput = gradOutput.view({gradOutput.size(0), - gradOutput.size(1) * gradOutput.size(2), - gradOutput.size(3), - gradOutput.size(4), - gradOutput.size(5)}); - - deformable_col2im_coord( - columns, - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - gradOffset[elt]); - - deformable_col2im( - columns, - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - gradInput[elt]); - } - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - gradOffset = gradOffset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - gradOffset = - gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); - } - - return 1; -} - -int deform_conv_backward_parameters_cuda( - at::Tensor input, - at::Tensor offset, - at::Tensor gradOutput, - at::Tensor gradWeight, // at::Tensor gradBias, - at::Tensor columns, - at::Tensor ones, - int kW, - int kH, - int dW, - int dH, - int padW, - int padH, - int dilationW, - int dilationH, - int group, - int deformable_group, - float scale, - int im2col_step) { - // todo: transpose and reshape outGrad - // todo: reshape columns - // todo: add im2col_step as input - - shape_check( - input, - offset, - &gradOutput, - gradWeight, - kH, - kW, - dH, - dW, - padH, - padW, - dilationH, - dilationW, - group, - deformable_group); - - input = input.contiguous(); - offset = offset.contiguous(); - gradOutput = gradOutput.contiguous(); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view( - at::IntList({1, input.size(0), input.size(1), input.size(2)})); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long 
nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = gradWeight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - gradOutput = gradOutput.view({batchSize / im2col_step, - im2col_step, - nOutputPlane, - outputHeight, - outputWidth}); - gradOutput.transpose_(1, 2); - - at::Tensor gradOutputBuffer = at::zeros_like(gradOutput); - gradOutputBuffer = gradOutputBuffer.view({batchSize / im2col_step, - nOutputPlane, - im2col_step, - outputHeight, - outputWidth}); - gradOutputBuffer.copy_(gradOutput); - // gradOutput is not contiguous, so we do reshape (instead of view) next - gradOutputBuffer = gradOutputBuffer.reshape({batchSize / im2col_step, - nOutputPlane, - im2col_step * outputHeight, - outputWidth}); - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize / im2col_step, - im2col_step, - nInputPlane, - inputHeight, - inputWidth}); - offset = offset.view({batchSize / im2col_step, - im2col_step, - deformable_group * 2 * kH * kW, - outputHeight, - outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col( - input[elt], - offset[elt], - nInputPlane, - inputHeight, - inputWidth, - kH, - kW, - padH, - padW, - dH, - dW, - dilationH, - dilationW, - im2col_step, - deformable_group, - columns); - - // divide into group - gradOutputBuffer = gradOutputBuffer.view({gradOutputBuffer.size(0), - group, - gradOutputBuffer.size(1) / group, - gradOutputBuffer.size(2), - gradOutputBuffer.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - gradWeight = gradWeight.view({group, - gradWeight.size(0) / group, - gradWeight.size(1), - gradWeight.size(2), - gradWeight.size(3)}); - - for (int g = 0; g < group; g++) { - gradWeight[g] = gradWeight[g] - .flatten(1) - .addmm_( - gradOutputBuffer[elt][g].flatten(1), - columns[g].transpose(1, 0), - 1.0, - scale) - .view_as(gradWeight[g]); - } - gradOutputBuffer = gradOutputBuffer.view( - {gradOutputBuffer.size(0), - gradOutputBuffer.size(1) * gradOutputBuffer.size(2), - gradOutputBuffer.size(3), - gradOutputBuffer.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), - gradWeight.size(2), - gradWeight.size(3), - gradWeight.size(4)}); - } - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - } - - return 1; -} - -void modulated_deform_conv_cuda_forward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor output, - at::Tensor columns, - int kernel_h, - int kernel_w, - const int stride_h, - const int stride_w, - const int pad_h, - const int pad_w, - const int dilation_h, - const int dilation_w, - const int group, - const int deformable_group, - 
const bool with_bias) { - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_out = weight.size(0); - const int channels_kernel = weight.size(1); - const int kernel_h_ = weight.size(2); - const int kernel_w_ = weight.size(3); - - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR( - "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", - kernel_h_, - kernel_w, - kernel_h_, - kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR( - "Input shape and kernel channels wont match: (%d vs %d).", - channels, - channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... - ones = at::ones({height_out, width_out}, input.options()); - } - - // resize output - output = output.view({batch, channels_out, height_out, width_out}).zero_(); - // resize temporary columns - columns = at::zeros( - {channels * kernel_h * kernel_w, 1 * height_out * width_out}, - input.options()); - - output = output.view({output.size(0), - group, - output.size(1) / group, - output.size(2), - output.size(3)}); - - for (int b = 0; b < batch; b++) { - modulated_deformable_im2col_cuda( - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - columns); - - // divide into group - weight = weight.view({group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - - for (int g = 0; g < group; g++) { - output[b][g] = output[b][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output[b][g]); - } - - weight = weight.view({weight.size(0) * weight.size(1), - weight.size(2), - weight.size(3), - weight.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - } - - output = output.view({output.size(0), - output.size(1) * output.size(2), - output.size(3), - output.size(4)}); - - if (with_bias) { - output += bias.view({1, bias.size(0), 1, 1}); - } -} - -void modulated_deform_conv_cuda_backward( - at::Tensor input, - at::Tensor weight, - at::Tensor bias, - at::Tensor ones, - at::Tensor offset, - at::Tensor mask, - at::Tensor columns, - at::Tensor grad_input, - at::Tensor grad_weight, - at::Tensor grad_bias, - at::Tensor grad_offset, - at::Tensor grad_mask, - at::Tensor grad_output, - int kernel_h, - int kernel_w, - int stride_h, - int stride_w, - int pad_h, - int pad_w, - int dilation_h, - int dilation_w, - int group, - int deformable_group, - const bool with_bias) { - TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_kernel = weight.size(1); - const int kernel_h_ = 
weight.size(2); - const int kernel_w_ = weight.size(3); - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR( - "Input shape and kernel shape wont match: (%d x %d vs %d x %d).", - kernel_h_, - kernel_w, - kernel_h_, - kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR( - "Input shape and kernel channels wont match: (%d vs %d).", - channels, - channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... - ones = at::ones({height_out, width_out}, input.options()); - } - - grad_input = grad_input.view({batch, channels, height, width}); - columns = at::zeros( - {channels * kernel_h * kernel_w, height_out * width_out}, - input.options()); - - grad_output = grad_output.view({grad_output.size(0), - group, - grad_output.size(1) / group, - grad_output.size(2), - grad_output.size(3)}); - - for (int b = 0; b < batch; b++) { - // divide int group - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, - weight.size(0) / group, - weight.size(1), - weight.size(2), - weight.size(3)}); - - for (int g = 0; g < group; g++) { - columns[g].addmm_( - weight[g].flatten(1).transpose(0, 1), - grad_output[b][g].flatten(1), - 0.0f, - 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - weight = weight.view({weight.size(0) * weight.size(1), - weight.size(2), - weight.size(3), - weight.size(4)}); - - // gradient w.r.t. input coordinate data - modulated_deformable_col2im_coord_cuda( - columns, - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - grad_offset[b], - grad_mask[b]); - // gradient w.r.t. input data - modulated_deformable_col2im_cuda( - columns, - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - grad_input[b]); - - // gradient w.r.t. 
weight, dWeight should accumulate across the batch and - // group - modulated_deformable_im2col_cuda( - input[b], - offset[b], - mask[b], - 1, - channels, - height, - width, - height_out, - width_out, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - deformable_group, - columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - grad_weight = grad_weight.view({group, - grad_weight.size(0) / group, - grad_weight.size(1), - grad_weight.size(2), - grad_weight.size(3)}); - if (with_bias) - grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); - - for (int g = 0; g < group; g++) { - grad_weight[g] = - grad_weight[g] - .flatten(1) - .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) - .view_as(grad_weight[g]); - if (with_bias) { - grad_bias[g] = - grad_bias[g] - .view({-1, 1}) - .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) - .view(-1); - } - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), - grad_weight.size(2), - grad_weight.size(3), - grad_weight.size(4)}); - if (with_bias) - grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); - } - grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), - grad_output.size(2), - grad_output.size(3), - grad_output.size(4)}); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu deleted file mode 100644 index 841f316..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/deformable/deform_conv_cuda_kernel.cu +++ /dev/null @@ -1,1288 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -// modified from -// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu -// Original license: Apache 2.0 -// clang-format off - -// modify from -// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu - -/*! - ******************* BEGIN Caffe Copyright Notice and Disclaimer ***************** - * - * COPYRIGHT - * - * All contributions by the University of California: - * Copyright (c) 2014-2017 The Regents of the University of California (Regents) - * All rights reserved. - * - * All other contributions: - * Copyright (c) 2014-2017, the respective contributors - * All rights reserved. - * - * Caffe uses a shared copyright model: each contributor holds copyright over - * their contributions to Caffe. The project versioning records all such - * contribution and copyright details. If a contributor wants to further mark - * their specific copyright on a particular contribution, they should indicate - * their copyright solely in the commit message of the change when it is - * committed. - * - * LICENSE - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - *AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - *IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE - *FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - *DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - *SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - *CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - *OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - *OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * CONTRIBUTION AGREEMENT - * - * By contributing to the BVLC/caffe repository through pull-request, comment, - * or otherwise, the contributor releases their content to the - * license and copyright terms herein. - * - ***************** END Caffe Copyright Notice and Disclaimer ********************* - * - * Copyright (c) 2018 Microsoft - * Licensed under The MIT License [see LICENSE for details] - * \file modulated_deformable_im2col.cuh - * \brief Function definitions of converting an image to - * column matrix based on kernel, padding, dilation, and offset. - * These functions are mainly used in deformable convolution operators. - * \ref: https://arxiv.org/abs/1703.06211 - * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng - */ - -#include -#include -#include -#include -#include -#include - -using namespace at; - -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) - - -namespace { - -const int CUDA_NUM_THREADS = 1024; -const int kMaxGridNum = 65535; - -inline int GET_BLOCKS(const int N) { - return std::min(kMaxGridNum, (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS); -} - -} - -template -__device__ scalar_t deformable_im2col_bilinear( - const scalar_t* bottom_data, - const int data_width, - const int height, - const int width, - scalar_t h, - scalar_t w) { - int h_low = floor(h); - int w_low = floor(w); - int h_high = h_low + 1; - int w_high = w_low + 1; - - scalar_t lh = h - h_low; - scalar_t lw = w - w_low; - scalar_t hh = 1 - lh, hw = 1 - lw; - - scalar_t v1 = 0; - if (h_low >= 0 && w_low >= 0) - v1 = bottom_data[h_low * data_width + w_low]; - scalar_t v2 = 0; - if (h_low >= 0 && w_high <= width - 1) - v2 = bottom_data[h_low * data_width + w_high]; - scalar_t v3 = 0; - if (h_high <= height - 1 && w_low >= 0) - v3 = bottom_data[h_high * data_width + w_low]; - scalar_t v4 = 0; - if (h_high <= height - 1 && w_high <= width - 1) - v4 = bottom_data[h_high * data_width + w_high]; - - scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -template -__device__ scalar_t get_gradient_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int h, - const int w, - const int height, - const int width) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = 
argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - if (h == argmax_h_low && w == argmax_w_low) - weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); - if (h == argmax_h_low && w == argmax_w_high) - weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); - if (h == argmax_h_high && w == argmax_w_low) - weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); - if (h == argmax_h_high && w == argmax_w_high) - weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); - return weight; -} - -template -__device__ scalar_t get_coordinate_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int height, - const int width, - const scalar_t* im_data, - const int data_width, - const int bp_dir) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - - if (bp_dir == 0) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += -1 * (argmax_w - argmax_w_low) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_w - argmax_w_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } else if (bp_dir == 1) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += -1 * (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } - - return weight; -} - -template -__global__ void deformable_im2col_gpu_kernel( - const int n, - const scalar_t* data_im, - const scalar_t* data_offset, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int num_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* data_col) { - CUDA_KERNEL_LOOP(index, n) { - // index index of output matrix - const int w_col = index % width_col; - const int h_col = (index / width_col) % height_col; - const int b_col = (index / width_col / height_col) % batch_size; - const int c_im = (index / width_col / height_col) / batch_size; - const int c_col = c_im * kernel_h * kernel_w; - - // compute deformable group index - const int deformable_group_index = c_im / channel_per_deformable_group; - - const int h_in = h_col * stride_h - pad_h; - const int w_in = w_col * stride_w - pad_w; - scalar_t* data_col_ptr = data_col + - ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; - // const scalar_t* data_im_ptr = data_im + 
((b_col * num_channels + c_im) * - // height + h_in) * width + w_in; - const scalar_t* data_im_ptr = - data_im + (b_col * num_channels + c_im) * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - for (int i = 0; i < kernel_h; ++i) { - for (int j = 0; j < kernel_w; ++j) { - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + - w_col; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - scalar_t val = static_cast(0); - const scalar_t h_im = h_in + i * dilation_h + offset_h; - const scalar_t w_im = w_in + j * dilation_w + offset_w; - if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { - // const scalar_t map_h = i * dilation_h + offset_h; - // const scalar_t map_w = j * dilation_w + offset_w; - // const int cur_height = height - h_in; - // const int cur_width = width - w_in; - // val = deformable_im2col_bilinear(data_im_ptr, width, cur_height, - // cur_width, map_h, map_w); - val = deformable_im2col_bilinear( - data_im_ptr, width, height, width, h_im, w_im); - } - *data_col_ptr = val; - data_col_ptr += batch_size * height_col * width_col; - } - } - } -} - - -template -__global__ void deformable_col2im_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_offset, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_im) { - CUDA_KERNEL_LOOP(index, n) { - const int j = (index / width_col / height_col / batch_size) % kernel_w; - const int i = - (index / width_col / height_col / batch_size / kernel_w) % kernel_h; - const int c = - index / width_col / height_col / batch_size / kernel_w / kernel_h; - // compute the start and end of the output - - const int deformable_group_index = c / channel_per_deformable_group; - - int w_out = index % width_col; - int h_out = (index / width_col) % height_col; - int b = (index / width_col / height_col) % batch_size; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; - const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; - - const scalar_t cur_top_grad = data_col[index]; - const int cur_h = (int)cur_inv_h_data; - const int cur_w = (int)cur_inv_w_data; - for (int dy = -2; dy <= 2; dy++) { - for (int dx = -2; dx <= 2; dx++) { - if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && - cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && - 
abs(cur_inv_w_data - (cur_w + dx)) < 1) { - int cur_bottom_grad_pos = - ((b * channels + c) * height + cur_h + dy) * width + cur_w + dx; - scalar_t weight = get_gradient_weight( - cur_inv_h_data, - cur_inv_w_data, - cur_h + dy, - cur_w + dx, - height, - width); - atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); - } - } - } - } -} - - -template -__global__ void deformable_col2im_coord_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_im, - const scalar_t* data_offset, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int offset_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_offset) { - CUDA_KERNEL_LOOP(index, n) { - scalar_t val = 0; - int w = index % width_col; - int h = (index / width_col) % height_col; - int c = (index / width_col / height_col) % offset_channels; - int b = (index / width_col / height_col) / offset_channels; - // compute the start and end of the output - - const int deformable_group_index = c / (2 * kernel_h * kernel_w); - const int col_step = kernel_h * kernel_w; - int cnt = 0; - const scalar_t* data_col_ptr = data_col + - deformable_group_index * channel_per_deformable_group * batch_size * - width_col * height_col; - const scalar_t* data_im_ptr = data_im + - (b * deformable_group + deformable_group_index) * - channel_per_deformable_group / kernel_h / kernel_w * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; - - for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; - col_c += col_step) { - const int col_pos = - (((col_c * batch_size + b) * height_col) + h) * width_col + w; - const int bp_dir = offset_c % 2; - - int j = (col_pos / width_col / height_col / batch_size) % kernel_w; - int i = - (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; - int w_out = col_pos % width_col; - int h_out = (col_pos / width_col) % height_col; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - const int data_offset_h_ptr = - (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); - const int data_offset_w_ptr = - (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + - w_out); - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - scalar_t inv_h = h_in + i * dilation_h + offset_h; - scalar_t inv_w = w_in + j * dilation_w + offset_w; - if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { - inv_h = inv_w = -2; - } - const scalar_t weight = get_coordinate_weight( - inv_h, - inv_w, - height, - width, - data_im_ptr + cnt * height * width, - width, - bp_dir); - val += weight * data_col_ptr[col_pos]; - cnt += 1; - } - - grad_offset[index] = val; - } -} - - -namespace detectron2 { - -void deformable_im2col( - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int 
dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor data_col) { - // num_axes should be smaller than block size - // todo: check parallel_imgs is correctly passed in - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = channels * height_col * width_col * parallel_imgs; - int channel_per_deformable_group = channels / deformable_group; - - at::cuda::CUDAGuard device_guard(data_im.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_im.scalar_type(), "deformable_im2col_gpu", ([&] { - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* data_col_ = data_col.data_ptr(); - - deformable_im2col_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_im_, - data_offset_, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - channels, - deformable_group, - height_col, - width_col, - data_col_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf("error in deformable_im2col: %s\n", cudaGetErrorString(err)); - } -} - - -void deformable_col2im( - const at::Tensor data_col, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - at::Tensor grad_im) { - // todo: make sure parallel_imgs is passed in correctly - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = - channels * ksize_h * ksize_w * height_col * width_col * parallel_imgs; - int channel_per_deformable_group = channels / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "deformable_col2im_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* grad_im_ = grad_im.data_ptr(); - - deformable_col2im_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_offset_, - channels, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - deformable_group, - height_col, - width_col, - grad_im_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf("error in deformable_col2im: %s\n", cudaGetErrorString(err)); - } -} - - -void deformable_col2im_coord( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const int channels, - const int height, - const int width, - const int ksize_h, - const int ksize_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int parallel_imgs, - const int deformable_group, - 
at::Tensor grad_offset) { - int height_col = - (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1; - int width_col = - (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1; - int num_kernels = height_col * width_col * 2 * ksize_h * ksize_w * - deformable_group * parallel_imgs; - int channel_per_deformable_group = - channels * ksize_h * ksize_w / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - scalar_t* grad_offset_ = grad_offset.data_ptr(); - - deformable_col2im_coord_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_im_, - data_offset_, - channels, - height, - width, - ksize_h, - ksize_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - parallel_imgs, - 2 * ksize_h * ksize_w * deformable_group, - deformable_group, - height_col, - width_col, - grad_offset_); - })); -} - -} // namespace detectron2 - - -template -__device__ scalar_t dmcn_im2col_bilinear( - const scalar_t* bottom_data, - const int data_width, - const int height, - const int width, - scalar_t h, - scalar_t w) { - int h_low = floor(h); - int w_low = floor(w); - int h_high = h_low + 1; - int w_high = w_low + 1; - - scalar_t lh = h - h_low; - scalar_t lw = w - w_low; - scalar_t hh = 1 - lh, hw = 1 - lw; - - scalar_t v1 = 0; - if (h_low >= 0 && w_low >= 0) - v1 = bottom_data[h_low * data_width + w_low]; - scalar_t v2 = 0; - if (h_low >= 0 && w_high <= width - 1) - v2 = bottom_data[h_low * data_width + w_high]; - scalar_t v3 = 0; - if (h_high <= height - 1 && w_low >= 0) - v3 = bottom_data[h_high * data_width + w_low]; - scalar_t v4 = 0; - if (h_high <= height - 1 && w_high <= width - 1) - v4 = bottom_data[h_high * data_width + w_high]; - - scalar_t w1 = hh * hw, w2 = hh * lw, w3 = lh * hw, w4 = lh * lw; - - scalar_t val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); - return val; -} - -template -__device__ scalar_t dmcn_get_gradient_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int h, - const int w, - const int height, - const int width) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - if (h == argmax_h_low && w == argmax_w_low) - weight = (h + 1 - argmax_h) * (w + 1 - argmax_w); - if (h == argmax_h_low && w == argmax_w_high) - weight = (h + 1 - argmax_h) * (argmax_w + 1 - w); - if (h == argmax_h_high && w == argmax_w_low) - weight = (argmax_h + 1 - h) * (w + 1 - argmax_w); - if (h == argmax_h_high && w == argmax_w_high) - weight = (argmax_h + 1 - h) * (argmax_w + 1 - w); - return weight; -} - -template -__device__ scalar_t dmcn_get_coordinate_weight( - scalar_t argmax_h, - scalar_t argmax_w, - const int height, - const int width, - const scalar_t* im_data, - const int data_width, - const int bp_dir) { - if (argmax_h <= -1 || argmax_h >= height || argmax_w <= -1 || - argmax_w >= width) { - // empty - return 0; - } - - int argmax_h_low = floor(argmax_h); - int argmax_w_low = 
floor(argmax_w); - int argmax_h_high = argmax_h_low + 1; - int argmax_w_high = argmax_w_low + 1; - - scalar_t weight = 0; - - if (bp_dir == 0) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += -1 * (argmax_w - argmax_w_low) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += (argmax_w_low + 1 - argmax_w) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_w - argmax_w_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } else if (bp_dir == 1) { - if (argmax_h_low >= 0 && argmax_w_low >= 0) - weight += -1 * (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_low]; - if (argmax_h_low >= 0 && argmax_w_high <= width - 1) - weight += (argmax_h_low + 1 - argmax_h) * - im_data[argmax_h_low * data_width + argmax_w_high]; - if (argmax_h_high <= height - 1 && argmax_w_low >= 0) - weight += -1 * (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_low]; - if (argmax_h_high <= height - 1 && argmax_w_high <= width - 1) - weight += (argmax_h - argmax_h_low) * - im_data[argmax_h_high * data_width + argmax_w_high]; - } - - return weight; -} - -template -__global__ void modulated_deformable_im2col_gpu_kernel( - const int n, - const scalar_t* data_im, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int num_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* data_col) { - CUDA_KERNEL_LOOP(index, n) { - // index index of output matrix - const int w_col = index % width_col; - const int h_col = (index / width_col) % height_col; - const int b_col = (index / width_col / height_col) % batch_size; - const int c_im = (index / width_col / height_col) / batch_size; - const int c_col = c_im * kernel_h * kernel_w; - - // compute deformable group index - const int deformable_group_index = c_im / channel_per_deformable_group; - - const int h_in = h_col * stride_h - pad_h; - const int w_in = w_col * stride_w - pad_w; - - scalar_t* data_col_ptr = data_col + - ((c_col * batch_size + b_col) * height_col + h_col) * width_col + w_col; - // const float* data_im_ptr = data_im + ((b_col * num_channels + c_im) * - // height + h_in) * width + w_in; - const scalar_t* data_im_ptr = - data_im + (b_col * num_channels + c_im) * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - - const scalar_t* data_mask_ptr = data_mask + - (b_col * deformable_group + deformable_group_index) * kernel_h * - kernel_w * height_col * width_col; - - for (int i = 0; i < kernel_h; ++i) { - for (int j = 0; j < kernel_w; ++j) { - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_col) * width_col + w_col; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_col) * width_col + - w_col; - const int data_mask_hw_ptr = - ((i * kernel_w + j) * height_col + h_col) * 
width_col + w_col; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - scalar_t val = static_cast(0); - const scalar_t h_im = h_in + i * dilation_h + offset_h; - const scalar_t w_im = w_in + j * dilation_w + offset_w; - // if (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) { - if (h_im > -1 && w_im > -1 && h_im < height && w_im < width) { - // const float map_h = i * dilation_h + offset_h; - // const float map_w = j * dilation_w + offset_w; - // const int cur_height = height - h_in; - // const int cur_width = width - w_in; - // val = dmcn_im2col_bilinear(data_im_ptr, width, cur_height, - // cur_width, map_h, map_w); - val = dmcn_im2col_bilinear( - data_im_ptr, width, height, width, h_im, w_im); - } - *data_col_ptr = val * mask; - data_col_ptr += batch_size * height_col * width_col; - // data_col_ptr += height_col * width_col; - } - } - } -} - -template -__global__ void modulated_deformable_col2im_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_im) { - CUDA_KERNEL_LOOP(index, n) { - const int j = (index / width_col / height_col / batch_size) % kernel_w; - const int i = - (index / width_col / height_col / batch_size / kernel_w) % kernel_h; - const int c = - index / width_col / height_col / batch_size / kernel_w / kernel_h; - // compute the start and end of the output - - const int deformable_group_index = c / channel_per_deformable_group; - - int w_out = index % width_col; - int h_out = (index / width_col) % height_col; - int b = (index / width_col / height_col) % batch_size; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const scalar_t* data_mask_ptr = data_mask + - (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * - height_col * width_col; - const int data_offset_h_ptr = - ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; - const int data_offset_w_ptr = - ((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + w_out; - const int data_mask_hw_ptr = - ((i * kernel_w + j) * height_col + h_out) * width_col + w_out; - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - const scalar_t cur_inv_h_data = h_in + i * dilation_h + offset_h; - const scalar_t cur_inv_w_data = w_in + j * dilation_w + offset_w; - - const scalar_t cur_top_grad = data_col[index] * mask; - const int cur_h = (int)cur_inv_h_data; - const int cur_w = (int)cur_inv_w_data; - for (int dy = -2; dy <= 2; dy++) { - for (int dx = -2; dx <= 2; dx++) { - if (cur_h + dy >= 0 && cur_h + dy < height && cur_w + dx >= 0 && - cur_w + dx < width && abs(cur_inv_h_data - (cur_h + dy)) < 1 && - abs(cur_inv_w_data - (cur_w + dx)) < 1) { - int cur_bottom_grad_pos = - ((b * channels + c) 
* height + cur_h + dy) * width + cur_w + dx; - scalar_t weight = dmcn_get_gradient_weight( - cur_inv_h_data, - cur_inv_w_data, - cur_h + dy, - cur_w + dx, - height, - width); - atomicAdd(grad_im + cur_bottom_grad_pos, weight * cur_top_grad); - } - } - } - } -} - -template -__global__ void modulated_deformable_col2im_coord_gpu_kernel( - const int n, - const scalar_t* data_col, - const scalar_t* data_im, - const scalar_t* data_offset, - const scalar_t* data_mask, - const int channels, - const int height, - const int width, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int channel_per_deformable_group, - const int batch_size, - const int offset_channels, - const int deformable_group, - const int height_col, - const int width_col, - scalar_t* grad_offset, - scalar_t* grad_mask) { - CUDA_KERNEL_LOOP(index, n) { - scalar_t val = 0, mval = 0; - int w = index % width_col; - int h = (index / width_col) % height_col; - int c = (index / width_col / height_col) % offset_channels; - int b = (index / width_col / height_col) / offset_channels; - // compute the start and end of the output - - const int deformable_group_index = c / (2 * kernel_h * kernel_w); - const int col_step = kernel_h * kernel_w; - int cnt = 0; - const scalar_t* data_col_ptr = data_col + - deformable_group_index * channel_per_deformable_group * batch_size * - width_col * height_col; - const scalar_t* data_im_ptr = data_im + - (b * deformable_group + deformable_group_index) * - channel_per_deformable_group / kernel_h / kernel_w * height * width; - const scalar_t* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; - const scalar_t* data_mask_ptr = data_mask + - (b * deformable_group + deformable_group_index) * kernel_h * kernel_w * - height_col * width_col; - - const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; - - for (int col_c = (offset_c / 2); col_c < channel_per_deformable_group; - col_c += col_step) { - const int col_pos = - (((col_c * batch_size + b) * height_col) + h) * width_col + w; - const int bp_dir = offset_c % 2; - - int j = (col_pos / width_col / height_col / batch_size) % kernel_w; - int i = - (col_pos / width_col / height_col / batch_size / kernel_w) % kernel_h; - int w_out = col_pos % width_col; - int h_out = (col_pos / width_col) % height_col; - int w_in = w_out * stride_w - pad_w; - int h_in = h_out * stride_h - pad_h; - const int data_offset_h_ptr = - (((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out); - const int data_offset_w_ptr = - (((2 * (i * kernel_w + j) + 1) * height_col + h_out) * width_col + - w_out); - const int data_mask_hw_ptr = - (((i * kernel_w + j) * height_col + h_out) * width_col + w_out); - const scalar_t offset_h = data_offset_ptr[data_offset_h_ptr]; - const scalar_t offset_w = data_offset_ptr[data_offset_w_ptr]; - const scalar_t mask = data_mask_ptr[data_mask_hw_ptr]; - scalar_t inv_h = h_in + i * dilation_h + offset_h; - scalar_t inv_w = w_in + j * dilation_w + offset_w; - if (inv_h <= -1 || inv_w <= -1 || inv_h >= height || inv_w >= width) { - inv_h = inv_w = -2; - } else { - mval += data_col_ptr[col_pos] * - dmcn_im2col_bilinear( - data_im_ptr + cnt * height * width, - width, - height, - width, - inv_h, - inv_w); - } - const scalar_t weight = dmcn_get_coordinate_weight( - inv_h, - inv_w, - height, - width, - data_im_ptr + cnt * 
height * width, - width, - bp_dir); - val += weight * data_col_ptr[col_pos] * mask; - cnt += 1; - } - // KERNEL_ASSIGN(grad_offset[index], offset_req, val); - grad_offset[index] = val; - if (offset_c % 2 == 0) - // KERNEL_ASSIGN(grad_mask[(((b * deformable_group + - // deformable_group_index) * kernel_h * kernel_w + offset_c / 2) * - // height_col + h) * width_col + w], mask_req, mval); - grad_mask - [(((b * deformable_group + deformable_group_index) * kernel_h * - kernel_w + - offset_c / 2) * - height_col + - h) * - width_col + - w] = mval; - } -} - - -namespace detectron2 { - -void modulated_deformable_im2col_cuda( - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kenerl_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor data_col) { - // num_axes should be smaller than block size - const int channel_per_deformable_group = channels / deformable_group; - const int num_kernels = channels * batch_size * height_col * width_col; - - at::cuda::CUDAGuard device_guard(data_im.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* data_col_ = data_col.data_ptr(); - - modulated_deformable_im2col_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_im_, - data_offset_, - data_mask_, - height_im, - width_im, - kernel_h, - kenerl_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - channels, - deformable_group, - height_col, - width_col, - data_col_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_im2col_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -void modulated_deformable_col2im_cuda( - const at::Tensor data_col, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_im) { - const int channel_per_deformable_group = channels / deformable_group; - const int num_kernels = - channels * kernel_h * kernel_w * batch_size * height_col * width_col; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* grad_im_ = grad_im.data_ptr(); - - modulated_deformable_col2im_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_offset_, - data_mask_, - 
channels, - height_im, - width_im, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - deformable_group, - height_col, - width_col, - grad_im_); - })); - - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_col2im_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -void modulated_deformable_col2im_coord_cuda( - const at::Tensor data_col, - const at::Tensor data_im, - const at::Tensor data_offset, - const at::Tensor data_mask, - const int batch_size, - const int channels, - const int height_im, - const int width_im, - const int height_col, - const int width_col, - const int kernel_h, - const int kernel_w, - const int pad_h, - const int pad_w, - const int stride_h, - const int stride_w, - const int dilation_h, - const int dilation_w, - const int deformable_group, - at::Tensor grad_offset, - at::Tensor grad_mask) { - const int num_kernels = batch_size * height_col * width_col * 2 * kernel_h * - kernel_w * deformable_group; - const int channel_per_deformable_group = - channels * kernel_h * kernel_w / deformable_group; - - at::cuda::CUDAGuard device_guard(data_col.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { - const scalar_t* data_col_ = data_col.data_ptr(); - const scalar_t* data_im_ = data_im.data_ptr(); - const scalar_t* data_offset_ = data_offset.data_ptr(); - const scalar_t* data_mask_ = data_mask.data_ptr(); - scalar_t* grad_offset_ = grad_offset.data_ptr(); - scalar_t* grad_mask_ = grad_mask.data_ptr(); - - modulated_deformable_col2im_coord_gpu_kernel<<< - GET_BLOCKS(num_kernels), - CUDA_NUM_THREADS, - 0, - stream>>>( - num_kernels, - data_col_, - data_im_, - data_offset_, - data_mask_, - channels, - height_im, - width_im, - kernel_h, - kernel_w, - pad_h, - pad_w, - stride_h, - stride_w, - dilation_h, - dilation_w, - channel_per_deformable_group, - batch_size, - 2 * kernel_h * kernel_w * deformable_group, - deformable_group, - height_col, - width_col, - grad_offset_, - grad_mask_); - })); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf( - "error in modulated_deformable_col2im_coord_cuda: %s\n", - cudaGetErrorString(err)); - } -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h deleted file mode 100644 index 9c86c8d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
-#pragma once
-#include <torch/types.h>
-
-namespace detectron2 {
-
-at::Tensor nms_rotated_cpu(
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold);
-
-#ifdef WITH_CUDA
-at::Tensor nms_rotated_cuda(
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold);
-#endif
-
-// Interface for Python
-// inline is needed to prevent multiple function definitions when this header is
-// included by different cpps
-inline at::Tensor nms_rotated(
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold) {
-  assert(dets.device().is_cuda() == scores.device().is_cuda());
-  if (dets.device().is_cuda()) {
-#ifdef WITH_CUDA
-    return nms_rotated_cuda(
-        dets.contiguous(), scores.contiguous(), iou_threshold);
-#else
-    AT_ERROR("Not compiled with GPU support");
-#endif
-  }
-
-  return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold);
-}
-
-} // namespace detectron2
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
deleted file mode 100644
index 0658e38..0000000
--- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cpu.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-#include "../box_iou_rotated/box_iou_rotated_utils.h"
-#include "nms_rotated.h"
-
-namespace detectron2 {
-
-template <typename scalar_t>
-at::Tensor nms_rotated_cpu_kernel(
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold) {
-  // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel,
-  // however, the code in this function is much shorter because
-  // we delegate the IoU computation for rotated boxes to
-  // the single_box_iou_rotated function in box_iou_rotated_utils.h
-  AT_ASSERTM(dets.device().is_cpu(), "dets must be a CPU tensor");
-  AT_ASSERTM(scores.device().is_cpu(), "scores must be a CPU tensor");
-  AT_ASSERTM(
-      dets.scalar_type() == scores.scalar_type(),
-      "dets should have the same type as scores");
-
-  if (dets.numel() == 0) {
-    return at::empty({0}, dets.options().dtype(at::kLong));
-  }
-
-  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
-
-  auto ndets = dets.size(0);
-  at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte));
-  at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong));
-
-  auto suppressed = suppressed_t.data_ptr<uint8_t>();
-  auto keep = keep_t.data_ptr<int64_t>();
-  auto order = order_t.data_ptr<int64_t>();
-
-  int64_t num_to_keep = 0;
-
-  for (int64_t _i = 0; _i < ndets; _i++) {
-    auto i = order[_i];
-    if (suppressed[i] == 1) {
-      continue;
-    }
-
-    keep[num_to_keep++] = i;
-
-    for (int64_t _j = _i + 1; _j < ndets; _j++) {
-      auto j = order[_j];
-      if (suppressed[j] == 1) {
-        continue;
-      }
-
-      auto ovr = single_box_iou_rotated<scalar_t>(
-          dets[i].data_ptr<scalar_t>(), dets[j].data_ptr<scalar_t>());
-      if (ovr >= iou_threshold) {
-        suppressed[j] = 1;
-      }
-    }
-  }
-  return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
-}
-
-at::Tensor nms_rotated_cpu(
-    // input must be contiguous
-    const at::Tensor& dets,
-    const at::Tensor& scores,
-    const float iou_threshold) {
-  auto result = at::empty({0}, dets.options());
-
-  AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] {
-    result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
-  });
-  return result;
-}
-
-} // namespace detectron2
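
The suppression loop in nms_rotated_cpu_kernel above is the standard greedy NMS recipe: sort boxes by score, keep the highest-scoring box, and mark every later box whose rotated IoU with it reaches the threshold as suppressed. A minimal Python sketch of the same logic follows, assuming a hypothetical rotated_iou(box_a, box_b) helper that stands in for single_box_iou_rotated from box_iou_rotated_utils.h; boxes are (x_center, y_center, width, height, angle_degrees), matching the layout described in the CUDA kernel below.

    import numpy as np

    def rotated_nms_reference(dets, scores, iou_threshold, rotated_iou):
        """Greedy rotated NMS mirroring nms_rotated_cpu_kernel.

        dets: (N, 5) array of (x_center, y_center, width, height, angle_degrees).
        scores: (N,) array of confidences.
        rotated_iou: callable(box_a, box_b) -> float, a stand-in for
        single_box_iou_rotated (not implemented here).
        Returns indices of kept boxes, highest score first.
        """
        order = np.argsort(-scores)              # process boxes by descending score
        suppressed = np.zeros(len(dets), dtype=bool)
        keep = []
        for _i, i in enumerate(order):
            if suppressed[i]:
                continue
            keep.append(i)                       # current box survives
            for j in order[_i + 1:]:             # suppress lower-scoring overlaps
                if suppressed[j]:
                    continue
                if rotated_iou(dets[i], dets[j]) >= iou_threshold:
                    suppressed[j] = True
        return np.asarray(keep, dtype=np.int64)

In the extension itself, the CPU/CUDA dispatch happens in the inline nms_rotated wrapper from nms_rotated.h above, and vision.cpp (further down in this diff) exposes it to Python via m.def("nms_rotated", ...).
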
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu deleted file mode 100644 index 40977a0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/nms_rotated/nms_rotated_cuda.cu +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include -#include "../box_iou_rotated/box_iou_rotated_utils.h" - -using namespace detectron2; - -namespace { -int const threadsPerBlock = sizeof(unsigned long long) * 8; -} - -template -__global__ void nms_rotated_cuda_kernel( - const int n_boxes, - const float iou_threshold, - const T* dev_boxes, - unsigned long long* dev_mask) { - // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel - - const int row_start = blockIdx.y; - const int col_start = blockIdx.x; - - // if (row_start > col_start) return; - - const int row_size = - min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int col_size = - min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - // Compared to nms_cuda_kernel, where each box is represented with 4 values - // (x1, y1, x2, y2), each rotated box is represented with 5 values - // (x_center, y_center, width, height, angle_degrees) here. - __shared__ T block_boxes[threadsPerBlock * 5]; - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 5 + 0] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; - block_boxes[threadIdx.x * 5 + 1] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; - block_boxes[threadIdx.x * 5 + 2] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; - block_boxes[threadIdx.x * 5 + 3] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; - block_boxes[threadIdx.x * 5 + 4] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; - } - __syncthreads(); - - if (threadIdx.x < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; - const T* cur_box = dev_boxes + cur_box_idx * 5; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - // Instead of devIoU used by original horizontal nms, here - // we use the single_box_iou_rotated function from box_iou_rotated_utils.h - if (single_box_iou_rotated(cur_box, block_boxes + i * 5) > - iou_threshold) { - t |= 1ULL << i; - } - } - const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); - dev_mask[cur_box_idx * col_blocks + col_start] = t; - } -} - -namespace detectron2 { - -at::Tensor nms_rotated_cuda( - // input must be contiguous - const at::Tensor& dets, - const at::Tensor& scores, - float iou_threshold) { - // using scalar_t = float; - AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); - AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); - at::cuda::CUDAGuard device_guard(dets.device()); - - auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); - auto dets_sorted = dets.index_select(0, order_t); - - auto dets_num = dets.size(0); - - const int col_blocks = - at::cuda::ATenCeilDiv(static_cast(dets_num), threadsPerBlock); - - at::Tensor mask = - at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); - - dim3 blocks(col_blocks, col_blocks); - dim3 threads(threadsPerBlock); - cudaStream_t stream = 
at::cuda::getCurrentCUDAStream(); - - AT_DISPATCH_FLOATING_TYPES( - dets_sorted.scalar_type(), "nms_rotated_kernel_cuda", [&] { - nms_rotated_cuda_kernel<<>>( - dets_num, - iou_threshold, - dets_sorted.data_ptr(), - (unsigned long long*)mask.data_ptr()); - }); - - at::Tensor mask_cpu = mask.to(at::kCPU); - unsigned long long* mask_host = - (unsigned long long*)mask_cpu.data_ptr(); - - std::vector remv(col_blocks); - memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); - - at::Tensor keep = - at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); - int64_t* keep_out = keep.data_ptr(); - - int num_to_keep = 0; - for (int i = 0; i < dets_num; i++) { - int nblock = i / threadsPerBlock; - int inblock = i % threadsPerBlock; - - if (!(remv[nblock] & (1ULL << inblock))) { - keep_out[num_to_keep++] = i; - unsigned long long* p = mask_host + i * col_blocks; - for (int j = nblock; j < col_blocks; j++) { - remv[j] |= p[j]; - } - } - } - - AT_CUDA_CHECK(cudaGetLastError()); - return order_t.index( - {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) - .to(order_t.device(), keep.scalar_type())}); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/vision.cpp b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/vision.cpp deleted file mode 100644 index fa7942e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/csrc/vision.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -#include -#include "ROIAlign/ROIAlign.h" -#include "ROIAlignRotated/ROIAlignRotated.h" -#include "box_iou_rotated/box_iou_rotated.h" -#include "deformable/deform_conv.h" -#include "nms_rotated/nms_rotated.h" - -namespace detectron2 { - -#ifdef WITH_CUDA -extern int get_cudart_version(); -#endif - -std::string get_cuda_version() { -#ifdef WITH_CUDA - std::ostringstream oss; - - // copied from - // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 - auto printCudaStyleVersion = [&](int v) { - oss << (v / 1000) << "." << (v / 10 % 100); - if (v % 10 != 0) { - oss << "." << (v % 10); - } - }; - printCudaStyleVersion(get_cudart_version()); - return oss.str(); -#else - return std::string("not available"); -#endif -} - -// similar to -// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp -std::string get_compiler_version() { - std::ostringstream ss; -#if defined(__GNUC__) -#ifndef __clang__ - -#if ((__GNUC__ <= 4) && (__GNUC_MINOR__ <= 8)) -#error "GCC >= 4.9 is required!" -#endif - - { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } -#endif -#endif - -#if defined(__clang_major__) - { - ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
- << __clang_patchlevel__; - } -#endif - -#if defined(_MSC_VER) - { ss << "MSVC " << _MSC_FULL_VER; } -#endif - return ss.str(); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); - m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); - - m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); - - m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); - m.def( - "deform_conv_backward_input", - &deform_conv_backward_input, - "deform_conv_backward_input"); - m.def( - "deform_conv_backward_filter", - &deform_conv_backward_filter, - "deform_conv_backward_filter"); - m.def( - "modulated_deform_conv_forward", - &modulated_deform_conv_forward, - "modulated_deform_conv_forward"); - m.def( - "modulated_deform_conv_backward", - &modulated_deform_conv_backward, - "modulated_deform_conv_backward"); - - m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); - - m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); - m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); - - m.def( - "roi_align_rotated_forward", - &ROIAlignRotated_forward, - "Forward pass for Rotated ROI-Align Operator"); - m.def( - "roi_align_rotated_backward", - &ROIAlignRotated_backward, - "Backward pass for Rotated ROI-Align Operator"); -} - -} // namespace detectron2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/deform_conv.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/deform_conv.py deleted file mode 100644 index ba8c649..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/deform_conv.py +++ /dev/null @@ -1,494 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import math -from functools import lru_cache -import torch -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from detectron2 import _C - -from .wrappers import _NewEmptyTensorOp - - -class _DeformConv(Function): - @staticmethod - def forward( - ctx, - input, - offset, - weight, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - im2col_step=64, - ): - if input is not None and input.dim() != 4: - raise ValueError( - "Expected 4D tensor as input, got {}D tensor instead.".format(input.dim()) - ) - ctx.stride = _pair(stride) - ctx.padding = _pair(padding) - ctx.dilation = _pair(dilation) - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.im2col_step = im2col_step - - ctx.save_for_backward(input, offset, weight) - - output = input.new_empty( - _DeformConv._output_size(input, weight, ctx.padding, ctx.dilation, ctx.stride) - ) - - ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones - - if not input.is_cuda: - raise NotImplementedError - else: - cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) - assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" - - _C.deform_conv_forward( - input, - weight, - offset, - output, - ctx.bufs_[0], - ctx.bufs_[1], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - cur_im2col_step, - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, offset, weight = ctx.saved_tensors - - grad_input = grad_offset = grad_weight = None - - if not grad_output.is_cuda: - raise NotImplementedError - else: - cur_im2col_step = _DeformConv._cal_im2col_step(input.shape[0], ctx.im2col_step) - assert (input.shape[0] % cur_im2col_step) == 0, "im2col step must divide batchsize" - - if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - _C.deform_conv_backward_input( - input, - offset, - grad_output, - grad_input, - grad_offset, - weight, - ctx.bufs_[0], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - cur_im2col_step, - ) - - if ctx.needs_input_grad[2]: - grad_weight = torch.zeros_like(weight) - _C.deform_conv_backward_filter( - input, - offset, - grad_output, - grad_weight, - ctx.bufs_[0], - ctx.bufs_[1], - weight.size(3), - weight.size(2), - ctx.stride[1], - ctx.stride[0], - ctx.padding[1], - ctx.padding[0], - ctx.dilation[1], - ctx.dilation[0], - ctx.groups, - ctx.deformable_groups, - 1, - cur_im2col_step, - ) - - return grad_input, grad_offset, grad_weight, None, None, None, None, None, None - - @staticmethod - def _output_size(input, weight, padding, dilation, stride): - channels = weight.size(0) - output_size = (input.size(0), channels) - for d in range(input.dim() - 2): - in_size = input.size(d + 2) - pad = padding[d] - kernel = dilation[d] * (weight.size(d + 2) - 1) + 1 - stride_ = stride[d] - output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,) - if not all(map(lambda s: s > 0, output_size)): - raise ValueError( - "convolution input is too small (output would be {})".format( - "x".join(map(str, output_size)) - ) - ) - return 
output_size - - @staticmethod - @lru_cache(maxsize=128) - def _cal_im2col_step(input_size, default_size): - """ - Calculate proper im2col step size, which should be divisible by input_size and not larger - than prefer_size. Meanwhile the step size should be as large as possible to be more - efficient. So we choose the largest one among all divisors of input_size which are smaller - than prefer_size. - :param input_size: input batch size . - :param default_size: default preferred im2col step size. - :return: the largest proper step size. - """ - if input_size <= default_size: - return input_size - best_step = 1 - for step in range(2, min(int(math.sqrt(input_size)) + 1, default_size)): - if input_size % step == 0: - if input_size // step <= default_size: - return input_size // step - best_step = step - - return best_step - - -class _ModulatedDeformConv(Function): - @staticmethod - def forward( - ctx, - input, - offset, - mask, - weight, - bias=None, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - ): - ctx.stride = stride - ctx.padding = padding - ctx.dilation = dilation - ctx.groups = groups - ctx.deformable_groups = deformable_groups - ctx.with_bias = bias is not None - if not ctx.with_bias: - bias = input.new_empty(1) # fake tensor - if not input.is_cuda: - raise NotImplementedError - if ( - weight.requires_grad - or mask.requires_grad - or offset.requires_grad - or input.requires_grad - ): - ctx.save_for_backward(input, offset, mask, weight, bias) - output = input.new_empty(_ModulatedDeformConv._infer_shape(ctx, input, weight)) - ctx._bufs = [input.new_empty(0), input.new_empty(0)] - _C.modulated_deform_conv_forward( - input, - weight, - bias, - ctx._bufs[0], - offset, - mask, - output, - ctx._bufs[1], - weight.shape[2], - weight.shape[3], - ctx.stride, - ctx.stride, - ctx.padding, - ctx.padding, - ctx.dilation, - ctx.dilation, - ctx.groups, - ctx.deformable_groups, - ctx.with_bias, - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - if not grad_output.is_cuda: - raise NotImplementedError - input, offset, mask, weight, bias = ctx.saved_tensors - grad_input = torch.zeros_like(input) - grad_offset = torch.zeros_like(offset) - grad_mask = torch.zeros_like(mask) - grad_weight = torch.zeros_like(weight) - grad_bias = torch.zeros_like(bias) - _C.modulated_deform_conv_backward( - input, - weight, - bias, - ctx._bufs[0], - offset, - mask, - ctx._bufs[1], - grad_input, - grad_weight, - grad_bias, - grad_offset, - grad_mask, - grad_output, - weight.shape[2], - weight.shape[3], - ctx.stride, - ctx.stride, - ctx.padding, - ctx.padding, - ctx.dilation, - ctx.dilation, - ctx.groups, - ctx.deformable_groups, - ctx.with_bias, - ) - if not ctx.with_bias: - grad_bias = None - - return ( - grad_input, - grad_offset, - grad_mask, - grad_weight, - grad_bias, - None, - None, - None, - None, - None, - ) - - @staticmethod - def _infer_shape(ctx, input, weight): - n = input.size(0) - channels_out = weight.size(0) - height, width = input.shape[2:4] - kernel_h, kernel_w = weight.shape[2:4] - height_out = ( - height + 2 * ctx.padding - (ctx.dilation * (kernel_h - 1) + 1) - ) // ctx.stride + 1 - width_out = ( - width + 2 * ctx.padding - (ctx.dilation * (kernel_w - 1) + 1) - ) // ctx.stride + 1 - return n, channels_out, height_out, width_out - - -deform_conv = _DeformConv.apply -modulated_deform_conv = _ModulatedDeformConv.apply - - -class DeformConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - 
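The _cal_im2col_step helper above returns the largest divisor of the batch size that does not exceed the preferred step (64 by default), which is what makes the `input.shape[0] % cur_im2col_step == 0` assertions hold. A standalone restatement of the same search, written only so it can be sanity-checked in isolation (the function name here is illustrative, not from the file):

# Illustration only: the same divisor search as _cal_im2col_step above.
def pick_im2col_step(batch_size: int, prefer: int = 64) -> int:
    if batch_size <= prefer:
        return batch_size
    best = 1
    for step in range(2, min(int(batch_size ** 0.5) + 1, prefer)):
        if batch_size % step == 0:
            if batch_size // step <= prefer:
                return batch_size // step
            best = step
    return best

assert pick_im2col_step(32) == 32    # small batch: use it whole
assert pick_im2col_step(128) == 64   # 128 // 2 == 64, still within the preferred step
assert pick_im2col_step(96) == 48    # largest divisor of 96 not exceeding 64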
padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=False, - norm=None, - activation=None, - ): - """ - Deformable convolution from :paper:`deformconv`. - - Arguments are similar to :class:`Conv2D`. Extra arguments: - - Args: - deformable_groups (int): number of groups used in deformable convolution. - norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - """ - super(DeformConv, self).__init__() - - assert not bias - assert in_channels % groups == 0, "in_channels {} cannot be divisible by groups {}".format( - in_channels, groups - ) - assert ( - out_channels % groups == 0 - ), "out_channels {} cannot be divisible by groups {}".format(out_channels, groups) - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = _pair(stride) - self.padding = _pair(padding) - self.dilation = _pair(dilation) - self.groups = groups - self.deformable_groups = deformable_groups - self.norm = norm - self.activation = activation - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // self.groups, *self.kernel_size) - ) - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - - def forward(self, x, offset): - if x.numel() == 0: - # When input is empty, we want to return a empty tensor with "correct" shape, - # So that the following operations will not panic - # if they check for the shape of the tensor. - # This computes the height and width of the output tensor - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [x.shape[0], self.weight.shape[0]] + output_shape - return _NewEmptyTensorOp.apply(x, output_shape) - - x = deform_conv( - x, - offset, - self.weight, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deformable_groups, - ) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", padding=" + str(self.padding) - tmpstr += ", dilation=" + str(self.dilation) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", deformable_groups=" + str(self.deformable_groups) - tmpstr += ", bias=False" - return tmpstr - - -class ModulatedDeformConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - deformable_groups=1, - bias=True, - norm=None, - activation=None, - ): - """ - Modulated deformable convolution from :paper:`deformconv2`. - - Arguments are similar to :class:`Conv2D`. Extra arguments: - - Args: - deformable_groups (int): number of groups used in deformable convolution. 
- norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - """ - super(ModulatedDeformConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.stride = stride - self.padding = padding - self.dilation = dilation - self.groups = groups - self.deformable_groups = deformable_groups - self.with_bias = bias - self.norm = norm - self.activation = activation - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) - ) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - if self.bias is not None: - nn.init.constant_(self.bias, 0) - - def forward(self, x, offset, mask): - if x.numel() == 0: - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [x.shape[0], self.weight.shape[0]] + output_shape - return _NewEmptyTensorOp.apply(x, output_shape) - - x = modulated_deform_conv( - x, - offset, - mask, - self.weight, - self.bias, - self.stride, - self.padding, - self.dilation, - self.groups, - self.deformable_groups, - ) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", padding=" + str(self.padding) - tmpstr += ", dilation=" + str(self.dilation) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", deformable_groups=" + str(self.deformable_groups) - tmpstr += ", bias=" + str(self.with_bias) - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/mask_ops.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/mask_ops.py deleted file mode 100644 index 0fe115d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/mask_ops.py +++ /dev/null @@ -1,248 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import torch -from PIL import Image -from torch.nn import functional as F - -__all__ = ["paste_masks_in_image"] - - -BYTES_PER_FLOAT = 4 -# TODO: This memory limit may be too much or too little. It would be better to -# determine it based on available resources. -GPU_MEM_LIMIT = 1024 ** 3 # 1 GB memory limit - - -def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True): - """ - Args: - masks: N, 1, H, W - boxes: N, 4 - img_h, img_w (int): - skip_empty (bool): only paste masks within the region that - tightly bound all boxes, and returns the results this region only. - An important optimization for CPU. - - Returns: - if skip_empty == False, a mask of shape (N, img_h, img_w) - if skip_empty == True, a mask of shape (N, h', w'), and the slice - object for the corresponding region. - """ - # On GPU, paste all masks together (up to chunk size) - # by using the entire image to sample the masks - # Compared to pasting them one by one, - # this has more operations but is faster on COCO-scale dataset. 
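_do_paste_mask below pastes masks by building a sampling grid for F.grid_sample: image pixel centers (x + 0.5, y + 0.5) are expressed in each box's own frame and normalized to [-1, 1], so pixels outside the box sample zeros. A toy single-box sketch of that normalization (variable names are illustrative, not from the file):

# Toy illustration of the grid normalization used in _do_paste_mask below.
import torch
import torch.nn.functional as F

mask = torch.rand(1, 1, 28, 28)                 # one soft 28x28 mask
box = torch.tensor([10.0, 20.0, 50.0, 100.0])   # x0, y0, x1, y1 in image coordinates
img_h, img_w = 120, 80

ys = torch.arange(0, img_h, dtype=torch.float32) + 0.5   # pixel centers
xs = torch.arange(0, img_w, dtype=torch.float32) + 0.5
gy = (ys - box[1]) / (box[3] - box[1]) * 2 - 1           # [-1, 1] inside the box
gx = (xs - box[0]) / (box[2] - box[0]) * 2 - 1

GX = gx[None, :].expand(img_h, img_w)                    # x varies along width
GY = gy[:, None].expand(img_h, img_w)                    # y varies along height
grid = torch.stack([GX, GY], dim=-1)[None]               # (1, H, W, 2), last dim = (x, y)
pasted = F.grid_sample(mask, grid, align_corners=False)[0, 0]   # (img_h, img_w) soft mask
# Locations outside the box fall outside [-1, 1] and receive zeros.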
- device = masks.device - if skip_empty: - x0_int, y0_int = torch.clamp(boxes.min(dim=0).values.floor()[:2] - 1, min=0).to( - dtype=torch.int32 - ) - x1_int = torch.clamp(boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32) - y1_int = torch.clamp(boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32) - else: - x0_int, y0_int = 0, 0 - x1_int, y1_int = img_w, img_h - x0, y0, x1, y1 = torch.split(boxes, 1, dim=1) # each is Nx1 - - N = masks.shape[0] - - img_y = torch.arange(y0_int, y1_int, device=device, dtype=torch.float32) + 0.5 - img_x = torch.arange(x0_int, x1_int, device=device, dtype=torch.float32) + 0.5 - img_y = (img_y - y0) / (y1 - y0) * 2 - 1 - img_x = (img_x - x0) / (x1 - x0) * 2 - 1 - # img_x, img_y have shapes (N, w), (N, h) - - gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1)) - gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) - grid = torch.stack([gx, gy], dim=3) - - img_masks = F.grid_sample(masks.to(dtype=torch.float32), grid, align_corners=False) - - if skip_empty: - return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int)) - else: - return img_masks[:, 0], () - - -def paste_masks_in_image(masks, boxes, image_shape, threshold=0.5): - """ - Paste a set of masks that are of a fixed resolution (e.g., 28 x 28) into an image. - The location, height, and width for pasting each mask is determined by their - corresponding bounding boxes in boxes. - - Note: - This is a complicated but more accurate implementation. In actual deployment, it is - often enough to use a faster but less accurate implementation. - See :func:`paste_mask_in_image_old` in this file for an alternative implementation. - - Args: - masks (tensor): Tensor of shape (Bimg, Hmask, Wmask), where Bimg is the number of - detected object instances in the image and Hmask, Wmask are the mask width and mask - height of the predicted mask (e.g., Hmask = Wmask = 28). Values are in [0, 1]. - boxes (Boxes or Tensor): A Boxes of length Bimg or Tensor of shape (Bimg, 4). - boxes[i] and masks[i] correspond to the same object instance. - image_shape (tuple): height, width - threshold (float): A threshold in [0, 1] for converting the (soft) masks to - binary masks. - - Returns: - img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the - number of detected object instances and Himage, Wimage are the image width - and height. img_masks[i] is a binary mask for object instance i. - """ - - assert masks.shape[-1] == masks.shape[-2], "Only square mask predictions are supported" - N = len(masks) - if N == 0: - return masks.new_empty((0,) + image_shape, dtype=torch.uint8) - if not isinstance(boxes, torch.Tensor): - boxes = boxes.tensor - device = boxes.device - assert len(boxes) == N, boxes.shape - - img_h, img_w = image_shape - - # The actual implementation split the input into chunks, - # and paste them chunk by chunk. - if device.type == "cpu": - # CPU is most efficient when they are pasted one by one with skip_empty=True - # so that it performs minimal number of operations. 
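# (Worked example of the chunking arithmetic just below, added for illustration:
#  with BYTES_PER_FLOAT = 4 and GPU_MEM_LIMIT = 1024 ** 3, pasting N = 500 masks
#  onto a 1333 x 800 image needs about 500 * 1333 * 800 * 4 bytes ~ 1.99 GiB of
#  float storage, so the GPU branch uses num_chunks = ceil(1.99) = 2, while the
#  CPU branch below simply uses one chunk per mask.)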
- num_chunks = N - else: - # GPU benefits from parallelism for larger chunks, but may have memory issue - # int(img_h) because shape may be tensors in tracing - num_chunks = int(np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT / GPU_MEM_LIMIT)) - assert ( - num_chunks <= N - ), "Default GPU_MEM_LIMIT in mask_ops.py is too small; try increasing it" - chunks = torch.chunk(torch.arange(N, device=device), num_chunks) - - img_masks = torch.zeros( - N, img_h, img_w, device=device, dtype=torch.bool if threshold >= 0 else torch.uint8 - ) - for inds in chunks: - masks_chunk, spatial_inds = _do_paste_mask( - masks[inds, None, :, :], boxes[inds], img_h, img_w, skip_empty=device.type == "cpu" - ) - - if threshold >= 0: - masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool) - else: - # for visualization and debugging - masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8) - - img_masks[(inds,) + spatial_inds] = masks_chunk - return img_masks - - -# The below are the original paste function (from Detectron1) which has -# larger quantization error. -# It is faster on CPU, while the aligned one is faster on GPU thanks to grid_sample. - - -def paste_mask_in_image_old(mask, box, img_h, img_w, threshold): - """ - Paste a single mask in an image. - This is a per-box implementation of :func:`paste_masks_in_image`. - This function has larger quantization error due to incorrect pixel - modeling and is not used any more. - - Args: - mask (Tensor): A tensor of shape (Hmask, Wmask) storing the mask of a single - object instance. Values are in [0, 1]. - box (Tensor): A tensor of shape (4, ) storing the x0, y0, x1, y1 box corners - of the object instance. - img_h, img_w (int): Image height and width. - threshold (float): Mask binarization threshold in [0, 1]. - - Returns: - im_mask (Tensor): - The resized and binarized object mask pasted into the original - image plane (a tensor of shape (img_h, img_w)). - """ - # Conversion from continuous box coordinates to discrete pixel coordinates - # via truncation (cast to int32). This determines which pixels to paste the - # mask onto. - box = box.to(dtype=torch.int32) # Continuous to discrete coordinate conversion - # An example (1D) box with continuous coordinates (x0=0.7, x1=4.3) will map to - # a discrete coordinates (x0=0, x1=4). Note that box is mapped to 5 = x1 - x0 + 1 - # pixels (not x1 - x0 pixels). - samples_w = box[2] - box[0] + 1 # Number of pixel samples, *not* geometric width - samples_h = box[3] - box[1] + 1 # Number of pixel samples, *not* geometric height - - # Resample the mask from it's original grid to the new samples_w x samples_h grid - mask = Image.fromarray(mask.cpu().numpy()) - mask = mask.resize((samples_w, samples_h), resample=Image.BILINEAR) - mask = np.array(mask, copy=False) - - if threshold >= 0: - mask = np.array(mask > threshold, dtype=np.uint8) - mask = torch.from_numpy(mask) - else: - # for visualization and debugging, we also - # allow it to return an unmodified mask - mask = torch.from_numpy(mask * 255).to(torch.uint8) - - im_mask = torch.zeros((img_h, img_w), dtype=torch.uint8) - x_0 = max(box[0], 0) - x_1 = min(box[2] + 1, img_w) - y_0 = max(box[1], 0) - y_1 = min(box[3] + 1, img_h) - - im_mask[y_0:y_1, x_0:x_1] = mask[ - (y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0]) - ] - return im_mask - - -# Our pixel modeling requires extrapolation for any continuous -# coordinate < 0.5 or > length - 0.5. 
When sampling pixels on the masks, -# we would like this extrapolation to be an interpolation between boundary values and zero, -# instead of using absolute zero or boundary values. -# Therefore `paste_mask_in_image_old` is often used with zero padding around the masks like this: -# masks, scale = pad_masks(masks[:, 0, :, :], 1) -# boxes = scale_boxes(boxes.tensor, scale) - - -def pad_masks(masks, padding): - """ - Args: - masks (tensor): A tensor of shape (B, M, M) representing B masks. - padding (int): Number of cells to pad on all sides. - - Returns: - The padded masks and the scale factor of the padding size / original size. - """ - B = masks.shape[0] - M = masks.shape[-1] - pad2 = 2 * padding - scale = float(M + pad2) / M - padded_masks = masks.new_zeros((B, M + pad2, M + pad2)) - padded_masks[:, padding:-padding, padding:-padding] = masks - return padded_masks, scale - - -def scale_boxes(boxes, scale): - """ - Args: - boxes (tensor): A tensor of shape (B, 4) representing B boxes with 4 - coords representing the corners x0, y0, x1, y1, - scale (float): The box scaling factor. - - Returns: - Scaled boxes. - """ - w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 - h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 - x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5 - y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5 - - w_half *= scale - h_half *= scale - - scaled_boxes = torch.zeros_like(boxes) - scaled_boxes[:, 0] = x_c - w_half - scaled_boxes[:, 2] = x_c + w_half - scaled_boxes[:, 1] = y_c - h_half - scaled_boxes[:, 3] = y_c + h_half - return scaled_boxes diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/nms.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/nms.py deleted file mode 100644 index aafe29b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/nms.py +++ /dev/null @@ -1,146 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import torch -from torchvision.ops import boxes as box_ops -from torchvision.ops import nms # BC-compat - - -def batched_nms(boxes, scores, idxs, iou_threshold): - """ - Same as torchvision.ops.boxes.batched_nms, but safer. - """ - assert boxes.shape[-1] == 4 - # TODO may need better strategy. - # Investigate after having a fully-cuda NMS op. - if len(boxes) < 40000: - return box_ops.batched_nms(boxes, scores, idxs, iou_threshold) - - result_mask = scores.new_zeros(scores.size(), dtype=torch.bool) - for id in torch.unique(idxs).cpu().tolist(): - mask = (idxs == id).nonzero().view(-1) - keep = nms(boxes[mask], scores[mask], iou_threshold) - result_mask[mask[keep]] = True - keep = result_mask.nonzero().view(-1) - keep = keep[scores[keep].argsort(descending=True)] - return keep - - -# Note: this function (nms_rotated) might be moved into -# torchvision/ops/boxes.py in the future -def nms_rotated(boxes, scores, iou_threshold): - """ - Performs non-maximum suppression (NMS) on the rotated boxes according - to their intersection-over-union (IoU). - - Rotated NMS iteratively removes lower scoring rotated boxes which have an - IoU greater than iou_threshold with another (higher scoring) rotated box. - - Note that RotatedBox (5, 3, 4, 2, -90) covers exactly the same region as - RotatedBox (5, 3, 4, 2, 90) does, and their IoU will be 1. However, they - can be representing completely different objects in certain tasks, e.g., OCR. 
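Per the note above, RotatedBox (5, 3, 4, 2, -90) and (5, 3, 4, 2, 90) cover exactly the same region, so their IoU is 1 and plain rotated NMS keeps only the higher-scoring one. A hedged usage sketch of nms_rotated as defined above; it assumes detectron2 was built with its compiled extension, since the call dispatches to _C.nms_rotated:

# Sketch only: exercising nms_rotated from the module above.
import torch
from detectron2.layers.nms import nms_rotated

boxes = torch.tensor([[5.0, 3.0, 4.0, 2.0, -90.0],
                      [5.0, 3.0, 4.0, 2.0,  90.0]])   # (x_ctr, y_ctr, w, h, angle_degrees)
scores = torch.tensor([0.9, 0.8])

keep = nms_rotated(boxes, scores, iou_threshold=0.5)
# The two boxes cover the same region (IoU == 1), so only the higher-scoring
# index survives: keep == tensor([0])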
- - As for the question of whether rotated-NMS should treat them as faraway boxes - even though their IOU is 1, it depends on the application and/or ground truth annotation. - - As an extreme example, consider a single character v and the square box around it. - - If the angle is 0 degree, the object (text) would be read as 'v'; - - If the angle is 90 degrees, the object (text) would become '>'; - - If the angle is 180 degrees, the object (text) would become '^'; - - If the angle is 270/-90 degrees, the object (text) would become '<' - - All of these cases have IoU of 1 to each other, and rotated NMS that only - uses IoU as criterion would only keep one of them with the highest score - - which, practically, still makes sense in most cases because typically - only one of theses orientations is the correct one. Also, it does not matter - as much if the box is only used to classify the object (instead of transcribing - them with a sequential OCR recognition model) later. - - On the other hand, when we use IoU to filter proposals that are close to the - ground truth during training, we should definitely take the angle into account if - we know the ground truth is labeled with the strictly correct orientation (as in, - upside-down words are annotated with -180 degrees even though they can be covered - with a 0/90/-90 degree box, etc.) - - The way the original dataset is annotated also matters. For example, if the dataset - is a 4-point polygon dataset that does not enforce ordering of vertices/orientation, - we can estimate a minimum rotated bounding box to this polygon, but there's no way - we can tell the correct angle with 100% confidence (as shown above, there could be 4 different - rotated boxes, with angles differed by 90 degrees to each other, covering the exactly - same region). In that case we have to just use IoU to determine the box - proximity (as many detection benchmarks (even for text) do) unless there're other - assumptions we can make (like width is always larger than height, or the object is not - rotated by more than 90 degrees CCW/CW, etc.) - - In summary, not considering angles in rotated NMS seems to be a good option for now, - but we should be aware of its implications. - - Args: - boxes (Tensor[N, 5]): Rotated boxes to perform NMS on. They are expected to be in - (x_center, y_center, width, height, angle_degrees) format. - scores (Tensor[N]): Scores for each one of the rotated boxes - iou_threshold (float): Discards all overlapping rotated boxes with IoU < iou_threshold - - Returns: - keep (Tensor): int64 tensor with the indices of the elements that have been kept - by Rotated NMS, sorted in decreasing order of scores - """ - from detectron2 import _C - - return _C.nms_rotated(boxes, scores, iou_threshold) - - -# Note: this function (batched_nms_rotated) might be moved into -# torchvision/ops/boxes.py in the future -def batched_nms_rotated(boxes, scores, idxs, iou_threshold): - """ - Performs non-maximum suppression in a batched fashion. - - Each index value correspond to a category, and NMS - will not be applied between elements of different categories. - - Args: - boxes (Tensor[N, 5]): - boxes where NMS will be performed. They - are expected to be in (x_ctr, y_ctr, width, height, angle_degrees) format - scores (Tensor[N]): - scores for each one of the boxes - idxs (Tensor[N]): - indices of the categories for each one of the boxes. 
- iou_threshold (float): - discards all overlapping boxes - with IoU < iou_threshold - - Returns: - Tensor: - int64 tensor with the indices of the elements that have been kept - by NMS, sorted in decreasing order of scores - """ - assert boxes.shape[-1] == 5 - - if boxes.numel() == 0: - return torch.empty((0,), dtype=torch.int64, device=boxes.device) - # Strategy: in order to perform NMS independently per class, - # we add an offset to all the boxes. The offset is dependent - # only on the class idx, and is large enough so that boxes - # from different classes do not overlap - - # Note that batched_nms in torchvision/ops/boxes.py only uses max_coordinate, - # which won't handle negative coordinates correctly. - # Here by using min_coordinate we can make sure the negative coordinates are - # correctly handled. - max_coordinate = ( - torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2 - ).max() - min_coordinate = ( - torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2 - ).min() - offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1) - boxes_for_nms = boxes.clone() # avoid modifying the original values in boxes - boxes_for_nms[:, :2] += offsets[:, None] - keep = nms_rotated(boxes_for_nms, scores, iou_threshold) - return keep diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align.py deleted file mode 100644 index f8c4ce1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from detectron2 import _C - - -class _ROIAlign(Function): - @staticmethod - def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): - ctx.save_for_backward(roi) - ctx.output_size = _pair(output_size) - ctx.spatial_scale = spatial_scale - ctx.sampling_ratio = sampling_ratio - ctx.input_shape = input.size() - ctx.aligned = aligned - output = _C.roi_align_forward( - input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - (rois,) = ctx.saved_tensors - output_size = ctx.output_size - spatial_scale = ctx.spatial_scale - sampling_ratio = ctx.sampling_ratio - bs, ch, h, w = ctx.input_shape - grad_input = _C.roi_align_backward( - grad_output, - rois, - spatial_scale, - output_size[0], - output_size[1], - bs, - ch, - h, - w, - sampling_ratio, - ctx.aligned, - ) - return grad_input, None, None, None, None, None - - -roi_align = _ROIAlign.apply - - -class ROIAlign(nn.Module): - def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): - """ - Args: - output_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sampling_ratio (int): number of inputs samples to take for each output - sample. 0 to take samples densely. - aligned (bool): if False, use the legacy implementation in - Detectron. If True, align the results more perfectly. - - Note: - The meaning of aligned=True: - - Given a continuous coordinate c, its two neighboring pixel indices (in our - pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). 
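The class-offset trick in batched_nms_rotated above shifts every box center by idxs * (max_coordinate - min_coordinate + 1), so boxes from different categories can never overlap and a single NMS call handles all classes at once. A small numeric illustration of just that offset arithmetic (plain tensor math, no compiled ops):

# Illustration of the per-class offset used by batched_nms_rotated above.
import torch

boxes = torch.tensor([[10., 10., 4., 2., 0.],    # class 0
                      [12., 11., 4., 2., 0.],    # class 0, overlaps the first box
                      [11., 10., 4., 2., 0.]])   # class 1, also overlaps spatially
idxs = torch.tensor([0, 0, 1])

max_coordinate = (torch.max(boxes[:, 0], boxes[:, 1]) + torch.max(boxes[:, 2], boxes[:, 3]) / 2).max()
min_coordinate = (torch.min(boxes[:, 0], boxes[:, 1]) - torch.max(boxes[:, 2], boxes[:, 3]) / 2).min()
offsets = idxs.to(boxes) * (max_coordinate - min_coordinate + 1)   # unit offset = 14 - 8 + 1 = 7

shifted = boxes.clone()
shifted[:, :2] += offsets[:, None]
# Class-0 centers stay near (10, 10) and (12, 11); the class-1 center moves to (18, 17),
# far enough away that NMS can no longer suppress it with a class-0 box.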
For example, - c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled - from the underlying signal at continuous coordinates 0.5 and 1.5). But the original - roi_align (aligned=False) does not subtract the 0.5 when computing neighboring - pixel indices and therefore it uses pixels with a slightly incorrect alignment - (relative to our pixel model) when performing bilinear interpolation. - - With `aligned=True`, - we first appropriately scale the ROI and then shift it by -0.5 - prior to calling roi_align. This produces the correct neighbors; see - detectron2/tests/test_roi_align.py for verification. - - The difference does not make a difference to the model's performance if - ROIAlign is used together with conv layers. - """ - super(ROIAlign, self).__init__() - self.output_size = output_size - self.spatial_scale = spatial_scale - self.sampling_ratio = sampling_ratio - self.aligned = aligned - - def forward(self, input, rois): - """ - Args: - input: NCHW images - rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. - """ - assert rois.dim() == 2 and rois.size(1) == 5 - return roi_align( - input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned - ) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ", aligned=" + str(self.aligned) - tmpstr += ")" - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align_rotated.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align_rotated.py deleted file mode 100644 index 6ed87e6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/roi_align_rotated.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from detectron2 import _C - - -class _ROIAlignRotated(Function): - @staticmethod - def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): - ctx.save_for_backward(roi) - ctx.output_size = _pair(output_size) - ctx.spatial_scale = spatial_scale - ctx.sampling_ratio = sampling_ratio - ctx.input_shape = input.size() - output = _C.roi_align_rotated_forward( - input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio - ) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - (rois,) = ctx.saved_tensors - output_size = ctx.output_size - spatial_scale = ctx.spatial_scale - sampling_ratio = ctx.sampling_ratio - bs, ch, h, w = ctx.input_shape - grad_input = _C.roi_align_rotated_backward( - grad_output, - rois, - spatial_scale, - output_size[0], - output_size[1], - bs, - ch, - h, - w, - sampling_ratio, - ) - return grad_input, None, None, None, None, None - - -roi_align_rotated = _ROIAlignRotated.apply - - -class ROIAlignRotated(nn.Module): - def __init__(self, output_size, spatial_scale, sampling_ratio): - """ - Args: - output_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sampling_ratio (int): number of inputs samples to take for each output - sample. 0 to take samples densely. 
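The aligned=True note above (in ROIAlign) uses a pixel model in which a continuous coordinate c has neighboring pixel indices floor(c - 0.5) and ceil(c - 0.5). A two-line check of the c = 1.3 example given there:

# Checking the pixel-model example from the ROIAlign note above.
import math

def neighbor_indices(c: float):
    # neighbors of a continuous coordinate under the "aligned" pixel model
    return math.floor(c - 0.5), math.ceil(c - 0.5)

assert neighbor_indices(1.3) == (0, 1)   # sampled at continuous coordinates 0.5 and 1.5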
- - Note: - ROIAlignRotated supports continuous coordinate by default: - Given a continuous coordinate c, its two neighboring pixel indices (in our - pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, - c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled - from the underlying signal at continuous coordinates 0.5 and 1.5). - """ - super(ROIAlignRotated, self).__init__() - self.output_size = output_size - self.spatial_scale = spatial_scale - self.sampling_ratio = sampling_ratio - - def forward(self, input, rois): - """ - Args: - input: NCHW images - rois: Bx6 boxes. First column is the index into N. - The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). - """ - assert rois.dim() == 2 and rois.size(1) == 6 - return roi_align_rotated( - input, rois, self.output_size, self.spatial_scale, self.sampling_ratio - ) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "output_size=" + str(self.output_size) - tmpstr += ", spatial_scale=" + str(self.spatial_scale) - tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) - tmpstr += ")" - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/rotated_boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/rotated_boxes.py deleted file mode 100644 index ea9b085..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/rotated_boxes.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from __future__ import absolute_import, division, print_function, unicode_literals - -from detectron2 import _C - - -def pairwise_iou_rotated(boxes1, boxes2): - """ - Return intersection-over-union (Jaccard index) of boxes. - - Both sets of boxes are expected to be in - (x_center, y_center, width, height, angle) format. - - Arguments: - boxes1 (Tensor[N, 5]) - boxes2 (Tensor[M, 5]) - - Returns: - iou (Tensor[N, M]): the NxM matrix containing the pairwise - IoU values for every element in boxes1 and boxes2 - """ - return _C.box_iou_rotated(boxes1, boxes2) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/shape_spec.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/shape_spec.py deleted file mode 100644 index ed7f0d0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/shape_spec.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from collections import namedtuple - - -class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): - """ - A simple structure that contains basic shape specification about a tensor. - It is often used as the auxiliary inputs/outputs of models, - to obtain the shape inference ability among pytorch modules. - - Attributes: - channels: - height: - width: - stride: - """ - - def __new__(cls, *, channels=None, height=None, width=None, stride=None): - return super().__new__(cls, channels, height, width, stride) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/wrappers.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/wrappers.py deleted file mode 100644 index 7e3935e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/layers/wrappers.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -""" -Wrappers around on some nn functions, mainly to support empty tensors. - -Ideally, add support directly in PyTorch to empty tensors in those functions. - -These can be removed once https://github.com/pytorch/pytorch/issues/12013 -is implemented -""" - -import math -import torch -from torch.nn.modules.utils import _ntuple - -TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) - - -def cat(tensors, dim=0): - """ - Efficient version of torch.cat that avoids a copy if there is only a single element in a list - """ - assert isinstance(tensors, (list, tuple)) - if len(tensors) == 1: - return tensors[0] - return torch.cat(tensors, dim) - - -class _NewEmptyTensorOp(torch.autograd.Function): - @staticmethod - def forward(ctx, x, new_shape): - ctx.shape = x.shape - return x.new_empty(new_shape) - - @staticmethod - def backward(ctx, grad): - shape = ctx.shape - return _NewEmptyTensorOp.apply(grad, shape), None - - -class Conv2d(torch.nn.Conv2d): - """ - A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. - """ - - def __init__(self, *args, **kwargs): - """ - Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: - - Args: - norm (nn.Module, optional): a normalization layer - activation (callable(Tensor) -> Tensor): a callable activation function - - It assumes that norm layer is used before activation. - """ - norm = kwargs.pop("norm", None) - activation = kwargs.pop("activation", None) - super().__init__(*args, **kwargs) - - self.norm = norm - self.activation = activation - - def forward(self, x): - if x.numel() == 0 and self.training: - # https://github.com/pytorch/pytorch/issues/12013 - assert not isinstance( - self.norm, torch.nn.SyncBatchNorm - ), "SyncBatchNorm does not support empty inputs!" - - if x.numel() == 0 and TORCH_VERSION <= (1, 4): - assert not isinstance( - self.norm, torch.nn.GroupNorm - ), "GroupNorm does not support empty inputs in PyTorch <=1.4!" - # When input is empty, we want to return a empty tensor with "correct" shape, - # So that the following operations will not panic - # if they check for the shape of the tensor. - # This computes the height and width of the output tensor - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [x.shape[0], self.weight.shape[0]] + output_shape - empty = _NewEmptyTensorOp.apply(x, output_shape) - if self.training: - # This is to make DDP happy. - # DDP expects all workers to have gradient w.r.t the same set of parameters. - _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + _dummy - else: - return empty - - x = super().forward(x) - if self.norm is not None: - x = self.norm(x) - if self.activation is not None: - x = self.activation(x) - return x - - -if TORCH_VERSION > (1, 4): - ConvTranspose2d = torch.nn.ConvTranspose2d -else: - - class ConvTranspose2d(torch.nn.ConvTranspose2d): - """ - A wrapper around :class:`torch.nn.ConvTranspose2d` to support zero-size tensor. - """ - - def forward(self, x): - if x.numel() > 0: - return super(ConvTranspose2d, self).forward(x) - # get output shape - - # When input is empty, we want to return a empty tensor with "correct" shape, - # So that the following operations will not panic - # if they check for the shape of the tensor. 
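The empty-input branches in these wrappers (and in DeformConv above) compute the would-be output size with the usual convolution formula, and ConvTranspose2d just below uses its inverse. A quick numeric check of both, for illustration (helper names are mine, not from the file):

# Worked check of the output-shape formulas used in the empty-input branches.
def conv_out(i, p, d, k, s):
    # forward conv, as in the Conv2d / DeformConv branches above
    return (i + 2 * p - (d * (k - 1) + 1)) // s + 1

def conv_transpose_out(i, p, d, k, s, op):
    # transposed conv, as in the ConvTranspose2d branch just below
    return (i - 1) * s - 2 * p + (d * (k - 1) + 1) + op

assert conv_out(56, p=1, d=1, k=3, s=2) == 28                       # stride-2 3x3 conv halves 56
assert conv_transpose_out(28, p=1, d=1, k=3, s=2, op=1) == 56       # and its transpose undoes it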
- # This computes the height and width of the output tensor - output_shape = [ - (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op - for i, p, di, k, d, op in zip( - x.shape[-2:], - self.padding, - self.dilation, - self.kernel_size, - self.stride, - self.output_padding, - ) - ] - output_shape = [x.shape[0], self.out_channels] + output_shape - # This is to make DDP happy. - # DDP expects all workers to have gradient w.r.t the same set of parameters. - _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return _NewEmptyTensorOp.apply(x, output_shape) + _dummy - - -if TORCH_VERSION > (1, 4): - BatchNorm2d = torch.nn.BatchNorm2d -else: - - class BatchNorm2d(torch.nn.BatchNorm2d): - """ - A wrapper around :class:`torch.nn.BatchNorm2d` to support zero-size tensor. - """ - - def forward(self, x): - if x.numel() > 0: - return super(BatchNorm2d, self).forward(x) - # get output shape - output_shape = x.shape - return _NewEmptyTensorOp.apply(x, output_shape) - - -if TORCH_VERSION > (1, 5): - Linear = torch.nn.Linear -else: - - class Linear(torch.nn.Linear): - """ - A wrapper around :class:`torch.nn.Linear` to support empty inputs and more features. - Because of https://github.com/pytorch/pytorch/issues/34202 - """ - - def forward(self, x): - if x.numel() == 0: - output_shape = [x.shape[0], self.weight.shape[0]] - - empty = _NewEmptyTensorOp.apply(x, output_shape) - if self.training: - # This is to make DDP happy. - # DDP expects all workers to have gradient w.r.t the same set of parameters. - _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 - return empty + _dummy - else: - return empty - - x = super().forward(x) - return x - - -def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None): - """ - A wrapper around :func:`torch.nn.functional.interpolate` to support zero-size tensor. - """ - if TORCH_VERSION > (1, 4) or input.numel() > 0: - return torch.nn.functional.interpolate( - input, size, scale_factor, mode, align_corners=align_corners - ) - - def _check_size_scale_factor(dim): - if size is None and scale_factor is None: - raise ValueError("either size or scale_factor should be defined") - if size is not None and scale_factor is not None: - raise ValueError("only one of size or scale_factor should be defined") - if ( - scale_factor is not None - and isinstance(scale_factor, tuple) - and len(scale_factor) != dim - ): - raise ValueError( - "scale_factor shape must match input shape. " - "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) - ) - - def _output_size(dim): - _check_size_scale_factor(dim) - if size is not None: - return size - scale_factors = _ntuple(dim)(scale_factor) - # math.floor might return float in py2.7 - return [int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim)] - - output_shape = tuple(_output_size(2)) - output_shape = input.shape[:-2] + output_shape - return _NewEmptyTensorOp.apply(input, output_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/__init__.py deleted file mode 100644 index 886616f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Model Zoo API for Detectron2: a collection of functions to create common model architectures and -optionally load pre-trained weights as released in -`MODEL_ZOO.md `_. 
-""" -from .model_zoo import get, get_config_file, get_checkpoint_url - -__all__ = ["get_checkpoint_url", "get", "get_config_file"] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/model_zoo.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/model_zoo.py deleted file mode 100644 index 68d0ce5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/model_zoo/model_zoo.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import os -import pkg_resources -import torch - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.modeling import build_model - - -class _ModelZooUrls(object): - """ - Mapping from names to officially released Detectron2 pre-trained models. - """ - - S3_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" - - # format: {config_path.yaml} -> model_id/model_final_{commit}.pkl - CONFIG_PATH_TO_URL_SUFFIX = { - # COCO Detection with Faster R-CNN - "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml": "137257644/model_final_721ade.pkl", - "COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml": "137847829/model_final_51d356.pkl", - "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml": "137257794/model_final_b275ba.pkl", - "COCO-Detection/faster_rcnn_R_50_C4_3x.yaml": "137849393/model_final_f97cb7.pkl", - "COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml": "137849425/model_final_68d202.pkl", - "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml": "137849458/model_final_280758.pkl", - "COCO-Detection/faster_rcnn_R_101_C4_3x.yaml": "138204752/model_final_298dad.pkl", - "COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml": "138204841/model_final_3e0943.pkl", - "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml": "137851257/model_final_f6e8b1.pkl", - "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml": "139173657/model_final_68b088.pkl", - # COCO Detection with RetinaNet - "COCO-Detection/retinanet_R_50_FPN_1x.yaml": "137593951/model_final_b796dc.pkl", - "COCO-Detection/retinanet_R_50_FPN_3x.yaml": "137849486/model_final_4cafe0.pkl", - "COCO-Detection/retinanet_R_101_FPN_3x.yaml": "138363263/model_final_59f53c.pkl", - # COCO Detection with RPN and Fast R-CNN - "COCO-Detection/rpn_R_50_C4_1x.yaml": "137258005/model_final_450694.pkl", - "COCO-Detection/rpn_R_50_FPN_1x.yaml": "137258492/model_final_02ce48.pkl", - "COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml": "137635226/model_final_e5f7ce.pkl", - # COCO Instance Segmentation Baselines with Mask R-CNN - "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml": "137259246/model_final_9243eb.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml": "137260150/model_final_4f86c3.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "137260431/model_final_a54504.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml": "137849525/model_final_4ce675.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml": "137849551/model_final_84107b.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml": "137849600/model_final_f10217.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml": "138363239/model_final_a2914c.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml": "138363294/model_final_0464b7.pkl", - "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml": "138205316/model_final_a3ec72.pkl", - "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml": "139653917/model_final_2d9806.pkl", # noqa - # COCO Person Keypoint Detection Baselines with Keypoint 
R-CNN - "COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml": "137261548/model_final_04e291.pkl", - "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml": "137849621/model_final_a6e10b.pkl", - "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml": "138363331/model_final_997cc7.pkl", - "COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml": "139686956/model_final_5ad38f.pkl", - # COCO Panoptic Segmentation Baselines with Panoptic FPN - "COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml": "139514544/model_final_dbfeb4.pkl", - "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml": "139514569/model_final_c10459.pkl", - "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml": "139514519/model_final_cafdb1.pkl", - # LVIS Instance Segmentation Baselines with Mask R-CNN - "LVIS-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml": "144219072/model_final_571f7c.pkl", - "LVIS-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml": "144219035/model_final_824ab5.pkl", - "LVIS-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml": "144219108/model_final_5e3439.pkl", # noqa - # Cityscapes & Pascal VOC Baselines - "Cityscapes/mask_rcnn_R_50_FPN.yaml": "142423278/model_final_af9cf5.pkl", - "PascalVOC-Detection/faster_rcnn_R_50_C4.yaml": "142202221/model_final_b1acc2.pkl", - # Other Settings - "Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml": "138602867/model_final_65c703.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml": "144998336/model_final_821d0b.pkl", - "Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml": "138602847/model_final_e9d89b.pkl", - "Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml": "144998488/model_final_480dd8.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml": "169527823/model_final_3b3c51.pkl", - "Misc/mask_rcnn_R_50_FPN_3x_gn.yaml": "138602888/model_final_dc5d9e.pkl", - "Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml": "138602908/model_final_01ca85.pkl", - "Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml": "139797668/model_final_be35db.pkl", - "Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml": "18131413/model_0039999_e76410.pkl", # noqa - # D1 Comparisons - "Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml": "137781054/model_final_7ab50c.pkl", # noqa - "Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml": "137781281/model_final_62ca52.pkl", # noqa - "Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml": "137781195/model_final_cce136.pkl", - } - - -def get_checkpoint_url(config_path): - """ - Returns the URL to the model trained using the given config - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - Returns: - str: a URL to the model - """ - name = config_path.replace(".yaml", "") - if config_path in _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX: - suffix = _ModelZooUrls.CONFIG_PATH_TO_URL_SUFFIX[config_path] - return _ModelZooUrls.S3_PREFIX + name + "/" + suffix - raise RuntimeError("{} not available in Model Zoo!".format(name)) - - -def get_config_file(config_path): - """ - Returns path to a builtin config file. - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - Returns: - str: the real path to the config file. 
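get_checkpoint_url above builds the download URL as S3_PREFIX + the config name without its ".yaml" suffix + "/" + the suffix stored in CONFIG_PATH_TO_URL_SUFFIX. A worked example using the first table entry:

# Worked example of the URL construction in get_checkpoint_url above.
config_path = "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml"
suffix = "137257644/model_final_721ade.pkl"          # from CONFIG_PATH_TO_URL_SUFFIX

name = config_path.replace(".yaml", "")
url = "https://dl.fbaipublicfiles.com/detectron2/" + name + "/" + suffix
# -> https://dl.fbaipublicfiles.com/detectron2/COCO-Detection/faster_rcnn_R_50_C4_1x/137257644/model_final_721ade.pkl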
- """ - cfg_file = pkg_resources.resource_filename( - "detectron2.model_zoo", os.path.join("configs", config_path) - ) - if not os.path.exists(cfg_file): - raise RuntimeError("{} not available in Model Zoo!".format(config_path)) - return cfg_file - - -def get(config_path, trained: bool = False): - """ - Get a model specified by relative path under Detectron2's official ``configs/`` directory. - - Args: - config_path (str): config file name relative to detectron2's "configs/" - directory, e.g., "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - trained (bool): If True, will initialize the model with the trained model zoo weights. - If False, the checkpoint specified in the config file's ``MODEL.WEIGHTS`` is used - instead; this will typically (though not always) initialize a subset of weights using - an ImageNet pre-trained model, while randomly initializing the other weights. - - Example: - - .. code-block:: python - - from detectron2 import model_zoo - model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml", trained=True) - """ - cfg_file = get_config_file(config_path) - - cfg = get_cfg() - cfg.merge_from_file(cfg_file) - if trained: - cfg.MODEL.WEIGHTS = get_checkpoint_url(config_path) - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - return model diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/__init__.py deleted file mode 100644 index 9e23fe4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/__init__.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -from detectron2.layers import ShapeSpec - -from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY -from .backbone import ( - BACKBONE_REGISTRY, - FPN, - Backbone, - ResNet, - ResNetBlockBase, - build_backbone, - build_resnet_backbone, - make_stage, -) -from .meta_arch import ( - META_ARCH_REGISTRY, - SEM_SEG_HEADS_REGISTRY, - GeneralizedRCNN, - PanopticFPN, - ProposalNetwork, - RetinaNet, - SemanticSegmentor, - build_model, - build_sem_seg_head, -) -from .postprocessing import detector_postprocess -from .proposal_generator import ( - PROPOSAL_GENERATOR_REGISTRY, - build_proposal_generator, - RPN_HEAD_REGISTRY, - build_rpn_head, -) -from .roi_heads import ( - ROI_BOX_HEAD_REGISTRY, - ROI_HEADS_REGISTRY, - ROI_KEYPOINT_HEAD_REGISTRY, - ROI_MASK_HEAD_REGISTRY, - ROIHeads, - StandardROIHeads, - BaseMaskRCNNHead, - BaseKeypointRCNNHead, - build_box_head, - build_keypoint_head, - build_mask_head, - build_roi_heads, -) -from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA - -_EXCLUDE = {"torch", "ShapeSpec"} -__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] - -assert ( - torch.Tensor([1]) == torch.Tensor([2]) -).dtype == torch.bool, "Your Pytorch is too old. 
Please update to contain https://github.com/pytorch/pytorch/pull/21113" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/anchor_generator.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/anchor_generator.py deleted file mode 100644 index 93927bc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/anchor_generator.py +++ /dev/null @@ -1,382 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from typing import List -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, RotatedBoxes -from detectron2.utils.registry import Registry - -ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR") -ANCHOR_GENERATOR_REGISTRY.__doc__ = """ -Registry for modules that creates object detection anchors for feature maps. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -class BufferList(nn.Module): - """ - Similar to nn.ParameterList, but for buffers - """ - - def __init__(self, buffers=None): - super(BufferList, self).__init__() - if buffers is not None: - self.extend(buffers) - - def extend(self, buffers): - offset = len(self) - for i, buffer in enumerate(buffers): - self.register_buffer(str(offset + i), buffer) - return self - - def __len__(self): - return len(self._buffers) - - def __iter__(self): - return iter(self._buffers.values()) - - -def _create_grid_offsets(size: List[int], stride: int, offset: float, device: torch.device): - grid_height, grid_width = size - shifts_x = torch.arange( - offset * stride, grid_width * stride, step=stride, dtype=torch.float32, device=device - ) - shifts_y = torch.arange( - offset * stride, grid_height * stride, step=stride, dtype=torch.float32, device=device - ) - - shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) - shift_x = shift_x.reshape(-1) - shift_y = shift_y.reshape(-1) - return shift_x, shift_y - - -def _broadcast_params(params, num_features, name): - """ - If one size (or aspect ratio) is specified and there are multiple feature - maps, we "broadcast" anchors of that single size (or aspect ratio) - over all feature maps. - - If params is list[float], or list[list[float]] with len(params) == 1, repeat - it num_features time. - - Returns: - list[list[float]]: param for each feature - """ - assert isinstance( - params, (list, tuple) - ), f"{name} in anchor generator has to be a list! Got {params}." - assert len(params), f"{name} in anchor generator cannot be empty!" - if not isinstance(params[0], (list, tuple)): # list[float] - return [params] * num_features - if len(params) == 1: - return list(params) * num_features - assert len(params) == num_features, ( - f"Got {name} of length {len(params)} in anchor generator, " - f"but the number of input features is {num_features}!" - ) - return params - - -@ANCHOR_GENERATOR_REGISTRY.register() -class DefaultAnchorGenerator(nn.Module): - """ - Compute anchors in the standard ways described in - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks". - """ - - box_dim: int = 4 - """ - the dimension of each anchor box. - """ - - @configurable - def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5): - """ - This interface is experimental. - - Args: - sizes (list[list[float]] or list[float]): - If sizes is list[list[float]], sizes[i] is the list of anchor sizes - (i.e. 
sqrt of anchor area) to use for the i-th feature map. - If sizes is list[float], the sizes are used for all feature maps. - Anchor sizes are given in absolute lengths in units of - the input image; they do not dynamically scale if the input image size changes. - aspect_ratios (list[list[float]] or list[float]): list of aspect ratios - (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. - strides (list[int]): stride of each input feature. - offset (float): Relative offset between the center of the first anchor and the top-left - corner of the image. Value has to be in [0, 1). - Recommend to use 0.5, which means half stride. - """ - super().__init__() - - self.strides = strides - self.num_features = len(self.strides) - sizes = _broadcast_params(sizes, self.num_features, "sizes") - aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") - self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios) - - self.offset = offset - assert 0.0 <= self.offset < 1.0, self.offset - - @classmethod - def from_config(cls, cfg, input_shape: List[ShapeSpec]): - return { - "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, - "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, - "strides": [x.stride for x in input_shape], - "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, - } - - def _calculate_anchors(self, sizes, aspect_ratios): - cell_anchors = [ - self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios) - ] - return BufferList(cell_anchors) - - @property - def num_cell_anchors(self): - """ - Alias of `num_anchors`. - """ - return self.num_anchors - - @property - def num_anchors(self): - """ - Returns: - list[int]: Each int is the number of anchors at every pixel - location, on that feature map. - For example, if at every pixel we use anchors of 3 aspect - ratios and 5 sizes, the number of anchors is 15. - (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config) - - In standard RPN models, `num_anchors` on every feature map is the same. - """ - return [len(cell_anchors) for cell_anchors in self.cell_anchors] - - def _grid_anchors(self, grid_sizes: List[List[int]]): - """ - Returns: - list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4 - """ - anchors = [] - for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): - shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) - shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1) - - anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4)) - - return anchors - - def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)): - """ - Generate a tensor storing canonical anchor boxes, which are all anchor - boxes of different sizes and aspect_ratios centered at (0, 0). - We can later build the set of anchors for a full feature map by - shifting and tiling these tensors (see `meth:_grid_anchors`). - - Args: - sizes (tuple[float]): - aspect_ratios (tuple[float]]): - - Returns: - Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes - in XYXY format. - """ - - # This is different from the anchor generator defined in the original Faster R-CNN - # code or Detectron. They yield the same AP, however the old version defines cell - # anchors in a less natural way with a shift relative to the feature grid and - # quantization that results in slightly different sizes for different aspect ratios. 
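_grid_anchors above tiles each set of cell anchors over the feature map by adding (shift_x, shift_y, shift_x, shift_y) offsets produced by _create_grid_offsets. A tiny numeric illustration with a 2x2 feature map, stride 16, offset 0.5, and one 32x32 cell anchor:

# Illustration of the shifting done in _grid_anchors above.
import torch

stride, offset = 16, 0.5
shifts_x = torch.arange(offset * stride, 2 * stride, step=stride)   # tensor([ 8., 24.])
shifts_y = torch.arange(offset * stride, 2 * stride, step=stride)
shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
shifts = torch.stack((shift_x.reshape(-1), shift_y.reshape(-1),
                      shift_x.reshape(-1), shift_y.reshape(-1)), dim=1)   # (4, 4)

base = torch.tensor([[-16., -16., 16., 16.]])        # one 32x32 cell anchor at (0, 0)
anchors = (shifts.view(-1, 1, 4) + base.view(1, -1, 4)).reshape(-1, 4)
# anchors[0] == tensor([-8., -8., 24., 24.])  -- the 32x32 box centered at (8, 8)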
- # See also https://github.com/facebookresearch/Detectron/issues/227 - - anchors = [] - for size in sizes: - area = size ** 2.0 - for aspect_ratio in aspect_ratios: - # s * s = w * h - # a = h / w - # ... some algebra ... - # w = sqrt(s * s / a) - # h = a * w - w = math.sqrt(area / aspect_ratio) - h = aspect_ratio * w - x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0 - anchors.append([x0, y0, x1, y1]) - return torch.tensor(anchors) - - def forward(self, features): - """ - Args: - features (list[Tensor]): list of backbone feature maps on which to generate anchors. - - Returns: - list[Boxes]: a list of Boxes containing all the anchors for each feature map - (i.e. the cell anchors repeated over all locations in the feature map). - The number of anchors of each feature map is Hi x Wi x num_cell_anchors, - where Hi, Wi are resolution of the feature map divided by anchor stride. - """ - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) - return [Boxes(x) for x in anchors_over_all_feature_maps] - - -@ANCHOR_GENERATOR_REGISTRY.register() -class RotatedAnchorGenerator(nn.Module): - """ - Compute rotated anchors used by Rotated RPN (RRPN), described in - "Arbitrary-Oriented Scene Text Detection via Rotation Proposals". - """ - - box_dim: int = 5 - """ - the dimension of each anchor box. - """ - - @configurable - def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5): - """ - This interface is experimental. - - Args: - sizes (list[list[float]] or list[float]): - If sizes is list[list[float]], sizes[i] is the list of anchor sizes - (i.e. sqrt of anchor area) to use for the i-th feature map. - If sizes is list[float], the sizes are used for all feature maps. - Anchor sizes are given in absolute lengths in units of - the input image; they do not dynamically scale if the input image size changes. - aspect_ratios (list[list[float]] or list[float]): list of aspect ratios - (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies. - strides (list[int]): stride of each input feature. - angles (list[list[float]] or list[float]): list of angles (in degrees CCW) - to use for anchors. Same "broadcast" rule for `sizes` applies. - offset (float): Relative offset between the center of the first anchor and the top-left - corner of the image. Value has to be in [0, 1). - Recommend to use 0.5, which means half stride. 
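# Editor's note: a small numeric sketch, not part of the original file, showing
# the effect of the offset=0.5 recommended above. With stride 16, anchor centers
# along one axis land at the half-stride positions 8, 24, 40, 56 for a 4-cell
# grid, exactly as _create_grid_offsets computes them.
import torch

stride, offset, grid_width = 16, 0.5, 4
shifts_x = torch.arange(offset * stride, grid_width * stride, step=stride,
                        dtype=torch.float32)
assert shifts_x.tolist() == [8.0, 24.0, 40.0, 56.0]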
- """ - super().__init__() - - self.strides = strides - self.num_features = len(self.strides) - sizes = _broadcast_params(sizes, self.num_features, "sizes") - aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios") - angles = _broadcast_params(angles, self.num_features, "angles") - self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles) - - self.offset = offset - assert 0.0 <= self.offset < 1.0, self.offset - - @classmethod - def from_config(cls, cfg, input_shape: List[ShapeSpec]): - return { - "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES, - "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS, - "strides": [x.stride for x in input_shape], - "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET, - "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES, - } - - def _calculate_anchors(self, sizes, aspect_ratios, angles): - cell_anchors = [ - self.generate_cell_anchors(size, aspect_ratio, angle).float() - for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles) - ] - return BufferList(cell_anchors) - - @property - def num_cell_anchors(self): - """ - Alias of `num_anchors`. - """ - return self.num_anchors - - @property - def num_anchors(self): - """ - Returns: - list[int]: Each int is the number of anchors at every pixel - location, on that feature map. - For example, if at every pixel we use anchors of 3 aspect - ratios, 2 sizes and 5 angles, the number of anchors is 30. - (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS - and ANCHOR_GENERATOR.ANGLES in config) - - In standard RRPN models, `num_anchors` on every feature map is the same. - """ - return [len(cell_anchors) for cell_anchors in self.cell_anchors] - - def _grid_anchors(self, grid_sizes): - anchors = [] - for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors): - shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors.device) - zeros = torch.zeros_like(shift_x) - shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1) - - anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5)) - - return anchors - - def generate_cell_anchors( - self, - sizes=(32, 64, 128, 256, 512), - aspect_ratios=(0.5, 1, 2), - angles=(-90, -60, -30, 0, 30, 60, 90), - ): - """ - Generate a tensor storing canonical anchor boxes, which are all anchor - boxes of different sizes, aspect_ratios, angles centered at (0, 0). - We can later build the set of anchors for a full feature map by - shifting and tiling these tensors (see `meth:_grid_anchors`). - - Args: - sizes (tuple[float]): - aspect_ratios (tuple[float]]): - angles (tuple[float]]): - - Returns: - Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5) - storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format. - """ - anchors = [] - for size in sizes: - area = size ** 2.0 - for aspect_ratio in aspect_ratios: - # s * s = w * h - # a = h / w - # ... some algebra ... - # w = sqrt(s * s / a) - # h = a * w - w = math.sqrt(area / aspect_ratio) - h = aspect_ratio * w - anchors.extend([0, 0, w, h, a] for a in angles) - - return torch.tensor(anchors) - - def forward(self, features): - """ - Args: - features (list[Tensor]): list of backbone feature maps on which to generate anchors. - - Returns: - list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map - (i.e. the cell anchors repeated over all locations in the feature map). 
- The number of anchors of each feature map is Hi x Wi x num_cell_anchors, - where Hi, Wi are resolution of the feature map divided by anchor stride. - """ - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_over_all_feature_maps = self._grid_anchors(grid_sizes) - return [RotatedBoxes(x) for x in anchors_over_all_feature_maps] - - -def build_anchor_generator(cfg, input_shape): - """ - Built an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`. - """ - anchor_generator = cfg.MODEL.ANCHOR_GENERATOR.NAME - return ANCHOR_GENERATOR_REGISTRY.get(anchor_generator)(cfg, input_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/__init__.py deleted file mode 100644 index d477fb1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip - -from .backbone import Backbone -from .fpn import FPN -from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage - -__all__ = [k for k in globals().keys() if not k.startswith("_")] -# TODO can expose more resnet blocks after careful consideration diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/backbone.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/backbone.py deleted file mode 100644 index 66dee4a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/backbone.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from abc import ABCMeta, abstractmethod -import torch.nn as nn - -from detectron2.layers import ShapeSpec - -__all__ = ["Backbone"] - - -class Backbone(nn.Module, metaclass=ABCMeta): - """ - Abstract base class for network backbones. - """ - - def __init__(self): - """ - The `__init__` method of any subclass can specify its own set of arguments. - """ - super().__init__() - - @abstractmethod - def forward(self): - """ - Subclasses must override this method, but adhere to the same return type. - - Returns: - dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor - """ - pass - - @property - def size_divisibility(self): - """ - Some backbones require the input height and width to be divisible by a - specific integer. This is typically true for encoder / decoder type networks - with lateral connection (e.g., FPN) for which feature maps need to match - dimension in the "bottom up" and "top down" paths. Set to 0 if no specific - input size divisibility is required. - """ - return 0 - - def output_shape(self): - """ - Returns: - dict[str->ShapeSpec] - """ - # this is a backward-compatible default - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/build.py deleted file mode 100644 index 3d2ecae..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/build.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from detectron2.layers import ShapeSpec -from detectron2.utils.registry import Registry - -from .backbone import Backbone - -BACKBONE_REGISTRY = Registry("BACKBONE") -BACKBONE_REGISTRY.__doc__ = """ -Registry for backbones, which extract feature maps from images - -The registered object must be a callable that accepts two arguments: - -1. A :class:`detectron2.config.CfgNode` -2. A :class:`detectron2.layers.ShapeSpec`, which contains the input shape specification. - -It must returns an instance of :class:`Backbone`. -""" - - -def build_backbone(cfg, input_shape=None): - """ - Build a backbone from `cfg.MODEL.BACKBONE.NAME`. - - Returns: - an instance of :class:`Backbone` - """ - if input_shape is None: - input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) - - backbone_name = cfg.MODEL.BACKBONE.NAME - backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) - assert isinstance(backbone, Backbone) - return backbone diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/fpn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/fpn.py deleted file mode 100644 index 338b5f5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/fpn.py +++ /dev/null @@ -1,245 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -import fvcore.nn.weight_init as weight_init -import torch.nn.functional as F -from torch import nn - -from detectron2.layers import Conv2d, ShapeSpec, get_norm - -from .backbone import Backbone -from .build import BACKBONE_REGISTRY -from .resnet import build_resnet_backbone - -__all__ = ["build_resnet_fpn_backbone", "build_retinanet_resnet_fpn_backbone", "FPN"] - - -class FPN(Backbone): - """ - This module implements :paper:`FPN`. - It creates pyramid features built on top of some input feature maps. - """ - - def __init__( - self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum" - ): - """ - Args: - bottom_up (Backbone): module representing the bottom up subnetwork. - Must be a subclass of :class:`Backbone`. The multi-scale feature - maps generated by the bottom up network, and listed in `in_features`, - are used to generate FPN levels. - in_features (list[str]): names of the input feature maps coming - from the backbone to which FPN is attached. For example, if the - backbone produces ["res2", "res3", "res4"], any *contiguous* sublist - of these may be used; order must be from high to low resolution. - out_channels (int): number of channels in the output feature maps. - norm (str): the normalization to use. - top_block (nn.Module or None): if provided, an extra operation will - be performed on the output of the last (smallest resolution) - FPN output, and the result will extend the result list. The top_block - further downsamples the feature map. It must have an attribute - "num_levels", meaning the number of extra FPN levels added by - this block, and "in_feature", which is a string representing - its input feature (e.g., p5). - fuse_type (str): types for fusing the top down features and the lateral - ones. It can be "sum" (default), which sums up element-wise; or "avg", - which takes the element-wise mean of the two. - """ - super(FPN, self).__init__() - assert isinstance(bottom_up, Backbone) - - # Feature map strides and channels from the bottom up network (e.g. 
ResNet) - input_shapes = bottom_up.output_shape() - in_strides = [input_shapes[f].stride for f in in_features] - in_channels = [input_shapes[f].channels for f in in_features] - - _assert_strides_are_log2_contiguous(in_strides) - lateral_convs = [] - output_convs = [] - - use_bias = norm == "" - for idx, in_channels in enumerate(in_channels): - lateral_norm = get_norm(norm, out_channels) - output_norm = get_norm(norm, out_channels) - - lateral_conv = Conv2d( - in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm - ) - output_conv = Conv2d( - out_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - bias=use_bias, - norm=output_norm, - ) - weight_init.c2_xavier_fill(lateral_conv) - weight_init.c2_xavier_fill(output_conv) - stage = int(math.log2(in_strides[idx])) - self.add_module("fpn_lateral{}".format(stage), lateral_conv) - self.add_module("fpn_output{}".format(stage), output_conv) - - lateral_convs.append(lateral_conv) - output_convs.append(output_conv) - # Place convs into top-down order (from low to high resolution) - # to make the top-down computation in forward clearer. - self.lateral_convs = lateral_convs[::-1] - self.output_convs = output_convs[::-1] - self.top_block = top_block - self.in_features = in_features - self.bottom_up = bottom_up - # Return feature names are "p", like ["p2", "p3", ..., "p6"] - self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in in_strides} - # top block output feature maps. - if self.top_block is not None: - for s in range(stage, stage + self.top_block.num_levels): - self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1) - - self._out_features = list(self._out_feature_strides.keys()) - self._out_feature_channels = {k: out_channels for k in self._out_features} - self._size_divisibility = in_strides[-1] - assert fuse_type in {"avg", "sum"} - self._fuse_type = fuse_type - - @property - def size_divisibility(self): - return self._size_divisibility - - def forward(self, x): - """ - Args: - input (dict[str->Tensor]): mapping feature map name (e.g., "res5") to - feature map tensor for each feature level in high to low resolution order. - - Returns: - dict[str->Tensor]: - mapping from feature map name to FPN feature map tensor - in high to low resolution order. Returned feature names follow the FPN - paper convention: "p", where stage has stride = 2 ** stage e.g., - ["p2", "p3", ..., "p6"]. 
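# Editor's note: a tiny sketch, not part of the original file, of the "p<k>"
# naming convention documented above: output "p<k>" has stride 2**k, so ResNet
# strides (4, 8, 16, 32) become ("p2", "p3", "p4", "p5"), and a LastLevelMaxPool
# top block appends "p6" at stride 64.
import math

in_strides = [4, 8, 16, 32]
names = ["p{}".format(int(math.log2(s))) for s in in_strides]
assert names == ["p2", "p3", "p4", "p5"]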
- """ - # Reverse feature maps into top-down order (from low to high resolution) - bottom_up_features = self.bottom_up(x) - x = [bottom_up_features[f] for f in self.in_features[::-1]] - results = [] - prev_features = self.lateral_convs[0](x[0]) - results.append(self.output_convs[0](prev_features)) - for features, lateral_conv, output_conv in zip( - x[1:], self.lateral_convs[1:], self.output_convs[1:] - ): - top_down_features = F.interpolate(prev_features, scale_factor=2, mode="nearest") - lateral_features = lateral_conv(features) - prev_features = lateral_features + top_down_features - if self._fuse_type == "avg": - prev_features /= 2 - results.insert(0, output_conv(prev_features)) - - if self.top_block is not None: - top_block_in_feature = bottom_up_features.get(self.top_block.in_feature, None) - if top_block_in_feature is None: - top_block_in_feature = results[self._out_features.index(self.top_block.in_feature)] - results.extend(self.top_block(top_block_in_feature)) - assert len(self._out_features) == len(results) - return dict(zip(self._out_features, results)) - - def output_shape(self): - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } - - -def _assert_strides_are_log2_contiguous(strides): - """ - Assert that each stride is 2x times its preceding stride, i.e. "contiguous in log2". - """ - for i, stride in enumerate(strides[1:], 1): - assert stride == 2 * strides[i - 1], "Strides {} {} are not log2 contiguous".format( - stride, strides[i - 1] - ) - - -class LastLevelMaxPool(nn.Module): - """ - This module is used in the original FPN to generate a downsampled - P6 feature from P5. - """ - - def __init__(self): - super().__init__() - self.num_levels = 1 - self.in_feature = "p5" - - def forward(self, x): - return [F.max_pool2d(x, kernel_size=1, stride=2, padding=0)] - - -class LastLevelP6P7(nn.Module): - """ - This module is used in RetinaNet to generate extra layers, P6 and P7 from - C5 feature. - """ - - def __init__(self, in_channels, out_channels, in_feature="res5"): - super().__init__() - self.num_levels = 2 - self.in_feature = in_feature - self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) - self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) - for module in [self.p6, self.p7]: - weight_init.c2_xavier_fill(module) - - def forward(self, c5): - p6 = self.p6(c5) - p7 = self.p7(F.relu(p6)) - return [p6, p7] - - -@BACKBONE_REGISTRY.register() -def build_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): - """ - Args: - cfg: a detectron2 CfgNode - - Returns: - backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. - """ - bottom_up = build_resnet_backbone(cfg, input_shape) - in_features = cfg.MODEL.FPN.IN_FEATURES - out_channels = cfg.MODEL.FPN.OUT_CHANNELS - backbone = FPN( - bottom_up=bottom_up, - in_features=in_features, - out_channels=out_channels, - norm=cfg.MODEL.FPN.NORM, - top_block=LastLevelMaxPool(), - fuse_type=cfg.MODEL.FPN.FUSE_TYPE, - ) - return backbone - - -@BACKBONE_REGISTRY.register() -def build_retinanet_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): - """ - Args: - cfg: a detectron2 CfgNode - - Returns: - backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
- """ - bottom_up = build_resnet_backbone(cfg, input_shape) - in_features = cfg.MODEL.FPN.IN_FEATURES - out_channels = cfg.MODEL.FPN.OUT_CHANNELS - in_channels_p6p7 = bottom_up.output_shape()["res5"].channels - backbone = FPN( - bottom_up=bottom_up, - in_features=in_features, - out_channels=out_channels, - norm=cfg.MODEL.FPN.NORM, - top_block=LastLevelP6P7(in_channels_p6p7, out_channels), - fuse_type=cfg.MODEL.FPN.FUSE_TYPE, - ) - return backbone diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/resnet.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/resnet.py deleted file mode 100644 index f1faae0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/backbone/resnet.py +++ /dev/null @@ -1,591 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn.functional as F -from torch import nn - -from detectron2.layers import ( - CNNBlockBase, - Conv2d, - DeformConv, - ModulatedDeformConv, - ShapeSpec, - get_norm, -) - -from .backbone import Backbone -from .build import BACKBONE_REGISTRY - -__all__ = [ - "ResNetBlockBase", - "BasicBlock", - "BottleneckBlock", - "DeformBottleneckBlock", - "BasicStem", - "ResNet", - "make_stage", - "build_resnet_backbone", -] - - -ResNetBlockBase = CNNBlockBase -""" -Alias for backward compatibiltiy. -""" - - -class BasicBlock(CNNBlockBase): - """ - The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`, - with two 3x3 conv layers and a projection shortcut if needed. - """ - - def __init__(self, in_channels, out_channels, *, stride=1, norm="BN"): - """ - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - stride (int): Stride for the first conv. - norm (str or callable): normalization for all conv layers. - See :func:`layers.get_norm` for supported format. - """ - super().__init__(in_channels, out_channels, stride) - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - self.conv1 = Conv2d( - in_channels, - out_channels, - kernel_size=3, - stride=stride, - padding=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - self.conv2 = Conv2d( - out_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - out = self.conv2(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -class BottleneckBlock(CNNBlockBase): - """ - The standard bottleneck residual block used by ResNet-50, 101 and 152 - defined in :paper:`ResNet`. It contains 3 conv layers with kernels - 1x1, 3x3, 1x1, and a projection shortcut if needed. - """ - - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - dilation=1, - ): - """ - Args: - bottleneck_channels (int): number of output channels for the 3x3 - "bottleneck" conv layers. 
- num_groups (int): number of groups for the 3x3 conv layer. - norm (str or callable): normalization for all conv layers. - See :func:`layers.get_norm` for supported format. - stride_in_1x1 (bool): when stride>1, whether to put stride in the - first 1x1 convolution or the bottleneck 3x3 convolution. - dilation (int): the dilation rate of the 3x3 conv layer. - """ - super().__init__(in_channels, out_channels, stride) - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - # The original MSRA ResNet models have stride in the first 1x1 conv - # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have - # stride in the 3x3 conv - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv2 = Conv2d( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - bias=False, - groups=num_groups, - dilation=dilation, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - # Zero-initialize the last normalization in each residual branch, - # so that at the beginning, the residual branch starts with zeros, - # and each residual block behaves like an identity. - # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": - # "For BN layers, the learnable scaling coefficient γ is initialized - # to be 1, except for each residual block's last BN - # where γ is initialized to be 0." - - # nn.init.constant_(self.conv3.norm.weight, 0) - # TODO this somehow hurts performance when training GN models from scratch. - # Add it as an option when we need to use this code to train a backbone. - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - - out = self.conv2(out) - out = F.relu_(out) - - out = self.conv3(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -class DeformBottleneckBlock(ResNetBlockBase): - """ - Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv ` - in the 3x3 convolution. 
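# Editor's note: illustrative numbers only, not part of the original file. For
# the first "res2" bottleneck of ResNet-50 the channel flow is
# 64 -> 64 (1x1) -> 64 (3x3) -> 256 (1x1), with a 1x1 projection shortcut since
# in_channels != out_channels. stride_in_1x1 picks where a stride-2 block
# strides, matching the MSRA-vs-Caffe2 comment in the constructor above.
stride, stride_in_1x1 = 2, True
stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)
assert (stride_1x1, stride_3x3) == (2, 1)   # original MSRA style: stride in the first 1x1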
- """ - - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - dilation=1, - deform_modulated=False, - deform_num_groups=1, - ): - super().__init__(in_channels, out_channels, stride) - self.deform_modulated = deform_modulated - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - if deform_modulated: - deform_conv_op = ModulatedDeformConv - # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size - offset_channels = 27 - else: - deform_conv_op = DeformConv - offset_channels = 18 - - self.conv2_offset = Conv2d( - bottleneck_channels, - offset_channels * deform_num_groups, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - dilation=dilation, - ) - self.conv2 = deform_conv_op( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - padding=1 * dilation, - bias=False, - groups=num_groups, - dilation=dilation, - deformable_groups=deform_num_groups, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - nn.init.constant_(self.conv2_offset.weight, 0) - nn.init.constant_(self.conv2_offset.bias, 0) - - def forward(self, x): - out = self.conv1(x) - out = F.relu_(out) - - if self.deform_modulated: - offset_mask = self.conv2_offset(out) - offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1) - offset = torch.cat((offset_x, offset_y), dim=1) - mask = mask.sigmoid() - out = self.conv2(out, offset, mask) - else: - offset = self.conv2_offset(out) - out = self.conv2(out, offset) - out = F.relu_(out) - - out = self.conv3(out) - - if self.shortcut is not None: - shortcut = self.shortcut(x) - else: - shortcut = x - - out += shortcut - out = F.relu_(out) - return out - - -def make_stage(block_class, num_blocks, first_stride, *, in_channels, out_channels, **kwargs): - """ - Create a list of blocks just like those in a ResNet stage. - - Args: - block_class (type): a subclass of ResNetBlockBase - num_blocks (int): - first_stride (int): the stride of the first block. The other blocks will have stride=1. - in_channels (int): input channels of the entire stage. - out_channels (int): output channels of **every block** in the stage. - kwargs: other arguments passed to the constructor of every block. - - Returns: - list[nn.Module]: a list of block module. - """ - assert "stride" not in kwargs, "Stride of blocks in make_stage cannot be changed." - blocks = [] - for i in range(num_blocks): - blocks.append( - block_class( - in_channels=in_channels, - out_channels=out_channels, - stride=first_stride if i == 0 else 1, - **kwargs, - ) - ) - in_channels = out_channels - return blocks - - -class BasicStem(CNNBlockBase): - """ - The standard ResNet stem (layers before the first residual block). 
- """ - - def __init__(self, in_channels=3, out_channels=64, norm="BN"): - """ - Args: - norm (str or callable): norm after the first conv layer. - See :func:`layers.get_norm` for supported format. - """ - super().__init__(in_channels, out_channels, 4) - self.in_channels = in_channels - self.conv1 = Conv2d( - in_channels, - out_channels, - kernel_size=7, - stride=2, - padding=3, - bias=False, - norm=get_norm(norm, out_channels), - ) - weight_init.c2_msra_fill(self.conv1) - - def forward(self, x): - x = self.conv1(x) - x = F.relu_(x) - x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1) - return x - - -class ResNet(Backbone): - """ - Implement :paper:`ResNet`. - """ - - def __init__(self, stem, stages, num_classes=None, out_features=None): - """ - Args: - stem (nn.Module): a stem module - stages (list[list[CNNBlockBase]]): several (typically 4) stages, - each contains multiple :class:`CNNBlockBase`. - num_classes (None or int): if None, will not perform classification. - Otherwise, will create a linear layer. - out_features (list[str]): name of the layers whose outputs should - be returned in forward. Can be anything in "stem", "linear", or "res2" ... - If None, will return the output of the last layer. - """ - super(ResNet, self).__init__() - self.stem = stem - self.num_classes = num_classes - - current_stride = self.stem.stride - self._out_feature_strides = {"stem": current_stride} - self._out_feature_channels = {"stem": self.stem.out_channels} - - self.stages_and_names = [] - for i, blocks in enumerate(stages): - assert len(blocks) > 0, len(blocks) - for block in blocks: - assert isinstance(block, CNNBlockBase), block - - name = "res" + str(i + 2) - stage = nn.Sequential(*blocks) - - self.add_module(name, stage) - self.stages_and_names.append((stage, name)) - - self._out_feature_strides[name] = current_stride = int( - current_stride * np.prod([k.stride for k in blocks]) - ) - self._out_feature_channels[name] = curr_channels = blocks[-1].out_channels - - if num_classes is not None: - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.linear = nn.Linear(curr_channels, num_classes) - - # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": - # "The 1000-way fully-connected layer is initialized by - # drawing weights from a zero-mean Gaussian with standard deviation of 0.01." - nn.init.normal_(self.linear.weight, std=0.01) - name = "linear" - - if out_features is None: - out_features = [name] - self._out_features = out_features - assert len(self._out_features) - children = [x[0] for x in self.named_children()] - for out_feature in self._out_features: - assert out_feature in children, "Available children: {}".format(", ".join(children)) - - def forward(self, x): - outputs = {} - x = self.stem(x) - if "stem" in self._out_features: - outputs["stem"] = x - for stage, name in self.stages_and_names: - x = stage(x) - if name in self._out_features: - outputs[name] = x - if self.num_classes is not None: - x = self.avgpool(x) - x = torch.flatten(x, 1) - x = self.linear(x) - if "linear" in self._out_features: - outputs["linear"] = x - return outputs - - def output_shape(self): - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } - - def freeze(self, freeze_at=0): - """ - Freeze the first several stages of the ResNet. Commonly used in - fine-tuning. - - Layers that produce the same feature map spatial size are defined as one - "stage" by :paper:`FPN`. 
- - Args: - freeze_at (int): number of stages to freeze. - `1` means freezing the stem. `2` means freezing the stem and - one residual stage, etc. - - Returns: - nn.Module: this ResNet itself - """ - if freeze_at >= 1: - self.stem.freeze() - for idx, (stage, _) in enumerate(self.stages_and_names, start=2): - if freeze_at >= idx: - for block in stage.children(): - block.freeze() - return self - - -@BACKBONE_REGISTRY.register() -def build_resnet_backbone(cfg, input_shape): - """ - Create a ResNet instance from config. - - Returns: - ResNet: a :class:`ResNet` instance. - """ - # need registration of new blocks/stems? - norm = cfg.MODEL.RESNETS.NORM - stem = BasicStem( - in_channels=input_shape.channels, - out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, - norm=norm, - ) - - # fmt: off - freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT - out_features = cfg.MODEL.RESNETS.OUT_FEATURES - depth = cfg.MODEL.RESNETS.DEPTH - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group - in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION - deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE - deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED - deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS - # fmt: on - assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) - - num_blocks_per_stage = { - 18: [2, 2, 2, 2], - 34: [3, 4, 6, 3], - 50: [3, 4, 6, 3], - 101: [3, 4, 23, 3], - 152: [3, 8, 36, 3], - }[depth] - - if depth in [18, 34]: - assert out_channels == 64, "Must set MODEL.RESNETS.RES2_OUT_CHANNELS = 64 for R18/R34" - assert not any( - deform_on_per_stage - ), "MODEL.RESNETS.DEFORM_ON_PER_STAGE unsupported for R18/R34" - assert res5_dilation == 1, "Must set MODEL.RESNETS.RES5_DILATION = 1 for R18/R34" - assert num_groups == 1, "Must set MODEL.RESNETS.NUM_GROUPS = 1 for R18/R34" - - stages = [] - - # Avoid creating variables without gradients - # It consumes extra memory and may cause allreduce to fail - out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] - max_stage_idx = max(out_stage_idx) - for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): - dilation = res5_dilation if stage_idx == 5 else 1 - first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 - stage_kargs = { - "num_blocks": num_blocks_per_stage[idx], - "first_stride": first_stride, - "in_channels": in_channels, - "out_channels": out_channels, - "norm": norm, - } - # Use BasicBlock for R18 and R34. 
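# Editor's note (not part of the original file): the num_blocks_per_stage table
# above matches the standard variants; e.g. depth 50 uses 3 + 4 + 6 + 3 = 16
# bottleneck blocks (48 convolutions), which together with the stem conv and
# the final linear layer accounts for the "50" in ResNet-50.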
- if depth in [18, 34]: - stage_kargs["block_class"] = BasicBlock - else: - stage_kargs["bottleneck_channels"] = bottleneck_channels - stage_kargs["stride_in_1x1"] = stride_in_1x1 - stage_kargs["dilation"] = dilation - stage_kargs["num_groups"] = num_groups - if deform_on_per_stage[idx]: - stage_kargs["block_class"] = DeformBottleneckBlock - stage_kargs["deform_modulated"] = deform_modulated - stage_kargs["deform_num_groups"] = deform_num_groups - else: - stage_kargs["block_class"] = BottleneckBlock - blocks = make_stage(**stage_kargs) - in_channels = out_channels - out_channels *= 2 - bottleneck_channels *= 2 - stages.append(blocks) - return ResNet(stem, stages, out_features=out_features).freeze(freeze_at) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/box_regression.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/box_regression.py deleted file mode 100644 index 88426fd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/box_regression.py +++ /dev/null @@ -1,247 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from typing import Tuple -import torch - -# Value for clamping large dw and dh predictions. The heuristic is that we clamp -# such that dw and dh are no larger than what would transform a 16px box into a -# 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). -_DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16) - - -__all__ = ["Box2BoxTransform", "Box2BoxTransformRotated"] - - -def apply_deltas_broadcast(box2box_transform, deltas, boxes): - """ - Apply transform deltas to boxes. Similar to `box2box_transform.apply_deltas`, - but allow broadcasting boxes when the second dimension of deltas is a multiple - of box dimension. - - Args: - box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): the transform to apply - deltas (Tensor): tensor of shape (N,B) or (N,KxB) - boxes (Tensor): tensor of shape (N,B) - - Returns: - Tensor: same shape as deltas. - """ - assert deltas.dim() == boxes.dim() == 2, f"{deltas.shape}, {boxes.shape}" - N, B = boxes.shape - assert ( - deltas.shape[1] % B == 0 - ), f"Second dim of deltas should be a multiple of {B}. Got {deltas.shape}" - K = deltas.shape[1] // B - ret = box2box_transform.apply_deltas( - deltas.view(N * K, B), boxes.unsqueeze(1).expand(N, K, B).reshape(N * K, B) - ) - return ret.view(N, K * B) - - -@torch.jit.script -class Box2BoxTransform(object): - """ - The box-to-box transform defined in R-CNN. The transformation is parameterized - by 4 deltas: (dx, dy, dw, dh). The transformation scales the box's width and height - by exp(dw), exp(dh) and shifts a box's center by the offset (dx * width, dy * height). - """ - - def __init__( - self, weights: Tuple[float, float, float, float], scale_clamp: float = _DEFAULT_SCALE_CLAMP - ): - """ - Args: - weights (4-element tuple): Scaling factors that are applied to the - (dx, dy, dw, dh) deltas. In Fast R-CNN, these were originally set - such that the deltas have unit variance; now they are treated as - hyperparameters of the system. - scale_clamp (float): When predicting deltas, the predicted box scaling - factors (dw and dh) are clamped such that they are <= scale_clamp. - """ - self.weights = weights - self.scale_clamp = scale_clamp - - def get_deltas(self, src_boxes, target_boxes): - """ - Get box regression transformation deltas (dx, dy, dw, dh) that can be used - to transform the `src_boxes` into the `target_boxes`. 
That is, the relation - ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless - any delta is too large and is clamped). - - Args: - src_boxes (Tensor): source boxes, e.g., object proposals - target_boxes (Tensor): target of the transformation, e.g., ground-truth - boxes. - """ - assert isinstance(src_boxes, torch.Tensor), type(src_boxes) - assert isinstance(target_boxes, torch.Tensor), type(target_boxes) - - src_widths = src_boxes[:, 2] - src_boxes[:, 0] - src_heights = src_boxes[:, 3] - src_boxes[:, 1] - src_ctr_x = src_boxes[:, 0] + 0.5 * src_widths - src_ctr_y = src_boxes[:, 1] + 0.5 * src_heights - - target_widths = target_boxes[:, 2] - target_boxes[:, 0] - target_heights = target_boxes[:, 3] - target_boxes[:, 1] - target_ctr_x = target_boxes[:, 0] + 0.5 * target_widths - target_ctr_y = target_boxes[:, 1] + 0.5 * target_heights - - wx, wy, ww, wh = self.weights - dx = wx * (target_ctr_x - src_ctr_x) / src_widths - dy = wy * (target_ctr_y - src_ctr_y) / src_heights - dw = ww * torch.log(target_widths / src_widths) - dh = wh * torch.log(target_heights / src_heights) - - deltas = torch.stack((dx, dy, dw, dh), dim=1) - assert (src_widths > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!" - return deltas - - def apply_deltas(self, deltas, boxes): - """ - Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. - - Args: - deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. - deltas[i] represents k potentially different class-specific - box transformations for the single box boxes[i]. - boxes (Tensor): boxes to transform, of shape (N, 4) - """ - boxes = boxes.to(deltas.dtype) - - widths = boxes[:, 2] - boxes[:, 0] - heights = boxes[:, 3] - boxes[:, 1] - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - - wx, wy, ww, wh = self.weights - dx = deltas[:, 0::4] / wx - dy = deltas[:, 1::4] / wy - dw = deltas[:, 2::4] / ww - dh = deltas[:, 3::4] / wh - - # Prevent sending too large values into torch.exp() - dw = torch.clamp(dw, max=self.scale_clamp) - dh = torch.clamp(dh, max=self.scale_clamp) - - pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] - pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] - pred_w = torch.exp(dw) * widths[:, None] - pred_h = torch.exp(dh) * heights[:, None] - - pred_boxes = torch.zeros_like(deltas) - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1 - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1 - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2 - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2 - return pred_boxes - - -@torch.jit.script -class Box2BoxTransformRotated(object): - """ - The box-to-box transform defined in Rotated R-CNN. The transformation is parameterized - by 5 deltas: (dx, dy, dw, dh, da). The transformation scales the box's width and height - by exp(dw), exp(dh), shifts a box's center by the offset (dx * width, dy * height), - and rotate a box's angle by da (radians). - Note: angles of deltas are in radians while angles of boxes are in degrees. - """ - - def __init__( - self, - weights: Tuple[float, float, float, float, float], - scale_clamp: float = _DEFAULT_SCALE_CLAMP, - ): - """ - Args: - weights (5-element tuple): Scaling factors that are applied to the - (dx, dy, dw, dh, da) deltas. These are treated as - hyperparameters of the system. - scale_clamp (float): When predicting deltas, the predicted box scaling - factors (dw and dh) are clamped such that they are <= scale_clamp. 
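# Editor's note: a numeric sketch, not part of the original file, of the inverse
# relationship documented for Box2BoxTransform above:
# apply_deltas(get_deltas(src, tgt), src) recovers tgt up to floating point.
import torch

transform = Box2BoxTransform(weights=(1.0, 1.0, 1.0, 1.0))   # class defined above
src = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
tgt = torch.tensor([[2.0, 3.0, 14.0, 11.0]])
deltas = transform.get_deltas(src, tgt)
assert torch.allclose(transform.apply_deltas(deltas, src), tgt, atol=1e-4)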
- """ - self.weights = weights - self.scale_clamp = scale_clamp - - def get_deltas(self, src_boxes, target_boxes): - """ - Get box regression transformation deltas (dx, dy, dw, dh, da) that can be used - to transform the `src_boxes` into the `target_boxes`. That is, the relation - ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless - any delta is too large and is clamped). - - Args: - src_boxes (Tensor): Nx5 source boxes, e.g., object proposals - target_boxes (Tensor): Nx5 target of the transformation, e.g., ground-truth - boxes. - """ - assert isinstance(src_boxes, torch.Tensor), type(src_boxes) - assert isinstance(target_boxes, torch.Tensor), type(target_boxes) - - src_ctr_x, src_ctr_y, src_widths, src_heights, src_angles = torch.unbind(src_boxes, dim=1) - - target_ctr_x, target_ctr_y, target_widths, target_heights, target_angles = torch.unbind( - target_boxes, dim=1 - ) - - wx, wy, ww, wh, wa = self.weights - dx = wx * (target_ctr_x - src_ctr_x) / src_widths - dy = wy * (target_ctr_y - src_ctr_y) / src_heights - dw = ww * torch.log(target_widths / src_widths) - dh = wh * torch.log(target_heights / src_heights) - # Angles of deltas are in radians while angles of boxes are in degrees. - # the conversion to radians serve as a way to normalize the values - da = target_angles - src_angles - da = (da + 180.0) % 360.0 - 180.0 # make it in [-180, 180) - da *= wa * math.pi / 180.0 - - deltas = torch.stack((dx, dy, dw, dh, da), dim=1) - assert ( - (src_widths > 0).all().item() - ), "Input boxes to Box2BoxTransformRotated are not valid!" - return deltas - - def apply_deltas(self, deltas, boxes): - """ - Apply transformation `deltas` (dx, dy, dw, dh, da) to `boxes`. - - Args: - deltas (Tensor): transformation deltas of shape (N, 5). - deltas[i] represents box transformation for the single box boxes[i]. - boxes (Tensor): boxes to transform, of shape (N, 5) - """ - assert deltas.shape[1] == 5 and boxes.shape[1] == 5 - - boxes = boxes.to(deltas.dtype) - - ctr_x = boxes[:, 0] - ctr_y = boxes[:, 1] - widths = boxes[:, 2] - heights = boxes[:, 3] - angles = boxes[:, 4] - - wx, wy, ww, wh, wa = self.weights - - dx = deltas[:, 0] / wx - dy = deltas[:, 1] / wy - dw = deltas[:, 2] / ww - dh = deltas[:, 3] / wh - da = deltas[:, 4] / wa - - # Prevent sending too large values into torch.exp() - dw = torch.clamp(dw, max=self.scale_clamp) - dh = torch.clamp(dh, max=self.scale_clamp) - - pred_boxes = torch.zeros_like(deltas) - pred_boxes[:, 0] = dx * widths + ctr_x # x_ctr - pred_boxes[:, 1] = dy * heights + ctr_y # y_ctr - pred_boxes[:, 2] = torch.exp(dw) * widths # width - pred_boxes[:, 3] = torch.exp(dh) * heights # height - - # Following original RRPN implementation, - # angles of deltas are in radians while angles of boxes are in degrees. - pred_angle = da * 180.0 / math.pi + angles - pred_angle = (pred_angle + 180.0) % 360.0 - 180.0 # make it in [-180, 180) - - pred_boxes[:, 4] = pred_angle - - return pred_boxes diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/matcher.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/matcher.py deleted file mode 100644 index 2911f8c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/matcher.py +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from typing import List -import torch - - -class Matcher(object): - """ - This class assigns to each predicted "element" (e.g., a box) a ground-truth - element. 
Each predicted element will have exactly zero or one matches; each - ground-truth element may be matched to zero or more predicted elements. - - The matching is determined by the MxN match_quality_matrix, that characterizes - how well each (ground-truth, prediction)-pair match each other. For example, - if the elements are boxes, this matrix may contain box intersection-over-union - overlap values. - - The matcher returns (a) a vector of length N containing the index of the - ground-truth element m in [0, M) that matches to prediction n in [0, N). - (b) a vector of length N containing the labels for each prediction. - """ - - def __init__( - self, thresholds: List[float], labels: List[int], allow_low_quality_matches: bool = False - ): - """ - Args: - thresholds (list): a list of thresholds used to stratify predictions - into levels. - labels (list): a list of values to label predictions belonging at - each level. A label can be one of {-1, 0, 1} signifying - {ignore, negative class, positive class}, respectively. - allow_low_quality_matches (bool): if True, produce additional matches - for predictions with maximum match quality lower than high_threshold. - See set_low_quality_matches_ for more details. - - For example, - thresholds = [0.3, 0.5] - labels = [0, -1, 1] - All predictions with iou < 0.3 will be marked with 0 and - thus will be considered as false positives while training. - All predictions with 0.3 <= iou < 0.5 will be marked with -1 and - thus will be ignored. - All predictions with 0.5 <= iou will be marked with 1 and - thus will be considered as true positives. - """ - # Add -inf and +inf to first and last position in thresholds - thresholds = thresholds[:] - assert thresholds[0] > 0 - thresholds.insert(0, -float("inf")) - thresholds.append(float("inf")) - assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])) - assert all(l in [-1, 0, 1] for l in labels) - assert len(labels) == len(thresholds) - 1 - self.thresholds = thresholds - self.labels = labels - self.allow_low_quality_matches = allow_low_quality_matches - - def __call__(self, match_quality_matrix): - """ - Args: - match_quality_matrix (Tensor[float]): an MxN tensor, containing the - pairwise quality between M ground-truth elements and N predicted - elements. All elements must be >= 0 (due to the us of `torch.nonzero` - for selecting indices in :meth:`set_low_quality_matches_`). 
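# Editor's note: a tiny numeric sketch, not part of the original file, of the
# thresholds/labels scheme documented in the constructor above. With
# thresholds=[0.3, 0.5] and labels=[0, -1, 1], an IoU of 0.2 is background (0),
# 0.4 is ignored (-1) and 0.7 is foreground (1).
import torch

matcher = Matcher(thresholds=[0.3, 0.5], labels=[0, -1, 1])   # class defined above
iou = torch.tensor([[0.2, 0.4, 0.7]])        # 1 ground-truth box x 3 predictions
matches, match_labels = matcher(iou)
assert matches.tolist() == [0, 0, 0]
assert match_labels.tolist() == [0, -1, 1]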
- - Returns: - matches (Tensor[int64]): a vector of length N, where matches[i] is a matched - ground-truth index in [0, M) - match_labels (Tensor[int8]): a vector of length N, where pred_labels[i] indicates - whether a prediction is a true or false positive or ignored - """ - assert match_quality_matrix.dim() == 2 - if match_quality_matrix.numel() == 0: - default_matches = match_quality_matrix.new_full( - (match_quality_matrix.size(1),), 0, dtype=torch.int64 - ) - # When no gt boxes exist, we define IOU = 0 and therefore set labels - # to `self.labels[0]`, which usually defaults to background class 0 - # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds - default_match_labels = match_quality_matrix.new_full( - (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8 - ) - return default_matches, default_match_labels - - assert torch.all(match_quality_matrix >= 0) - - # match_quality_matrix is M (gt) x N (predicted) - # Max over gt elements (dim 0) to find best gt candidate for each prediction - matched_vals, matches = match_quality_matrix.max(dim=0) - - match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) - - for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]): - low_high = (matched_vals >= low) & (matched_vals < high) - match_labels[low_high] = l - - if self.allow_low_quality_matches: - self.set_low_quality_matches_(match_labels, match_quality_matrix) - - return matches, match_labels - - def set_low_quality_matches_(self, match_labels, match_quality_matrix): - """ - Produce additional matches for predictions that have only low-quality matches. - Specifically, for each ground-truth G find the set of predictions that have - maximum overlap with it (including ties); for each prediction in that set, if - it is unmatched, then match it to the ground-truth G. - - This function implements the RPN assignment case (i) in Sec. 3.1.2 of - :paper:`Faster R-CNN`. - """ - # For each gt, find the prediction with which it has highest quality - highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) - # Find the highest quality match available, even if it is low, including ties. - # Note that the matches qualities must be positive due to the use of - # `torch.nonzero`. - _, pred_inds_with_highest_quality = torch.nonzero( - match_quality_matrix == highest_quality_foreach_gt[:, None], as_tuple=True - ) - # If an anchor was labeled positive only due to a low-quality match - # with gt_A, but it has larger overlap with gt_B, it's matched index will still be gt_B. - # This follows the implementation in Detectron, and is found to have no significant impact. - match_labels[pred_inds_with_highest_quality] = 1 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/__init__.py deleted file mode 100644 index 96ef9b5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -from .build import META_ARCH_REGISTRY, build_model # isort:skip - -from .panoptic_fpn import PanopticFPN - -# import all the meta_arch, so they will be registered -from .rcnn import GeneralizedRCNN, ProposalNetwork -from .retinanet import RetinaNet -from .semantic_seg import SEM_SEG_HEADS_REGISTRY, SemanticSegmentor, build_sem_seg_head diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/build.py deleted file mode 100644 index 630389d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/build.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -from detectron2.utils.registry import Registry - -META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip -META_ARCH_REGISTRY.__doc__ = """ -Registry for meta-architectures, i.e. the whole model. - -The registered object will be called with `obj(cfg)` -and expected to return a `nn.Module` object. -""" - - -def build_model(cfg): - """ - Build the whole model architecture, defined by ``cfg.MODEL.META_ARCHITECTURE``. - Note that it does not load any weights from ``cfg``. - """ - meta_arch = cfg.MODEL.META_ARCHITECTURE - model = META_ARCH_REGISTRY.get(meta_arch)(cfg) - model.to(torch.device(cfg.MODEL.DEVICE)) - return model diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py deleted file mode 100644 index c5f92f7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/panoptic_fpn.py +++ /dev/null @@ -1,218 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import torch -from torch import nn - -from detectron2.structures import ImageList - -from ..backbone import build_backbone -from ..postprocessing import detector_postprocess, sem_seg_postprocess -from ..proposal_generator import build_proposal_generator -from ..roi_heads import build_roi_heads -from .build import META_ARCH_REGISTRY -from .semantic_seg import build_sem_seg_head - -__all__ = ["PanopticFPN"] - - -@META_ARCH_REGISTRY.register() -class PanopticFPN(nn.Module): - """ - Implement the paper :paper:`PanopticFPN`. 
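# Editor's note: a hedged usage sketch, not part of the original file; it
# assumes a full detectron2 installation. Meta-architectures such as the
# PanopticFPN below are instantiated by name through build_model, and weights
# are loaded separately:
#
#   from detectron2.config import get_cfg
#   from detectron2.modeling import build_model
#   from detectron2.checkpoint import DetectionCheckpointer
#
#   cfg = get_cfg()
#   cfg.MODEL.META_ARCHITECTURE = "PanopticFPN"   # any registered name
#   model = build_model(cfg)                      # placed on cfg.MODEL.DEVICE, no weights yet
#   DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)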
- """ - - def __init__(self, cfg): - super().__init__() - - self.instance_loss_weight = cfg.MODEL.PANOPTIC_FPN.INSTANCE_LOSS_WEIGHT - - # options when combining instance & semantic outputs - self.combine_on = cfg.MODEL.PANOPTIC_FPN.COMBINE.ENABLED - self.combine_overlap_threshold = cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH - self.combine_stuff_area_limit = cfg.MODEL.PANOPTIC_FPN.COMBINE.STUFF_AREA_LIMIT - self.combine_instances_confidence_threshold = ( - cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH - ) - - self.backbone = build_backbone(cfg) - self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) - self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape()) - self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape()) - - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper`. - Each item in the list contains the inputs for one image. - - For now, each item in the list is a dict that contains: - - * "image": Tensor, image in (C, H, W) format. - * "instances": Instances - * "sem_seg": semantic segmentation ground truth. - * Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - each dict is the results for one image. The dict contains the following keys: - - * "instances": see :meth:`GeneralizedRCNN.forward` for its format. - * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. - * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`. - See the return value of - :func:`combine_semantic_and_instance_outputs` for its format. 
- """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - features = self.backbone(images.tensor) - - if "proposals" in batched_inputs[0]: - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - proposal_losses = {} - - if "sem_seg" in batched_inputs[0]: - gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] - gt_sem_seg = ImageList.from_tensors( - gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value - ).tensor - else: - gt_sem_seg = None - sem_seg_results, sem_seg_losses = self.sem_seg_head(features, gt_sem_seg) - - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - if self.proposal_generator: - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - detector_results, detector_losses = self.roi_heads( - images, features, proposals, gt_instances - ) - - if self.training: - losses = {} - losses.update(sem_seg_losses) - losses.update({k: v * self.instance_loss_weight for k, v in detector_losses.items()}) - losses.update(proposal_losses) - return losses - - processed_results = [] - for sem_seg_result, detector_result, input_per_image, image_size in zip( - sem_seg_results, detector_results, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) - detector_r = detector_postprocess(detector_result, height, width) - - processed_results.append({"sem_seg": sem_seg_r, "instances": detector_r}) - - if self.combine_on: - panoptic_r = combine_semantic_and_instance_outputs( - detector_r, - sem_seg_r.argmax(dim=0), - self.combine_overlap_threshold, - self.combine_stuff_area_limit, - self.combine_instances_confidence_threshold, - ) - processed_results[-1]["panoptic_seg"] = panoptic_r - return processed_results - - -def combine_semantic_and_instance_outputs( - instance_results, - semantic_results, - overlap_threshold, - stuff_area_limit, - instances_confidence_threshold, -): - """ - Implement a simple combining logic following - "combine_semantic_and_instance_predictions.py" in panopticapi - to produce panoptic segmentation outputs. - - Args: - instance_results: output of :func:`detector_postprocess`. - semantic_results: an (H, W) tensor, each is the contiguous semantic - category id - - Returns: - panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. - segments_info (list[dict]): Describe each segment in `panoptic_seg`. - Each dict contains keys "id", "category_id", "isthing". 
- """ - panoptic_seg = torch.zeros_like(semantic_results, dtype=torch.int32) - - # sort instance outputs by scores - sorted_inds = torch.argsort(-instance_results.scores) - - current_segment_id = 0 - segments_info = [] - - instance_masks = instance_results.pred_masks.to(dtype=torch.bool, device=panoptic_seg.device) - - # Add instances one-by-one, check for overlaps with existing ones - for inst_id in sorted_inds: - score = instance_results.scores[inst_id].item() - if score < instances_confidence_threshold: - break - mask = instance_masks[inst_id] # H,W - mask_area = mask.sum().item() - - if mask_area == 0: - continue - - intersect = (mask > 0) & (panoptic_seg > 0) - intersect_area = intersect.sum().item() - - if intersect_area * 1.0 / mask_area > overlap_threshold: - continue - - if intersect_area > 0: - mask = mask & (panoptic_seg == 0) - - current_segment_id += 1 - panoptic_seg[mask] = current_segment_id - segments_info.append( - { - "id": current_segment_id, - "isthing": True, - "score": score, - "category_id": instance_results.pred_classes[inst_id].item(), - "instance_id": inst_id.item(), - } - ) - - # Add semantic results to remaining empty areas - semantic_labels = torch.unique(semantic_results).cpu().tolist() - for semantic_label in semantic_labels: - if semantic_label == 0: # 0 is a special "thing" class - continue - mask = (semantic_results == semantic_label) & (panoptic_seg == 0) - mask_area = mask.sum().item() - if mask_area < stuff_area_limit: - continue - - current_segment_id += 1 - panoptic_seg[mask] = current_segment_id - segments_info.append( - { - "id": current_segment_id, - "isthing": False, - "category_id": semantic_label, - "area": mask_area, - } - ) - - return panoptic_seg, segments_info diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/rcnn.py deleted file mode 100644 index b15ea8a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/rcnn.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import numpy as np -import torch -from torch import nn - -from detectron2.structures import ImageList -from detectron2.utils.events import get_event_storage -from detectron2.utils.logger import log_first_n - -from ..backbone import build_backbone -from ..postprocessing import detector_postprocess -from ..proposal_generator import build_proposal_generator -from ..roi_heads import build_roi_heads -from .build import META_ARCH_REGISTRY - -__all__ = ["GeneralizedRCNN", "ProposalNetwork"] - - -@META_ARCH_REGISTRY.register() -class GeneralizedRCNN(nn.Module): - """ - Generalized R-CNN. Any models that contains the following three components: - 1. Per-image feature extraction (aka backbone) - 2. Region proposal generation - 3. 
Per-region feature extraction and prediction - """ - - def __init__(self, cfg): - super().__init__() - - self.backbone = build_backbone(cfg) - self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) - self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape()) - self.vis_period = cfg.VIS_PERIOD - self.input_format = cfg.INPUT.FORMAT - - assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD) - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def visualize_training(self, batched_inputs, proposals): - """ - A function used to visualize images and proposals. It shows ground truth - bounding boxes on the original image and up to 20 predicted object - proposals on the original image. Users can implement different - visualization functions for different models. - - Args: - batched_inputs (list): a list that contains input to the model. - proposals (list): a list that contains predicted proposals. Both - batched_inputs and proposals should have the same length. - """ - from detectron2.utils.visualizer import Visualizer - - storage = get_event_storage() - max_vis_prop = 20 - - for input, prop in zip(batched_inputs, proposals): - img = input["image"].cpu().numpy() - assert img.shape[0] == 3, "Images should have 3 channels." - if self.input_format == "BGR": - img = img[::-1, :, :] - img = img.transpose(1, 2, 0) - v_gt = Visualizer(img, None) - v_gt = v_gt.overlay_instances(boxes=input["instances"].gt_boxes) - anno_img = v_gt.get_image() - box_size = min(len(prop.proposal_boxes), max_vis_prop) - v_pred = Visualizer(img, None) - v_pred = v_pred.overlay_instances( - boxes=prop.proposal_boxes[0:box_size].tensor.cpu().numpy() - ) - prop_img = v_pred.get_image() - vis_img = np.concatenate((anno_img, prop_img), axis=1) - vis_img = vis_img.transpose(2, 0, 1) - vis_name = "Left: GT bounding boxes; Right: Predicted proposals" - storage.put_image(vis_name, vis_img) - break # only visualize one image in a batch - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - - * image: Tensor, image in (C, H, W) format. - * instances (optional): groundtruth :class:`Instances` - * proposals (optional): :class:`Instances`, precomputed proposals. - - Other information that's included in the original dicts, such as: - - * "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "instances" whose value is a :class:`Instances`. 
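A minimal sketch of the per-image input dict this docstring describes, built with plain tensors; the model call is left commented out since it assumes an instantiated GeneralizedRCNN.

import torch

image_chw = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8).float()  # (C, H, W)
batched_inputs = [{
    "image": image_chw,   # unnormalized; normalization happens in preprocess_image below
    "height": 720,        # desired output resolution at inference time
    "width": 960,
}]
# outputs = model(batched_inputs)   # hypothetical model instance; returns list[dict]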
- The :class:`Instances` object has the following keys: - "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" - """ - if not self.training: - return self.inference(batched_inputs) - - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - features = self.backbone(images.tensor) - - if self.proposal_generator: - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - else: - assert "proposals" in batched_inputs[0] - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - proposal_losses = {} - - _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) - if self.vis_period > 0: - storage = get_event_storage() - if storage.iter % self.vis_period == 0: - self.visualize_training(batched_inputs, proposals) - - losses = {} - losses.update(detector_losses) - losses.update(proposal_losses) - return losses - - def inference(self, batched_inputs, detected_instances=None, do_postprocess=True): - """ - Run inference on the given inputs. - - Args: - batched_inputs (list[dict]): same as in :meth:`forward` - detected_instances (None or list[Instances]): if not None, it - contains an `Instances` object per image. The `Instances` - object contains "pred_boxes" and "pred_classes" which are - known boxes in the image. - The inference will then skip the detection of bounding boxes, - and only predict other per-ROI outputs. - do_postprocess (bool): whether to apply post-processing on the outputs. - - Returns: - same as in :meth:`forward`. - """ - assert not self.training - - images = self.preprocess_image(batched_inputs) - features = self.backbone(images.tensor) - - if detected_instances is None: - if self.proposal_generator: - proposals, _ = self.proposal_generator(images, features, None) - else: - assert "proposals" in batched_inputs[0] - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - - results, _ = self.roi_heads(images, features, proposals, None) - else: - detected_instances = [x.to(self.device) for x in detected_instances] - results = self.roi_heads.forward_with_given_boxes(features, detected_instances) - - if do_postprocess: - return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes) - else: - return results - - def preprocess_image(self, batched_inputs): - """ - Normalize, pad and batch the input images. - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - return images - - @staticmethod - def _postprocess(instances, batched_inputs, image_sizes): - """ - Rescale the output instances to the target size. 
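A standalone plain-torch sketch of what preprocess_image above does: normalize each image and pad the batch to a common size that is a multiple of the backbone's size divisibility.

import torch
import torch.nn.functional as F

def normalize_and_pad(images, pixel_mean, pixel_std, size_divisibility=32):
    # images: list of (C, H, W) tensors; mirrors preprocess_image + ImageList padding
    images = [(x - pixel_mean) / pixel_std for x in images]
    max_h = max(x.shape[1] for x in images)
    max_w = max(x.shape[2] for x in images)
    # round the common size up to a multiple of size_divisibility
    max_h = (max_h + size_divisibility - 1) // size_divisibility * size_divisibility
    max_w = (max_w + size_divisibility - 1) // size_divisibility * size_divisibility
    return torch.stack(
        [F.pad(x, (0, max_w - x.shape[2], 0, max_h - x.shape[1])) for x in images]
    )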
- """ - # note: private function; subject to changes - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - instances, batched_inputs, image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"instances": r}) - return processed_results - - -@META_ARCH_REGISTRY.register() -class ProposalNetwork(nn.Module): - """ - A meta architecture that only predicts object proposals. - """ - - def __init__(self, cfg): - super().__init__() - self.backbone = build_backbone(cfg) - self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) - - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - Same as in :class:`GeneralizedRCNN.forward` - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "proposals" whose value is a - :class:`Instances` with keys "proposal_boxes" and "objectness_logits". - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - features = self.backbone(images.tensor) - - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - # In training, the proposals are not useful at all but we generate them anyway. - # This makes RPN-only models about 5% slower. - if self.training: - return proposal_losses - - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - proposals, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"proposals": r}) - return processed_results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/retinanet.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/retinanet.py deleted file mode 100644 index 35c42cc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/retinanet.py +++ /dev/null @@ -1,489 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import logging -import math -import numpy as np -from typing import List -import torch -from fvcore.nn import sigmoid_focal_loss_jit, smooth_l1_loss -from torch import nn - -from detectron2.layers import ShapeSpec, batched_nms, cat -from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage -from detectron2.utils.logger import log_first_n - -from ..anchor_generator import build_anchor_generator -from ..backbone import build_backbone -from ..box_regression import Box2BoxTransform -from ..matcher import Matcher -from ..postprocessing import detector_postprocess -from .build import META_ARCH_REGISTRY - -__all__ = ["RetinaNet"] - - -def permute_to_N_HWA_K(tensor, K): - """ - Transpose/reshape a tensor from (N, (A x K), H, W) to (N, (HxWxA), K) - """ - assert tensor.dim() == 4, tensor.shape - N, _, H, W = tensor.shape - tensor = tensor.view(N, -1, K, H, W) - tensor = tensor.permute(0, 3, 4, 1, 2) - tensor = tensor.reshape(N, -1, K) # Size=(N,HWA,K) - return tensor - - -def permute_all_cls_and_box_to_N_HWA_K_and_concat(box_cls, box_delta, num_classes=80): - """ - Rearrange the tensor layout from the network output, i.e.: - list[Tensor]: #lvl tensors of shape (N, A x K, Hi, Wi) - to per-image predictions, i.e.: - Tensor: of shape (N x sum(Hi x Wi x A), K) - """ - # for each feature level, permute the outputs to make them be in the - # same format as the labels. Note that the labels are computed for - # all feature levels concatenated, so we keep the same representation - # for the objectness and the box_delta - box_cls_flattened = [permute_to_N_HWA_K(x, num_classes) for x in box_cls] - box_delta_flattened = [permute_to_N_HWA_K(x, 4) for x in box_delta] - # concatenate on the first dimension (representing the feature levels), to - # take into account the way the labels were generated (with all feature maps - # being concatenated as well) - box_cls = cat(box_cls_flattened, dim=1).view(-1, num_classes) - box_delta = cat(box_delta_flattened, dim=1).view(-1, 4) - return box_cls, box_delta - - -@META_ARCH_REGISTRY.register() -class RetinaNet(nn.Module): - """ - Implement RetinaNet in :paper:`RetinaNet`. 
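A quick shape check, in plain torch, of the (N, A*K, H, W) -> (N, H*W*A, K) permutation implemented by permute_to_N_HWA_K above.

import torch

N, A, K, H, W = 2, 9, 80, 13, 17
x = torch.randn(N, A * K, H, W)
y = x.view(N, A, K, H, W).permute(0, 3, 4, 1, 2).reshape(N, -1, K)
assert y.shape == (N, H * W * A, K)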
- """ - - def __init__(self, cfg): - super().__init__() - - # fmt: off - self.num_classes = cfg.MODEL.RETINANET.NUM_CLASSES - self.in_features = cfg.MODEL.RETINANET.IN_FEATURES - # Loss parameters: - self.focal_loss_alpha = cfg.MODEL.RETINANET.FOCAL_LOSS_ALPHA - self.focal_loss_gamma = cfg.MODEL.RETINANET.FOCAL_LOSS_GAMMA - self.smooth_l1_loss_beta = cfg.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA - # Inference parameters: - self.score_threshold = cfg.MODEL.RETINANET.SCORE_THRESH_TEST - self.topk_candidates = cfg.MODEL.RETINANET.TOPK_CANDIDATES_TEST - self.nms_threshold = cfg.MODEL.RETINANET.NMS_THRESH_TEST - self.max_detections_per_image = cfg.TEST.DETECTIONS_PER_IMAGE - # Vis parameters - self.vis_period = cfg.VIS_PERIOD - self.input_format = cfg.INPUT.FORMAT - # fmt: on - - self.backbone = build_backbone(cfg) - - backbone_shape = self.backbone.output_shape() - feature_shapes = [backbone_shape[f] for f in self.in_features] - self.head = RetinaNetHead(cfg, feature_shapes) - self.anchor_generator = build_anchor_generator(cfg, feature_shapes) - - # Matching and loss - self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) - self.matcher = Matcher( - cfg.MODEL.RETINANET.IOU_THRESHOLDS, - cfg.MODEL.RETINANET.IOU_LABELS, - allow_low_quality_matches=True, - ) - - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - """ - In Detectron1, loss is normalized by number of foreground samples in the batch. - When batch size is 1 per GPU, #foreground has a large variance and - using it lead to lower performance. Here we maintain an EMA of #foreground to - stabilize the normalizer. - """ - self.loss_normalizer = 100 # initialize with any reasonable #fg that's not too small - self.loss_normalizer_momentum = 0.9 - - @property - def device(self): - return self.pixel_mean.device - - def visualize_training(self, batched_inputs, results): - """ - A function used to visualize ground truth images and final network predictions. - It shows ground truth bounding boxes on the original image and up to 20 - predicted object bounding boxes on the original image. - - Args: - batched_inputs (list): a list that contains input to the model. - results (List[Instances]): a list of #images elements. - """ - from detectron2.utils.visualizer import Visualizer - - assert len(batched_inputs) == len( - results - ), "Cannot visualize inputs and results of different sizes" - storage = get_event_storage() - max_boxes = 20 - - image_index = 0 # only visualize a single image - img = batched_inputs[image_index]["image"].cpu().numpy() - assert img.shape[0] == 3, "Images should have 3 channels." 
- if self.input_format == "BGR": - img = img[::-1, :, :] - img = img.transpose(1, 2, 0) - v_gt = Visualizer(img, None) - v_gt = v_gt.overlay_instances(boxes=batched_inputs[image_index]["instances"].gt_boxes) - anno_img = v_gt.get_image() - processed_results = detector_postprocess(results[image_index], img.shape[0], img.shape[1]) - predicted_boxes = processed_results.pred_boxes.tensor.detach().cpu().numpy() - - v_pred = Visualizer(img, None) - v_pred = v_pred.overlay_instances(boxes=predicted_boxes[0:max_boxes]) - prop_img = v_pred.get_image() - vis_img = np.vstack((anno_img, prop_img)) - vis_img = vis_img.transpose(2, 0, 1) - vis_name = f"Top: GT bounding boxes; Bottom: {max_boxes} Highest Scoring Results" - storage.put_image(vis_name, vis_img) - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - - * image: Tensor, image in (C, H, W) format. - * instances: Instances - - Other information that's included in the original dicts, such as: - - * "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - Returns: - dict[str: Tensor]: - mapping from a named loss to a tensor storing the loss. Used during training only. - """ - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - features = self.backbone(images.tensor) - features = [features[f] for f in self.in_features] - box_cls, box_delta = self.head(features) - anchors = self.anchor_generator(features) - - if self.training: - gt_classes, gt_anchors_reg_deltas = self.get_ground_truth(anchors, gt_instances) - losses = self.losses(gt_classes, gt_anchors_reg_deltas, box_cls, box_delta) - - if self.vis_period > 0: - storage = get_event_storage() - if storage.iter % self.vis_period == 0: - results = self.inference(box_cls, box_delta, anchors, images.image_sizes) - self.visualize_training(batched_inputs, results) - - return losses - else: - results = self.inference(box_cls, box_delta, anchors, images.image_sizes) - processed_results = [] - for results_per_image, input_per_image, image_size in zip( - results, batched_inputs, images.image_sizes - ): - height = input_per_image.get("height", image_size[0]) - width = input_per_image.get("width", image_size[1]) - r = detector_postprocess(results_per_image, height, width) - processed_results.append({"instances": r}) - return processed_results - - def losses(self, gt_classes, gt_anchors_deltas, pred_class_logits, pred_anchor_deltas): - """ - Args: - For `gt_classes` and `gt_anchors_deltas` parameters, see - :meth:`RetinaNet.get_ground_truth`. - Their shapes are (N, R) and (N, R, 4), respectively, where R is - the total number of anchors across levels, i.e. sum(Hi x Wi x A) - For `pred_class_logits` and `pred_anchor_deltas`, see - :meth:`RetinaNetHead.forward`. - - Returns: - dict[str, Tensor]: - mapping from a named loss to a scalar tensor - storing the loss. Used during training only. 
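For reference, a plain-torch sketch of the sigmoid focal loss that sigmoid_focal_loss_jit computes in the loss below, with alpha and gamma as configured above; a sketch of the standard formulation, not the fvcore implementation itself.

import torch
import torch.nn.functional as F

def sigmoid_focal_loss(logits, targets, alpha=0.25, gamma=2.0):
    p = torch.sigmoid(logits)
    ce = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
    p_t = p * targets + (1 - p) * (1 - targets)
    loss = ce * (1 - p_t) ** gamma
    if alpha >= 0:
        loss = (alpha * targets + (1 - alpha) * (1 - targets)) * loss
    return loss.sum()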
The dict keys are: - "loss_cls" and "loss_box_reg" - """ - pred_class_logits, pred_anchor_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat( - pred_class_logits, pred_anchor_deltas, self.num_classes - ) # Shapes: (N x R, K) and (N x R, 4), respectively. - - gt_classes = gt_classes.flatten() - gt_anchors_deltas = gt_anchors_deltas.view(-1, 4) - - valid_idxs = gt_classes >= 0 - foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes) - num_foreground = foreground_idxs.sum().item() - get_event_storage().put_scalar("num_foreground", num_foreground) - self.loss_normalizer = ( - self.loss_normalizer_momentum * self.loss_normalizer - + (1 - self.loss_normalizer_momentum) * num_foreground - ) - - gt_classes_target = torch.zeros_like(pred_class_logits) - gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1 - - # logits loss - loss_cls = sigmoid_focal_loss_jit( - pred_class_logits[valid_idxs], - gt_classes_target[valid_idxs], - alpha=self.focal_loss_alpha, - gamma=self.focal_loss_gamma, - reduction="sum", - ) / max(1, self.loss_normalizer) - - # regression loss - loss_box_reg = smooth_l1_loss( - pred_anchor_deltas[foreground_idxs], - gt_anchors_deltas[foreground_idxs], - beta=self.smooth_l1_loss_beta, - reduction="sum", - ) / max(1, self.loss_normalizer) - - return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg} - - @torch.no_grad() - def get_ground_truth(self, anchors, targets): - """ - Args: - anchors (list[Boxes]): A list of #feature level Boxes. - The Boxes contains anchors of this image on the specific feature level. - targets (list[Instances]): a list of N `Instances`s. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. Specify `targets` during training only. - - Returns: - gt_classes (Tensor): - An integer tensor of shape (N, R) storing ground-truth labels for each anchor. - R is the total number of anchors, i.e. the sum of Hi x Wi x A for all levels. - Anchors with an IoU with some target higher than the foreground threshold - are assigned their corresponding label in the [0, K-1] range. - Anchors whose IoU are below the background threshold are assigned - the label "K". Anchors whose IoU are between the foreground and background - thresholds are assigned a label "-1", i.e. ignore. - gt_anchors_deltas (Tensor): - Shape (N, R, 4). - The last dimension represents ground-truth box2box transform - targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box. - The values in the tensor are meaningful only when the corresponding - anchor is labeled as foreground. - """ - gt_classes = [] - gt_anchors_deltas = [] - anchors = Boxes.cat(anchors) # Rx4 - - for targets_per_image in targets: - match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes, anchors) - gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix) - - has_gt = len(targets_per_image) > 0 - if has_gt: - # ground truth box regression - matched_gt_boxes = targets_per_image.gt_boxes[gt_matched_idxs] - gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas( - anchors.tensor, matched_gt_boxes.tensor - ) - - gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs] - # Anchors with label 0 are treated as background. - gt_classes_i[anchor_labels == 0] = self.num_classes - # Anchors with label -1 are ignored. 
- gt_classes_i[anchor_labels == -1] = -1 - else: - gt_classes_i = torch.zeros_like(gt_matched_idxs) + self.num_classes - gt_anchors_reg_deltas_i = torch.zeros_like(anchors.tensor) - - gt_classes.append(gt_classes_i) - gt_anchors_deltas.append(gt_anchors_reg_deltas_i) - - return torch.stack(gt_classes), torch.stack(gt_anchors_deltas) - - def inference(self, box_cls, box_delta, anchors, image_sizes): - """ - Arguments: - box_cls, box_delta: Same as the output of :meth:`RetinaNetHead.forward` - anchors (list[Boxes]): A list of #feature level Boxes. - The Boxes contain anchors of this image on the specific feature level. - image_sizes (List[torch.Size]): the input image sizes - - Returns: - results (List[Instances]): a list of #images elements. - """ - results = [] - - box_cls = [permute_to_N_HWA_K(x, self.num_classes) for x in box_cls] - box_delta = [permute_to_N_HWA_K(x, 4) for x in box_delta] - # list[Tensor], one per level, each has shape (N, Hi x Wi x A, K or 4) - - for img_idx, image_size in enumerate(image_sizes): - box_cls_per_image = [box_cls_per_level[img_idx] for box_cls_per_level in box_cls] - box_reg_per_image = [box_reg_per_level[img_idx] for box_reg_per_level in box_delta] - results_per_image = self.inference_single_image( - box_cls_per_image, box_reg_per_image, anchors, tuple(image_size) - ) - results.append(results_per_image) - return results - - def inference_single_image(self, box_cls, box_delta, anchors, image_size): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Arguments: - box_cls (list[Tensor]): list of #feature levels. Each entry contains - tensor of size (H x W x A, K) - box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4. - anchors (list[Boxes]): list of #feature levels. Each entry contains - a Boxes object, which contains all the anchors for that - image in that feature level. - image_size (tuple(H, W)): a tuple of the image height and width. - - Returns: - Same as `inference`, but for only one image. - """ - boxes_all = [] - scores_all = [] - class_idxs_all = [] - - # Iterate over every feature level - for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta, anchors): - # (HxWxAxK,) - box_cls_i = box_cls_i.flatten().sigmoid_() - - # Keep top k top scoring indices only. 
- num_topk = min(self.topk_candidates, box_reg_i.size(0)) - # torch.sort is actually faster than .topk (at least on GPUs) - predicted_prob, topk_idxs = box_cls_i.sort(descending=True) - predicted_prob = predicted_prob[:num_topk] - topk_idxs = topk_idxs[:num_topk] - - # filter out the proposals with low confidence score - keep_idxs = predicted_prob > self.score_threshold - predicted_prob = predicted_prob[keep_idxs] - topk_idxs = topk_idxs[keep_idxs] - - anchor_idxs = topk_idxs // self.num_classes - classes_idxs = topk_idxs % self.num_classes - - box_reg_i = box_reg_i[anchor_idxs] - anchors_i = anchors_i[anchor_idxs] - # predict boxes - predicted_boxes = self.box2box_transform.apply_deltas(box_reg_i, anchors_i.tensor) - - boxes_all.append(predicted_boxes) - scores_all.append(predicted_prob) - class_idxs_all.append(classes_idxs) - - boxes_all, scores_all, class_idxs_all = [ - cat(x) for x in [boxes_all, scores_all, class_idxs_all] - ] - keep = batched_nms(boxes_all, scores_all, class_idxs_all, self.nms_threshold) - keep = keep[: self.max_detections_per_image] - - result = Instances(image_size) - result.pred_boxes = Boxes(boxes_all[keep]) - result.scores = scores_all[keep] - result.pred_classes = class_idxs_all[keep] - return result - - def preprocess_image(self, batched_inputs): - """ - Normalize, pad and batch the input images. - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - return images - - -class RetinaNetHead(nn.Module): - """ - The head used in RetinaNet for object classification and box regression. - It has two subnets for the two tasks, with a common structure but separate parameters. - """ - - def __init__(self, cfg, input_shape: List[ShapeSpec]): - super().__init__() - # fmt: off - in_channels = input_shape[0].channels - num_classes = cfg.MODEL.RETINANET.NUM_CLASSES - num_convs = cfg.MODEL.RETINANET.NUM_CONVS - prior_prob = cfg.MODEL.RETINANET.PRIOR_PROB - num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors - # fmt: on - assert ( - len(set(num_anchors)) == 1 - ), "Using different number of anchors between levels is not currently supported!" - num_anchors = num_anchors[0] - - cls_subnet = [] - bbox_subnet = [] - for _ in range(num_convs): - cls_subnet.append( - nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) - ) - cls_subnet.append(nn.ReLU()) - bbox_subnet.append( - nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) - ) - bbox_subnet.append(nn.ReLU()) - - self.cls_subnet = nn.Sequential(*cls_subnet) - self.bbox_subnet = nn.Sequential(*bbox_subnet) - self.cls_score = nn.Conv2d( - in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1 - ) - self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1) - - # Initialization - for modules in [self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred]: - for layer in modules.modules(): - if isinstance(layer, nn.Conv2d): - torch.nn.init.normal_(layer.weight, mean=0, std=0.01) - torch.nn.init.constant_(layer.bias, 0) - - # Use prior in model initialization to improve stability - bias_value = -(math.log((1 - prior_prob) / prior_prob)) - torch.nn.init.constant_(self.cls_score.bias, bias_value) - - def forward(self, features): - """ - Arguments: - features (list[Tensor]): FPN feature map tensors in high to low resolution. 
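A quick check of the prior-probability bias initialization above: with prior_prob = 0.01 the classification head starts out predicting roughly 1% foreground probability everywhere.

import math

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)
assert abs(1.0 / (1.0 + math.exp(-bias_value)) - prior_prob) < 1e-9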
- Each tensor in the list correspond to different feature levels. - - Returns: - logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). - The tensor predicts the classification probability - at each spatial position for each of the A anchors and K object - classes. - bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). - The tensor predicts 4-vector (dx,dy,dw,dh) box - regression values for every anchor. These values are the - relative offset between the anchor and the ground truth box. - """ - logits = [] - bbox_reg = [] - for feature in features: - logits.append(self.cls_score(self.cls_subnet(feature))) - bbox_reg.append(self.bbox_pred(self.bbox_subnet(feature))) - return logits, bbox_reg diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/semantic_seg.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/semantic_seg.py deleted file mode 100644 index 2c41a72..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/meta_arch/semantic_seg.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -from typing import Dict -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import Conv2d, ShapeSpec -from detectron2.structures import ImageList -from detectron2.utils.registry import Registry - -from ..backbone import build_backbone -from ..postprocessing import sem_seg_postprocess -from .build import META_ARCH_REGISTRY - -__all__ = ["SemanticSegmentor", "SEM_SEG_HEADS_REGISTRY", "SemSegFPNHead", "build_sem_seg_head"] - - -SEM_SEG_HEADS_REGISTRY = Registry("SEM_SEG_HEADS") -SEM_SEG_HEADS_REGISTRY.__doc__ = """ -Registry for semantic segmentation heads, which make semantic segmentation predictions -from feature maps. -""" - - -@META_ARCH_REGISTRY.register() -class SemanticSegmentor(nn.Module): - """ - Main class for semantic segmentation architectures. - """ - - def __init__(self, cfg): - super().__init__() - self.backbone = build_backbone(cfg) - self.sem_seg_head = build_sem_seg_head(cfg, self.backbone.output_shape()) - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper`. - Each item in the list contains the inputs for one image. - - For now, each item in the list is a dict that contains: - - * "image": Tensor, image in (C, H, W) format. - * "sem_seg": semantic segmentation ground truth - * Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "sem_seg" whose value is a - Tensor that represents the - per-pixel segmentation prediced by the head. - The prediction has shape KxHxW that represents the logits of - each class for each pixel. 
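A usage sketch for the output format just described: the K x H x W logits reduce to a per-pixel label map with an argmax over the class dimension.

import torch

logits = torch.randn(19, 512, 512)    # stand-in for outputs[0]["sem_seg"]
label_map = logits.argmax(dim=0)      # (H, W) tensor of predicted class indices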
- """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - - features = self.backbone(images.tensor) - - if "sem_seg" in batched_inputs[0]: - targets = [x["sem_seg"].to(self.device) for x in batched_inputs] - targets = ImageList.from_tensors( - targets, self.backbone.size_divisibility, self.sem_seg_head.ignore_value - ).tensor - else: - targets = None - results, losses = self.sem_seg_head(features, targets) - - if self.training: - return losses - - processed_results = [] - for result, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): - height = input_per_image.get("height") - width = input_per_image.get("width") - r = sem_seg_postprocess(result, image_size, height, width) - processed_results.append({"sem_seg": r}) - return processed_results - - -def build_sem_seg_head(cfg, input_shape): - """ - Build a semantic segmentation head from `cfg.MODEL.SEM_SEG_HEAD.NAME`. - """ - name = cfg.MODEL.SEM_SEG_HEAD.NAME - return SEM_SEG_HEADS_REGISTRY.get(name)(cfg, input_shape) - - -@SEM_SEG_HEADS_REGISTRY.register() -class SemSegFPNHead(nn.Module): - """ - A semantic segmentation head described in :paper:`PanopticFPN`. - It takes FPN features as input and merges information from all - levels of the FPN into single output. - """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - - # fmt: off - self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES - feature_strides = {k: v.stride for k, v in input_shape.items()} - feature_channels = {k: v.channels for k, v in input_shape.items()} - self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE - num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES - conv_dims = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM - self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE - norm = cfg.MODEL.SEM_SEG_HEAD.NORM - self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT - # fmt: on - - self.scale_heads = [] - for in_feature in self.in_features: - head_ops = [] - head_length = max( - 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)) - ) - for k in range(head_length): - norm_module = nn.GroupNorm(32, conv_dims) if norm == "GN" else None - conv = Conv2d( - feature_channels[in_feature] if k == 0 else conv_dims, - conv_dims, - kernel_size=3, - stride=1, - padding=1, - bias=not norm, - norm=norm_module, - activation=F.relu, - ) - weight_init.c2_msra_fill(conv) - head_ops.append(conv) - if feature_strides[in_feature] != self.common_stride: - head_ops.append( - nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) - ) - self.scale_heads.append(nn.Sequential(*head_ops)) - self.add_module(in_feature, self.scale_heads[-1]) - self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) - weight_init.c2_msra_fill(self.predictor) - - def forward(self, features, targets=None): - """ - Returns: - In training, returns (None, dict of losses) - In inference, returns (CxHxW logits, {}) - """ - x = self.layers(features) - if self.training: - return None, self.losses(x, targets) - else: - x = F.interpolate( - x, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - return x, {} - - def layers(self, features): - for i, f in enumerate(self.in_features): - if i == 0: - x = self.scale_heads[i](features[f]) - else: - x = x + self.scale_heads[i](features[f]) - x = self.predictor(x) - return x - - def losses(self, 
predictions, targets): - predictions = F.interpolate( - predictions, scale_factor=self.common_stride, mode="bilinear", align_corners=False - ) - loss = F.cross_entropy( - predictions, targets, reduction="mean", ignore_index=self.ignore_value - ) - losses = {"loss_sem_seg": loss * self.loss_weight} - return losses diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/poolers.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/poolers.py deleted file mode 100644 index 678f5af..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/poolers.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -import math -import sys -import torch -from torch import nn -from torchvision.ops import RoIPool - -from detectron2.layers import ROIAlign, ROIAlignRotated, cat - -__all__ = ["ROIPooler"] - - -def assign_boxes_to_levels(box_lists, min_level, max_level, canonical_box_size, canonical_level): - """ - Map each box in `box_lists` to a feature map level index and return the assignment - vector. - - Args: - box_lists (list[Boxes] | list[RotatedBoxes]): A list of N Boxes or N RotatedBoxes, - where N is the number of images in the batch. - min_level (int): Smallest feature map level index. The input is considered index 0, - the output of stage 1 is index 1, and so. - max_level (int): Largest feature map level index. - canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). - canonical_level (int): The feature map level index on which a canonically-sized box - should be placed. - - Returns: - A tensor of length M, where M is the total number of boxes aggregated over all - N batch images. The memory layout corresponds to the concatenation of boxes - from all images. Each element is the feature map index, as an offset from - `self.min_level`, for the corresponding box (so value i means the box is at - `self.min_level + i`). - """ - eps = sys.float_info.epsilon - box_sizes = torch.sqrt(cat([boxes.area() for boxes in box_lists])) - # Eqn.(1) in FPN paper - level_assignments = torch.floor( - canonical_level + torch.log2(box_sizes / canonical_box_size + eps) - ) - # clamp level to (min, max), in case the box size is too large or too small - # for the available feature maps - level_assignments = torch.clamp(level_assignments, min=min_level, max=max_level) - return level_assignments.to(torch.int64) - min_level - - -def convert_boxes_to_pooler_format(box_lists): - """ - Convert all boxes in `box_lists` to the low-level format used by ROI pooling ops - (see description under Returns). - - Args: - box_lists (list[Boxes] | list[RotatedBoxes]): - A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. - - Returns: - When input is list[Boxes]: - A tensor of shape (M, 5), where M is the total number of boxes aggregated over all - N batch images. - The 5 columns are (batch index, x0, y0, x1, y1), where batch index - is the index in [0, N) identifying which batch image the box with corners at - (x0, y0, x1, y1) comes from. - When input is list[RotatedBoxes]: - A tensor of shape (M, 6), where M is the total number of boxes aggregated over all - N batch images. - The 6 columns are (batch index, x_ctr, y_ctr, width, height, angle_degrees), - where batch index is the index in [0, N) identifying which batch image the - rotated box (x_ctr, y_ctr, width, height, angle_degrees) comes from. 
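A plain-torch illustration of the (M, 5) pooler format described above, using two hypothetical images with made-up boxes.

import torch

boxes_per_image = [
    torch.tensor([[10., 10., 50., 60.]]),                     # image 0: one box
    torch.tensor([[0., 0., 20., 20.], [5., 5., 30., 40.]]),   # image 1: two boxes
]
rows = []
for i, boxes in enumerate(boxes_per_image):
    batch_index = torch.full((len(boxes), 1), float(i))
    rows.append(torch.cat([batch_index, boxes], dim=1))
pooler_fmt_boxes = torch.cat(rows, dim=0)   # shape (3, 5): batch index, x0, y0, x1, y1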
- """ - - def fmt_box_list(box_tensor, batch_index): - repeated_index = torch.full( - (len(box_tensor), 1), batch_index, dtype=box_tensor.dtype, device=box_tensor.device - ) - return cat((repeated_index, box_tensor), dim=1) - - pooler_fmt_boxes = cat( - [fmt_box_list(box_list.tensor, i) for i, box_list in enumerate(box_lists)], dim=0 - ) - - return pooler_fmt_boxes - - -class ROIPooler(nn.Module): - """ - Region of interest feature map pooler that supports pooling from one or more - feature maps. - """ - - def __init__( - self, - output_size, - scales, - sampling_ratio, - pooler_type, - canonical_box_size=224, - canonical_level=4, - ): - """ - Args: - output_size (int, tuple[int] or list[int]): output size of the pooled region, - e.g., 14 x 14. If tuple or list is given, the length must be 2. - scales (list[float]): The scale for each low-level pooling op relative to - the input image. For a feature map with stride s relative to the input - image, scale is defined as a 1 / s. The stride must be power of 2. - When there are multiple scales, they must form a pyramid, i.e. they must be - a monotically decreasing geometric sequence with a factor of 1/2. - sampling_ratio (int): The `sampling_ratio` parameter for the ROIAlign op. - pooler_type (string): Name of the type of pooling operation that should be applied. - For instance, "ROIPool" or "ROIAlignV2". - canonical_box_size (int): A canonical box size in pixels (sqrt(box area)). The default - is heuristically defined as 224 pixels in the FPN paper (based on ImageNet - pre-training). - canonical_level (int): The feature map level index from which a canonically-sized box - should be placed. The default is defined as level 4 (stride=16) in the FPN paper, - i.e., a box of size 224x224 will be placed on the feature with stride=16. - The box placement for all boxes will be determined from their sizes w.r.t - canonical_box_size. For example, a box whose area is 4x that of a canonical box - should be used to pool features from feature level ``canonical_level+1``. - - Note that the actual input feature maps given to this module may not have - sufficiently many levels for the input boxes. If the boxes are too large or too - small for the input feature maps, the closest level will be used. - """ - super().__init__() - - if isinstance(output_size, int): - output_size = (output_size, output_size) - assert len(output_size) == 2 - assert isinstance(output_size[0], int) and isinstance(output_size[1], int) - self.output_size = output_size - - if pooler_type == "ROIAlign": - self.level_poolers = nn.ModuleList( - ROIAlign( - output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=False - ) - for scale in scales - ) - elif pooler_type == "ROIAlignV2": - self.level_poolers = nn.ModuleList( - ROIAlign( - output_size, spatial_scale=scale, sampling_ratio=sampling_ratio, aligned=True - ) - for scale in scales - ) - elif pooler_type == "ROIPool": - self.level_poolers = nn.ModuleList( - RoIPool(output_size, spatial_scale=scale) for scale in scales - ) - elif pooler_type == "ROIAlignRotated": - self.level_poolers = nn.ModuleList( - ROIAlignRotated(output_size, spatial_scale=scale, sampling_ratio=sampling_ratio) - for scale in scales - ) - else: - raise ValueError("Unknown pooler type: {}".format(pooler_type)) - - # Map scale (defined as 1 / stride) to its feature map level under the - # assumption that stride is a power of 2. 
- min_level = -(math.log2(scales[0])) - max_level = -(math.log2(scales[-1])) - assert math.isclose(min_level, int(min_level)) and math.isclose( - max_level, int(max_level) - ), "Featuremap stride is not power of 2!" - self.min_level = int(min_level) - self.max_level = int(max_level) - assert ( - len(scales) == self.max_level - self.min_level + 1 - ), "[ROIPooler] Sizes of input featuremaps do not form a pyramid!" - assert 0 < self.min_level and self.min_level <= self.max_level - self.canonical_level = canonical_level - assert canonical_box_size > 0 - self.canonical_box_size = canonical_box_size - - def forward(self, x, box_lists): - """ - Args: - x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those - used to construct this module. - box_lists (list[Boxes] | list[RotatedBoxes]): - A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch. - The box coordinates are defined on the original image and - will be scaled by the `scales` argument of :class:`ROIPooler`. - - Returns: - Tensor: - A tensor of shape (M, C, output_size, output_size) where M is the total number of - boxes aggregated over all N batch images and C is the number of channels in `x`. - """ - num_level_assignments = len(self.level_poolers) - - assert isinstance(x, list) and isinstance( - box_lists, list - ), "Arguments to pooler must be lists" - assert ( - len(x) == num_level_assignments - ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format( - num_level_assignments, len(x) - ) - - assert len(box_lists) == x[0].size( - 0 - ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format( - x[0].size(0), len(box_lists) - ) - - pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists) - - if num_level_assignments == 1: - return self.level_poolers[0](x[0], pooler_fmt_boxes) - - level_assignments = assign_boxes_to_levels( - box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level - ) - - num_boxes = len(pooler_fmt_boxes) - num_channels = x[0].shape[1] - output_size = self.output_size[0] - - dtype, device = x[0].dtype, x[0].device - output = torch.zeros( - (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device - ) - - for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)): - inds = torch.nonzero(level_assignments == level, as_tuple=True)[0] - pooler_fmt_boxes_level = pooler_fmt_boxes[inds] - output[inds] = pooler(x_level, pooler_fmt_boxes_level) - - return output diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/postprocessing.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/postprocessing.py deleted file mode 100644 index e85541f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/postprocessing.py +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from torch.nn import functional as F - -from detectron2.layers import paste_masks_in_image -from detectron2.structures import Instances -from detectron2.utils.memory import retry_if_cuda_oom - - -def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): - """ - Resize the output instances. - The input images are often resized when entering an object detector. - As a result, we often need the outputs of the detector in a different - resolution from its inputs. 
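A plain-torch sketch of the box rescaling this function performs: predictions made at the resized input resolution are mapped back to the requested output resolution.

import torch

input_h, input_w = 800, 1202      # resolution the detector saw
output_h, output_w = 480, 720     # requested output resolution
boxes = torch.tensor([[100., 150., 400., 500.]])   # x0, y0, x1, y1 at input scale
scale_x, scale_y = output_w / input_w, output_h / input_h
boxes[:, 0::2] *= scale_x
boxes[:, 1::2] *= scale_y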
- - This function will resize the raw outputs of an R-CNN detector - to produce outputs according to the desired output resolution. - - Args: - results (Instances): the raw outputs from the detector. - `results.image_size` contains the input image resolution the detector sees. - This object might be modified in-place. - output_height, output_width: the desired output resolution. - - Returns: - Instances: the resized output from the model, based on the output resolution - """ - scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) - results = Instances((output_height, output_width), **results.get_fields()) - - if results.has("pred_boxes"): - output_boxes = results.pred_boxes - elif results.has("proposal_boxes"): - output_boxes = results.proposal_boxes - - output_boxes.scale(scale_x, scale_y) - output_boxes.clip(results.image_size) - - results = results[output_boxes.nonempty()] - - if results.has("pred_masks"): - results.pred_masks = retry_if_cuda_oom(paste_masks_in_image)( - results.pred_masks[:, 0, :, :], # N, 1, M, M - results.pred_boxes, - results.image_size, - threshold=mask_threshold, - ) - - if results.has("pred_keypoints"): - results.pred_keypoints[:, :, 0] *= scale_x - results.pred_keypoints[:, :, 1] *= scale_y - - return results - - -def sem_seg_postprocess(result, img_size, output_height, output_width): - """ - Return semantic segmentation predictions in the original resolution. - - The input images are often resized when entering semantic segmentor. Moreover, in same - cases, they also padded inside segmentor to be divisible by maximum network stride. - As a result, we often need the predictions of the segmentor in a different - resolution from its inputs. - - Args: - result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), - where C is the number of classes, and H, W are the height and width of the prediction. - img_size (tuple): image size that segmentor is taking as input. - output_height, output_width: the desired output resolution. - - Returns: - semantic segmentation prediction (Tensor): A tensor of the shape - (C, output_height, output_width) that contains per-pixel soft predictions. - """ - result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) - result = F.interpolate( - result, size=(output_height, output_width), mode="bilinear", align_corners=False - )[0] - return result diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/__init__.py deleted file mode 100644 index 64fb6d4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator -from .rpn import RPN_HEAD_REGISTRY, build_rpn_head, RPN diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/build.py deleted file mode 100644 index 7f252bc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/build.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from detectron2.utils.registry import Registry - -PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") -PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ -Registry for proposal generator, which produces object proposals from feature maps. - -The registered object will be called with `obj(cfg, input_shape)`. -The call should return a `nn.Module` object. -""" - -from . import rpn, rrpn # noqa F401 isort:skip - - -def build_proposal_generator(cfg, input_shape): - """ - Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. - The name can be "PrecomputedProposals" to use no proposal generator. - """ - name = cfg.MODEL.PROPOSAL_GENERATOR.NAME - if name == "PrecomputedProposals": - return None - - return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py deleted file mode 100644 index d4af905..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/proposal_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -import torch - -from detectron2.structures import Instances - - -def add_ground_truth_to_proposals(gt_boxes, proposals): - """ - Call `add_ground_truth_to_proposals_single_image` for all images. - - Args: - gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes - representing the gound-truth for image i. - proposals (list[Instances]): list of N elements. Element i is a Instances - representing the proposals for image i. - - Returns: - list[Instances]: list of N Instances. Each is the proposals for the image, - with field "proposal_boxes" and "objectness_logits". - """ - assert gt_boxes is not None - - assert len(proposals) == len(gt_boxes) - if len(proposals) == 0: - return proposals - - return [ - add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) - for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) - ] - - -def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): - """ - Augment `proposals` with ground-truth boxes from `gt_boxes`. - - Args: - Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals - per image. - - Returns: - Same as `add_ground_truth_to_proposals`, but for only one image. - """ - device = proposals.objectness_logits.device - # Concatenating gt_boxes with proposals requires them to have the same fields - # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1. - gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) - - gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) - gt_proposal = Instances(proposals.image_size) - - gt_proposal.proposal_boxes = gt_boxes - gt_proposal.objectness_logits = gt_logits - new_proposals = Instances.cat([proposals, gt_proposal]) - - return new_proposals diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn.py deleted file mode 100644 index 8eb93b8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from typing import Dict, List -import torch -import torch.nn.functional as F -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, Instances, pairwise_iou -from detectron2.utils.memory import retry_if_cuda_oom -from detectron2.utils.registry import Registry - -from ..anchor_generator import build_anchor_generator -from ..box_regression import Box2BoxTransform -from ..matcher import Matcher -from ..sampling import subsample_labels -from .build import PROPOSAL_GENERATOR_REGISTRY -from .rpn_outputs import RPNOutputs, find_top_rpn_proposals - -RPN_HEAD_REGISTRY = Registry("RPN_HEAD") -RPN_HEAD_REGISTRY.__doc__ = """ -Registry for RPN heads, which take feature maps and perform -objectness classification and bounding box regression for anchors. - -The registered object will be called with `obj(cfg, input_shape)`. -The call should return a `nn.Module` object. -""" - - -def build_rpn_head(cfg, input_shape): - """ - Build an RPN head defined by `cfg.MODEL.RPN.HEAD_NAME`. - """ - name = cfg.MODEL.RPN.HEAD_NAME - return RPN_HEAD_REGISTRY.get(name)(cfg, input_shape) - - -@RPN_HEAD_REGISTRY.register() -class StandardRPNHead(nn.Module): - """ - Standard RPN classification and regression heads described in :paper:`Faster R-CNN`. - Uses a 3x3 conv to produce a shared hidden state from which one 1x1 conv predicts - objectness logits for each anchor and a second 1x1 conv predicts bounding-box deltas - specifying how to deform each anchor into an object proposal. - """ - - @configurable - def __init__(self, *, in_channels: int, num_anchors: int, box_dim: int = 4): - """ - NOTE: this interface is experimental. - - Args: - in_channels (int): number of input feature channels. When using multiple - input features, they must have the same number of channels. - num_anchors (int): number of anchors to predict for *each spatial position* - on the feature map. The total number of anchors for each - feature map will be `num_anchors * H * W`. - box_dim (int): dimension of a box, which is also the number of box regression - predictions to make for each anchor. An axis aligned box has - box_dim=4, while a rotated box has box_dim=5. - """ - super().__init__() - # 3x3 conv for the hidden representation - self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1) - # 1x1 conv for predicting objectness logits - self.objectness_logits = nn.Conv2d(in_channels, num_anchors, kernel_size=1, stride=1) - # 1x1 conv for predicting box2box transform deltas - self.anchor_deltas = nn.Conv2d(in_channels, num_anchors * box_dim, kernel_size=1, stride=1) - - for l in [self.conv, self.objectness_logits, self.anchor_deltas]: - nn.init.normal_(l.weight, std=0.01) - nn.init.constant_(l.bias, 0) - - @classmethod - def from_config(cls, cfg, input_shape): - # Standard RPN is shared across levels: - in_channels = [s.channels for s in input_shape] - assert len(set(in_channels)) == 1, "Each level must have the same channel!" - in_channels = in_channels[0] - - # RPNHead should take the same input as anchor generator - # NOTE: it assumes that creating an anchor generator does not have unwanted side effect. 
- anchor_generator = build_anchor_generator(cfg, input_shape) - num_anchors = anchor_generator.num_anchors - box_dim = anchor_generator.box_dim - assert ( - len(set(num_anchors)) == 1 - ), "Each level must have the same number of anchors per spatial position" - return {"in_channels": in_channels, "num_anchors": num_anchors[0], "box_dim": box_dim} - - def forward(self, features): - """ - Args: - features (list[Tensor]): list of feature maps - - Returns: - list[Tensor]: A list of L elements. - Element i is a tensor of shape (N, A, Hi, Wi) representing - the predicted objectness logits for all anchors. A is the number of cell anchors. - list[Tensor]: A list of L elements. Element i is a tensor of shape - (N, A*box_dim, Hi, Wi) representing the predicted "deltas" used to transform anchors - to proposals. - """ - pred_objectness_logits = [] - pred_anchor_deltas = [] - for x in features: - t = F.relu(self.conv(x)) - pred_objectness_logits.append(self.objectness_logits(t)) - pred_anchor_deltas.append(self.anchor_deltas(t)) - return pred_objectness_logits, pred_anchor_deltas - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class RPN(nn.Module): - """ - Region Proposal Network, introduced by :paper:`Faster R-CNN`. - """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - - # fmt: off - self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE - self.in_features = cfg.MODEL.RPN.IN_FEATURES - self.nms_thresh = cfg.MODEL.RPN.NMS_THRESH - self.batch_size_per_image = cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE - self.positive_fraction = cfg.MODEL.RPN.POSITIVE_FRACTION - self.smooth_l1_beta = cfg.MODEL.RPN.SMOOTH_L1_BETA - self.loss_weight = cfg.MODEL.RPN.LOSS_WEIGHT - # fmt: on - - # Map from self.training state to train/test settings - self.pre_nms_topk = { - True: cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN, - False: cfg.MODEL.RPN.PRE_NMS_TOPK_TEST, - } - self.post_nms_topk = { - True: cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN, - False: cfg.MODEL.RPN.POST_NMS_TOPK_TEST, - } - self.boundary_threshold = cfg.MODEL.RPN.BOUNDARY_THRESH - - self.anchor_generator = build_anchor_generator( - cfg, [input_shape[f] for f in self.in_features] - ) - self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) - self.anchor_matcher = Matcher( - cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True - ) - self.rpn_head = build_rpn_head(cfg, [input_shape[f] for f in self.in_features]) - - def _subsample_labels(self, label): - """ - Randomly sample a subset of positive and negative examples, and overwrite - the label vector to the ignore value (-1) for all elements that are not - included in the sample. - - Args: - labels (Tensor): a vector of -1, 0, 1. Will be modified in-place and returned. - """ - pos_idx, neg_idx = subsample_labels( - label, self.batch_size_per_image, self.positive_fraction, 0 - ) - # Fill with the ignore label (-1), then set positive and negative labels - label.fill_(-1) - label.scatter_(0, pos_idx, 1) - label.scatter_(0, neg_idx, 0) - return label - - @torch.no_grad() - def label_and_sample_anchors(self, anchors: List[Boxes], gt_instances: List[Instances]): - """ - Args: - anchors (list[Boxes]): anchors for each feature map. - gt_instances: the ground-truth instances for each image. - - Returns: - list[Tensor]: - List of #demo tensors. i-th element is a vector of labels whose length is - the total number of anchors across feature maps. Label values are in {-1, 0, 1}, - with meanings: -1 = ignore; 0 = negative class; 1 = positive class. 
- list[Tensor]: - i-th element is a Nx4 tensor, where N is the total number of anchors across - feature maps. The values are the matched gt boxes for each anchor. - Values are undefined for those anchors not labeled as 1. - """ - anchors = Boxes.cat(anchors) - - gt_boxes = [x.gt_boxes for x in gt_instances] - image_sizes = [x.image_size for x in gt_instances] - del gt_instances - - gt_labels = [] - matched_gt_boxes = [] - for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes): - """ - image_size_i: (h, w) for the i-th image - gt_boxes_i: ground-truth boxes for i-th image - """ - - match_quality_matrix = retry_if_cuda_oom(pairwise_iou)(gt_boxes_i, anchors) - matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) - # Matching is memory-expensive and may result in CPU tensors. But the result is small - gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) - del match_quality_matrix - - if self.boundary_threshold >= 0: - # Discard anchors that go out of the boundaries of the image - # NOTE: This is legacy functionality that is turned off by default in Detectron2 - anchors_inside_image = anchors.inside_box(image_size_i, self.boundary_threshold) - gt_labels_i[~anchors_inside_image] = -1 - - # A vector of labels (-1, 0, 1) for each anchor - gt_labels_i = self._subsample_labels(gt_labels_i) - - if len(gt_boxes_i) == 0: - # These values won't be used anyway since the anchor is labeled as background - matched_gt_boxes_i = torch.zeros_like(anchors.tensor) - else: - # TODO wasted indexing computation for ignored boxes - matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor - - gt_labels.append(gt_labels_i) # N,AHW - matched_gt_boxes.append(matched_gt_boxes_i) - return gt_labels, matched_gt_boxes - - def forward(self, images, features, gt_instances=None): - """ - Args: - images (ImageList): input images of length `N` - features (dict[str: Tensor]): input data as a mapping from feature - map name to tensor. Axis 0 represents the number of images `N` in - the input data; axes 1-3 are channels, height, and width, which may - vary between feature maps (e.g., if a feature pyramid is used). - gt_instances (list[Instances], optional): a length `N` list of `Instances`s. - Each `Instances` stores ground-truth instances for the corresponding image. - - Returns: - proposals: list[Instances]: contains fields "proposal_boxes", "objectness_logits" - loss: dict[Tensor] or None - """ - features = [features[f] for f in self.in_features] - pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) - anchors = self.anchor_generator(features) - - if self.training: - gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) - else: - gt_labels, gt_boxes = None, None - - outputs = RPNOutputs( - self.box2box_transform, - self.batch_size_per_image, - images, - pred_objectness_logits, - pred_anchor_deltas, - anchors, - gt_labels, - gt_boxes, - self.smooth_l1_beta, - ) - - if self.training: - losses = {k: v * self.loss_weight for k, v in outputs.losses().items()} - else: - losses = {} - - with torch.no_grad(): - # Find the top proposals by applying NMS and removing boxes that - # are too small. The proposals are treated as fixed for approximate - # joint training with roi heads. This approach ignores the derivative - # w.r.t. the proposal boxes’ coordinates that are also network - # responses, so is approximate. 
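            # `self.training` below indexes the {True: train, False: test} top-k
            # dicts set up in `__init__`, so different pre/post-NMS budgets apply
            # at train and test time. The returned proposals are sorted by
            # objectness score in descending order.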
- proposals = find_top_rpn_proposals( - outputs.predict_proposals(), - outputs.predict_objectness_logits(), - images, - self.nms_thresh, - self.pre_nms_topk[self.training], - self.post_nms_topk[self.training], - self.min_box_side_len, - self.training, - ) - - return proposals, losses diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn_outputs.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn_outputs.py deleted file mode 100644 index 44f846f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rpn_outputs.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import logging -import torch -import torch.nn.functional as F -from fvcore.nn import smooth_l1_loss - -from detectron2.layers import batched_nms, cat -from detectron2.structures import Boxes, Instances -from detectron2.utils.events import get_event_storage - -logger = logging.getLogger(__name__) - -# TODO: comments for future refactoring of this module -# -# From @rbg: -# This code involves a significant amount of tensor reshaping and permuting. Look for -# ways to simplify this. - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - L: number of feature maps per image on which RPN is run - A: number of cell anchors (must be the same for all feature maps) - Hi, Wi: height and width of the i-th feature map - 4: size of the box parameterization - -Naming convention: - - objectness: refers to the binary classification of an anchor as object vs. not - object. - - deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransform`). - - pred_objectness_logits: predicted objectness scores in [-inf, +inf]; use - sigmoid(pred_objectness_logits) to estimate P(object). - - gt_labels: ground-truth binary classification labels for objectness - - pred_anchor_deltas: predicted box2box transform deltas - - gt_anchor_deltas: ground-truth box2box transform deltas -""" - - -def find_top_rpn_proposals( - proposals, - pred_objectness_logits, - images, - nms_thresh, - pre_nms_topk, - post_nms_topk, - min_box_side_len, - training, -): - """ - For each feature map, select the `pre_nms_topk` highest scoring proposals, - apply NMS, clip proposals, and remove small boxes. Return the `post_nms_topk` - highest scoring proposals among all the feature maps if `training` is True, - otherwise, returns the highest `post_nms_topk` scoring proposals for each - feature map. - - Args: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 4). - All proposal predictions on the feature maps. - pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). - images (ImageList): Input images as an :class:`ImageList`. - nms_thresh (float): IoU threshold to use for NMS - pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. - When RPN is run on multiple feature maps (as in FPN) this number is per - feature map. - post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. - When RPN is run on multiple feature maps (as in FPN) this number is total, - over all feature maps. - min_box_side_len (float): minimum proposal box side length in pixels (absolute units - wrt input images). - training (bool): True if proposals are to be used in training, otherwise False. 
- This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." - comment. - - Returns: - proposals (list[Instances]): list of N Instances. The i-th Instances - stores post_nms_topk object proposals for image i, sorted by their - objectness score in descending order. - """ - image_sizes = images.image_sizes # in (h, w) order - num_images = len(image_sizes) - device = proposals[0].device - - # 1. Select top-k anchor for every level and every image - topk_scores = [] # #lvl Tensor, each of shape N x topk - topk_proposals = [] - level_ids = [] # #lvl Tensor, each of shape (topk,) - batch_idx = torch.arange(num_images, device=device) - for level_id, proposals_i, logits_i in zip( - itertools.count(), proposals, pred_objectness_logits - ): - Hi_Wi_A = logits_i.shape[1] - num_proposals_i = min(pre_nms_topk, Hi_Wi_A) - - # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812) - # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) - logits_i, idx = logits_i.sort(descending=True, dim=1) - topk_scores_i = logits_i[batch_idx, :num_proposals_i] - topk_idx = idx[batch_idx, :num_proposals_i] - - # each is N x topk - topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 4 - - topk_proposals.append(topk_proposals_i) - topk_scores.append(topk_scores_i) - level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) - - # 2. Concat all levels together - topk_scores = cat(topk_scores, dim=1) - topk_proposals = cat(topk_proposals, dim=1) - level_ids = cat(level_ids, dim=0) - - # 3. For each image, run a per-level NMS, and choose topk results. - results = [] - for n, image_size in enumerate(image_sizes): - boxes = Boxes(topk_proposals[n]) - scores_per_img = topk_scores[n] - lvl = level_ids - - valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) - if not valid_mask.all(): - if training: - raise FloatingPointError( - "Predicted boxes or scores contain Inf/NaN. Training has diverged." - ) - boxes = boxes[valid_mask] - scores_per_img = scores_per_img[valid_mask] - lvl = lvl[valid_mask] - boxes.clip(image_size) - - # filter empty boxes - keep = boxes.nonempty(threshold=min_box_side_len) - if keep.sum().item() != len(boxes): - boxes, scores_per_img, lvl = boxes[keep], scores_per_img[keep], lvl[keep] - - keep = batched_nms(boxes.tensor, scores_per_img, lvl, nms_thresh) - # In Detectron1, there was different behavior during training vs. testing. - # (https://github.com/facebookresearch/Detectron/issues/459) - # During training, topk is over the proposals from *all* images in the training batch. - # During testing, it is over the proposals for each image separately. - # As a result, the training behavior becomes batch-dependent, - # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. - # This bug is addressed in Detectron2 to make the behavior independent of batch size. - keep = keep[:post_nms_topk] # keep is already sorted - - res = Instances(image_size) - res.proposal_boxes = boxes[keep] - res.objectness_logits = scores_per_img[keep] - results.append(res) - return results - - -def rpn_losses( - gt_labels, gt_anchor_deltas, pred_objectness_logits, pred_anchor_deltas, smooth_l1_beta -): - """ - Args: - gt_labels (Tensor): shape (N,), each element in {-1, 0, 1} representing - ground-truth objectness labels with: -1 = ignore; 0 = not object; 1 = object. 
- gt_anchor_deltas (Tensor): shape (N, box_dim), row i represents ground-truth - box2box transform targets (dx, dy, dw, dh) or (dx, dy, dw, dh, da) that map anchor i to - its matched ground-truth box. - pred_objectness_logits (Tensor): shape (N,), each element is a predicted objectness - logit. - pred_anchor_deltas (Tensor): shape (N, box_dim), each row is a predicted box2box - transform (dx, dy, dw, dh) or (dx, dy, dw, dh, da) - smooth_l1_beta (float): The transition point between L1 and L2 loss in - the smooth L1 loss function. When set to 0, the loss becomes L1. When - set to +inf, the loss becomes constant 0. - - Returns: - objectness_loss, localization_loss, both unnormalized (summed over samples). - """ - pos_masks = gt_labels == 1 - localization_loss = smooth_l1_loss( - pred_anchor_deltas[pos_masks], gt_anchor_deltas[pos_masks], smooth_l1_beta, reduction="sum" - ) - - valid_masks = gt_labels >= 0 - objectness_loss = F.binary_cross_entropy_with_logits( - pred_objectness_logits[valid_masks], - gt_labels[valid_masks].to(torch.float32), - reduction="sum", - ) - return objectness_loss, localization_loss - - -class RPNOutputs(object): - def __init__( - self, - box2box_transform, - batch_size_per_image, - images, - pred_objectness_logits, - pred_anchor_deltas, - anchors, - gt_labels=None, - gt_boxes=None, - smooth_l1_beta=0.0, - ): - """ - Args: - box2box_transform (Box2BoxTransform): :class:`Box2BoxTransform` instance for - anchor-proposal transformations. - images (ImageList): :class:`ImageList` instance representing N input images - batch_size_per_image (int): number of proposals to sample when training - pred_objectness_logits (list[Tensor]): A list of L elements. - Element i is a tensor of shape (N, A, Hi, Wi) representing - the predicted objectness logits for anchors. - pred_anchor_deltas (list[Tensor]): A list of L elements. Element i is a tensor of shape - (N, A*4 or 5, Hi, Wi) representing the predicted "deltas" used to transform anchors - to proposals. - anchors (list[Boxes or RotatedBoxes]): A list of Boxes/RotatedBoxes storing the all - the anchors for each feature map. See :meth:`AnchorGenerator.forward`. - gt_labels (list[Tensor]): Available on in training. - See :meth:`RPN.label_and_sample_anchors`. - gt_boxes (list[Boxes or RotatedBoxes]): Available on in training. - See :meth:`RPN.label_and_sample_anchors`. - smooth_l1_beta (float): The transition point between L1 and L2 loss in - the smooth L1 loss function. When set to 0, the loss becomes L1. When - set to +inf, the loss becomes constant 0. - """ - self.box2box_transform = box2box_transform - self.batch_size_per_image = batch_size_per_image - - B = anchors[0].tensor.size(1) # box dimension (4 or 5) - self.pred_objectness_logits = [ - # Reshape: (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) - score.permute(0, 2, 3, 1).flatten(1) - for score in pred_objectness_logits - ] - - self.pred_anchor_deltas = [ - # Reshape: (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) - # -> (N, Hi*Wi*A, B) - x.view(x.shape[0], -1, B, x.shape[-2], x.shape[-1]) - .permute(0, 3, 4, 1, 2) - .flatten(1, -2) - for x in pred_anchor_deltas - ] - - self.anchors = anchors - - self.gt_boxes = gt_boxes - self.gt_labels = gt_labels - - self.num_images = len(images) - self.smooth_l1_beta = smooth_l1_beta - - def losses(self): - """ - Return the losses from a set of RPN predictions and their associated ground-truth. - - Returns: - dict[loss name -> loss value]: A dict mapping from loss name to loss value. 
- Loss names are: `loss_rpn_cls` for objectness classification and - `loss_rpn_loc` for proposal localization. - """ - gt_labels = torch.stack(self.gt_labels) - anchors = self.anchors[0].cat(self.anchors).tensor # Ax(4 or 5) - gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in self.gt_boxes] - gt_anchor_deltas = torch.stack(gt_anchor_deltas) - - # Log the number of positive/negative anchors per-image that's used in training - num_pos_anchors = (gt_labels == 1).sum().item() - num_neg_anchors = (gt_labels == 0).sum().item() - storage = get_event_storage() - storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / self.num_images) - storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / self.num_images) - - objectness_loss, localization_loss = rpn_losses( - gt_labels, - gt_anchor_deltas, - # concat on the Hi*Wi*A dimension - cat(self.pred_objectness_logits, dim=1), - cat(self.pred_anchor_deltas, dim=1), - self.smooth_l1_beta, - ) - normalizer = self.batch_size_per_image * self.num_images - return { - "loss_rpn_cls": objectness_loss / normalizer, - "loss_rpn_loc": localization_loss / normalizer, - } - - def predict_proposals(self): - """ - Transform anchors into proposals by applying the predicted anchor deltas. - - Returns: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape - (N, Hi*Wi*A, B), where B is box dimension (4 or 5). - """ - proposals = [] - # For each feature map - for anchors_i, pred_anchor_deltas_i in zip(self.anchors, self.pred_anchor_deltas): - B = anchors_i.tensor.size(1) - N = self.num_images - pred_anchor_deltas_i = pred_anchor_deltas_i.reshape(-1, B) - # Expand anchors to shape (N*Hi*Wi*A, B) - anchors_i = anchors_i.tensor.unsqueeze(0).expand(N, -1, -1).reshape(-1, B) - proposals_i = self.box2box_transform.apply_deltas(pred_anchor_deltas_i, anchors_i) - # Append feature map proposals with shape (N, Hi*Wi*A, B) - proposals.append(proposals_i.view(N, -1, B)) - return proposals - - def predict_objectness_logits(self): - """ - Return objectness logits in the same format as the proposals returned by - :meth:`predict_proposals`. - - Returns: - pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape - (N, Hi*Wi*A). - """ - return self.pred_objectness_logits diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rrpn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rrpn.py deleted file mode 100644 index 8c2ac36..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/proposal_generator/rrpn.py +++ /dev/null @@ -1,233 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -import logging -from typing import Dict, List -import torch - -from detectron2.layers import ShapeSpec, batched_nms_rotated, cat -from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated -from detectron2.utils.memory import retry_if_cuda_oom - -from ..box_regression import Box2BoxTransformRotated -from .build import PROPOSAL_GENERATOR_REGISTRY -from .rpn import RPN -from .rpn_outputs import RPNOutputs - -logger = logging.getLogger(__name__) - - -def find_top_rrpn_proposals( - proposals, - pred_objectness_logits, - images, - nms_thresh, - pre_nms_topk, - post_nms_topk, - min_box_side_len, - training, -): - """ - For each feature map, select the `pre_nms_topk` highest scoring proposals, - apply NMS, clip proposals, and remove small boxes. 
Return the `post_nms_topk` - highest scoring proposals among all the feature maps if `training` is True, - otherwise, returns the highest `post_nms_topk` scoring proposals for each - feature map. - - Args: - proposals (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A, 5). - All proposal predictions on the feature maps. - pred_objectness_logits (list[Tensor]): A list of L tensors. Tensor i has shape (N, Hi*Wi*A). - images (ImageList): Input images as an :class:`ImageList`. - nms_thresh (float): IoU threshold to use for NMS - pre_nms_topk (int): number of top k scoring proposals to keep before applying NMS. - When RRPN is run on multiple feature maps (as in FPN) this number is per - feature map. - post_nms_topk (int): number of top k scoring proposals to keep after applying NMS. - When RRPN is run on multiple feature maps (as in FPN) this number is total, - over all feature maps. - min_box_side_len (float): minimum proposal box side length in pixels (absolute units - wrt input images). - training (bool): True if proposals are to be used in training, otherwise False. - This arg exists only to support a legacy bug; look for the "NB: Legacy bug ..." - comment. - - Returns: - proposals (list[Instances]): list of N Instances. The i-th Instances - stores post_nms_topk object proposals for image i. - """ - image_sizes = images.image_sizes # in (h, w) order - num_images = len(image_sizes) - device = proposals[0].device - - # 1. Select top-k anchor for every level and every image - topk_scores = [] # #lvl Tensor, each of shape N x topk - topk_proposals = [] - level_ids = [] # #lvl Tensor, each of shape (topk,) - batch_idx = torch.arange(num_images, device=device) - for level_id, proposals_i, logits_i in zip( - itertools.count(), proposals, pred_objectness_logits - ): - Hi_Wi_A = logits_i.shape[1] - num_proposals_i = min(pre_nms_topk, Hi_Wi_A) - - # sort is faster than topk (https://github.com/pytorch/pytorch/issues/22812) - # topk_scores_i, topk_idx = logits_i.topk(num_proposals_i, dim=1) - logits_i, idx = logits_i.sort(descending=True, dim=1) - topk_scores_i = logits_i[batch_idx, :num_proposals_i] - topk_idx = idx[batch_idx, :num_proposals_i] - - # each is N x topk - topk_proposals_i = proposals_i[batch_idx[:, None], topk_idx] # N x topk x 5 - - topk_proposals.append(topk_proposals_i) - topk_scores.append(topk_scores_i) - level_ids.append(torch.full((num_proposals_i,), level_id, dtype=torch.int64, device=device)) - - # 2. Concat all levels together - topk_scores = cat(topk_scores, dim=1) - topk_proposals = cat(topk_proposals, dim=1) - level_ids = cat(level_ids, dim=0) - - # 3. For each image, run a per-level NMS, and choose topk results. - results = [] - for n, image_size in enumerate(image_sizes): - boxes = RotatedBoxes(topk_proposals[n]) - scores_per_img = topk_scores[n] - valid_mask = torch.isfinite(boxes.tensor).all(dim=1) & torch.isfinite(scores_per_img) - if not valid_mask.all(): - boxes = boxes[valid_mask] - scores_per_img = scores_per_img[valid_mask] - boxes.clip(image_size) - - # filter empty boxes - keep = boxes.nonempty(threshold=min_box_side_len) - lvl = level_ids - if keep.sum().item() != len(boxes): - boxes, scores_per_img, lvl = (boxes[keep], scores_per_img[keep], level_ids[keep]) - - keep = batched_nms_rotated(boxes.tensor, scores_per_img, lvl, nms_thresh) - # In Detectron1, there was different behavior during training vs. testing. 
- # (https://github.com/facebookresearch/Detectron/issues/459) - # During training, topk is over the proposals from *all* images in the training batch. - # During testing, it is over the proposals for each image separately. - # As a result, the training behavior becomes batch-dependent, - # and the configuration "POST_NMS_TOPK_TRAIN" end up relying on the batch size. - # This bug is addressed in Detectron2 to make the behavior independent of batch size. - keep = keep[:post_nms_topk] - - res = Instances(image_size) - res.proposal_boxes = boxes[keep] - res.objectness_logits = scores_per_img[keep] - results.append(res) - return results - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class RRPN(RPN): - """ - Rotated Region Proposal Network described in :paper:`RRPN`. - """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__(cfg, input_shape) - self.box2box_transform = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) - if self.boundary_threshold >= 0: - raise NotImplementedError( - "boundary_threshold is a legacy option not implemented for RRPN." - ) - - @torch.no_grad() - def label_and_sample_anchors(self, anchors: List[RotatedBoxes], gt_instances: List[Instances]): - """ - Args: - anchors (list[RotatedBoxes]): anchors for each feature map. - gt_instances: the ground-truth instances for each image. - - Returns: - list[Tensor]: - List of #demo tensors. i-th element is a vector of labels whose length is - the total number of anchors across feature maps. Label values are in {-1, 0, 1}, - with meanings: -1 = ignore; 0 = negative class; 1 = positive class. - list[Tensor]: - i-th element is a Nx5 tensor, where N is the total number of anchors across - feature maps. The values are the matched gt boxes for each anchor. - Values are undefined for those anchors not labeled as 1. - """ - anchors = RotatedBoxes.cat(anchors) - - gt_boxes = [x.gt_boxes for x in gt_instances] - del gt_instances - - gt_labels = [] - matched_gt_boxes = [] - for gt_boxes_i in gt_boxes: - """ - gt_boxes_i: ground-truth boxes for i-th image - """ - match_quality_matrix = retry_if_cuda_oom(pairwise_iou_rotated)(gt_boxes_i, anchors) - matched_idxs, gt_labels_i = retry_if_cuda_oom(self.anchor_matcher)(match_quality_matrix) - # Matching is memory-expensive and may result in CPU tensors. 
But the result is small - gt_labels_i = gt_labels_i.to(device=gt_boxes_i.device) - - # A vector of labels (-1, 0, 1) for each anchor - gt_labels_i = self._subsample_labels(gt_labels_i) - - if len(gt_boxes_i) == 0: - # These values won't be used anyway since the anchor is labeled as background - matched_gt_boxes_i = torch.zeros_like(anchors.tensor) - else: - # TODO wasted indexing computation for ignored boxes - matched_gt_boxes_i = gt_boxes_i[matched_idxs].tensor - - gt_labels.append(gt_labels_i) # N,AHW - matched_gt_boxes.append(matched_gt_boxes_i) - return gt_labels, matched_gt_boxes - - def forward(self, images, features, gt_instances=None): - # same signature as RPN.forward - features = [features[f] for f in self.in_features] - pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) - anchors = self.anchor_generator(features) - - if self.training: - gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) - else: - gt_labels, gt_boxes = None, None - - outputs = RPNOutputs( - self.box2box_transform, - self.batch_size_per_image, - images, - pred_objectness_logits, - pred_anchor_deltas, - anchors, - gt_labels, - gt_boxes, - self.smooth_l1_beta, - ) - - if self.training: - losses = {k: v * self.loss_weight for k, v in outputs.losses().items()} - else: - losses = {} - - with torch.no_grad(): - # Find the top proposals by applying NMS and removing boxes that - # are too small. The proposals are treated as fixed for approximate - # joint training with roi heads. This approach ignores the derivative - # w.r.t. the proposal boxes’ coordinates that are also network - # responses, so is approximate. - - # Note: this line is the only difference v.s. RPN.forward - proposals = find_top_rrpn_proposals( - outputs.predict_proposals(), - outputs.predict_objectness_logits(), - images, - self.nms_thresh, - self.pre_nms_topk[self.training], - self.post_nms_topk[self.training], - self.min_box_side_len, - self.training, - ) - - return proposals, losses diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/__init__.py deleted file mode 100644 index a49099a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head -from .keypoint_head import ROI_KEYPOINT_HEAD_REGISTRY, build_keypoint_head, BaseKeypointRCNNHead -from .mask_head import ROI_MASK_HEAD_REGISTRY, build_mask_head, BaseMaskRCNNHead -from .roi_heads import ( - ROI_HEADS_REGISTRY, - ROIHeads, - Res5ROIHeads, - StandardROIHeads, - build_roi_heads, - select_foreground_proposals, -) -from .rotated_fast_rcnn import RROIHeads -from .fast_rcnn import FastRCNNOutputLayers - -from . import cascade_rcnn # isort:skip diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/box_head.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/box_head.py deleted file mode 100644 index de62d47..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/box_head.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import numpy as np -from typing import List -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm -from detectron2.utils.registry import Registry - -ROI_BOX_HEAD_REGISTRY = Registry("ROI_BOX_HEAD") -ROI_BOX_HEAD_REGISTRY.__doc__ = """ -Registry for box heads, which make box predictions from per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -@ROI_BOX_HEAD_REGISTRY.register() -class FastRCNNConvFCHead(nn.Module): - """ - A head with several 3x3 conv layers (each followed by norm & relu) and then - several fc layers (each followed by relu). - """ - - @configurable - def __init__( - self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="" - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature. - conv_dims (list[int]): the output dimensions of the conv layers - fc_dims (list[int]): the output dimensions of the fc layers - conv_norm (str or callable): normalization for the conv layers. - See :func:`detectron2.layers.get_norm` for supported types. - """ - super().__init__() - assert len(conv_dims) + len(fc_dims) > 0 - - self._output_size = (input_shape.channels, input_shape.height, input_shape.width) - - self.conv_norm_relus = [] - for k, conv_dim in enumerate(conv_dims): - conv = Conv2d( - self._output_size[0], - conv_dim, - kernel_size=3, - padding=1, - bias=not conv_norm, - norm=get_norm(conv_norm, conv_dim), - activation=F.relu, - ) - self.add_module("conv{}".format(k + 1), conv) - self.conv_norm_relus.append(conv) - self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) - - self.fcs = [] - for k, fc_dim in enumerate(fc_dims): - fc = Linear(np.prod(self._output_size), fc_dim) - self.add_module("fc{}".format(k + 1), fc) - self.fcs.append(fc) - self._output_size = fc_dim - - for layer in self.conv_norm_relus: - weight_init.c2_msra_fill(layer) - for layer in self.fcs: - weight_init.c2_xavier_fill(layer) - - @classmethod - def from_config(cls, cfg, input_shape): - num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV - conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM - num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC - fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM - return { - "input_shape": input_shape, - "conv_dims": [conv_dim] * num_conv, - "fc_dims": [fc_dim] * num_fc, - "conv_norm": cfg.MODEL.ROI_BOX_HEAD.NORM, - } - - def forward(self, x): - for layer in self.conv_norm_relus: - x = layer(x) - if len(self.fcs): - if x.dim() > 2: - x = torch.flatten(x, start_dim=1) - for layer in self.fcs: - x = F.relu(layer(x)) - return x - - @property - def output_shape(self): - """ - Returns: - ShapeSpec: the output feature shape - """ - o = self._output_size - if isinstance(o, int): - return ShapeSpec(channels=o) - else: - return ShapeSpec(channels=o[0], height=o[1], width=o[2]) - - -def build_box_head(cfg, input_shape): - """ - Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. 
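    The returned module is expected to expose an `output_shape` property (as
    `FastRCNNConvFCHead` above does) describing its output feature shape, which
    box predictors such as `FastRCNNOutputLayers` take as their input shape.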
- """ - name = cfg.MODEL.ROI_BOX_HEAD.NAME - return ROI_BOX_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py deleted file mode 100644 index b3efdcf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/cascade_rcnn.py +++ /dev/null @@ -1,298 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from typing import List -import torch -from torch import nn -from torch.autograd.function import Function - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage - -from ..box_regression import Box2BoxTransform -from ..matcher import Matcher -from ..poolers import ROIPooler -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers, fast_rcnn_inference -from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads - - -class _ScaleGradient(Function): - @staticmethod - def forward(ctx, input, scale): - ctx.scale = scale - return input - - @staticmethod - def backward(ctx, grad_output): - return grad_output * ctx.scale, None - - -@ROI_HEADS_REGISTRY.register() -class CascadeROIHeads(StandardROIHeads): - """ - Implement :paper:`Cascade R-CNN`. - """ - - @configurable - def __init__( - self, - *, - box_in_features: List[str], - box_pooler: ROIPooler, - box_heads: List[nn.Module], - box_predictors: List[nn.Module], - proposal_matchers: List[Matcher], - **kwargs, - ): - """ - NOTE: this interface is experimental. - - Args: - box_pooler (ROIPooler): pooler that extracts region features from given boxes - box_heads (list[nn.Module]): box head for each cascade stage - box_predictors (list[nn.Module]): box predictor for each cascade stage - proposal_matchers (list[Matcher]): matcher with different IoU thresholds to - match boxes with ground truth for each stage. The first matcher matches - RPN proposals with ground truth, the other matchers use boxes predicted - by the previous stage as proposals and match them with ground truth. - """ - assert "proposal_matcher" not in kwargs, ( - "CascadeROIHeads takes 'proposal_matchers=' for each stage instead " - "of one 'proposal_matcher='." - ) - # The first matcher matches RPN proposals with ground truth, done in the base class - kwargs["proposal_matcher"] = proposal_matchers[0] - num_stages = self.num_cascade_stages = len(box_heads) - box_heads = nn.ModuleList(box_heads) - box_predictors = nn.ModuleList(box_predictors) - assert len(box_predictors) == num_stages, f"{len(box_predictors)} != {num_stages}!" - assert len(proposal_matchers) == num_stages, f"{len(proposal_matchers)} != {num_stages}!" 
- super().__init__( - box_in_features=box_in_features, - box_pooler=box_pooler, - box_head=box_heads, - box_predictor=box_predictors, - **kwargs, - ) - self.proposal_matchers = proposal_matchers - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - ret.pop("proposal_matcher") - return ret - - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS - cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS - assert len(cascade_bbox_reg_weights) == len(cascade_ious) - assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \ - "CascadeROIHeads only support class-agnostic regression now!" - assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0] - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features] - # Check all channel counts are equal - assert len(set(in_channels)) == 1, in_channels - in_channels = in_channels[0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - pooled_shape = ShapeSpec( - channels=in_channels, width=pooler_resolution, height=pooler_resolution - ) - - box_heads, box_predictors, proposal_matchers = [], [], [] - for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights): - box_head = build_box_head(cfg, pooled_shape) - box_heads.append(box_head) - box_predictors.append( - FastRCNNOutputLayers( - cfg, - box_head.output_shape, - box2box_transform=Box2BoxTransform(weights=bbox_reg_weights), - ) - ) - proposal_matchers.append(Matcher([match_iou], [0, 1], allow_low_quality_matches=False)) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_heads": box_heads, - "box_predictors": box_predictors, - "proposal_matchers": proposal_matchers, - } - - def forward(self, images, features, proposals, targets=None): - del images - if self.training: - proposals = self.label_and_sample_proposals(proposals, targets) - - if self.training: - # Need targets to box head - losses = self._forward_box(features, proposals, targets) - losses.update(self._forward_mask(features, proposals)) - losses.update(self._forward_keypoint(features, proposals)) - return proposals, losses - else: - pred_instances = self._forward_box(features, proposals) - pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def _forward_box(self, features, proposals, targets=None): - """ - Args: - features, targets: the same as in - Same as in :meth:`ROIHeads.forward`. - proposals (list[Instances]): the per-image object proposals with - their matching ground truth. - Each has fields "proposal_boxes", and "objectness_logits", - "gt_classes", "gt_boxes". - """ - features = [features[f] for f in self.box_in_features] - head_outputs = [] # (predictor, predictions, proposals) - prev_pred_boxes = None - image_sizes = [x.image_size for x in proposals] - for k in range(self.num_cascade_stages): - if k > 0: - # The output boxes of the previous stage are used to create the input - # proposals of the next stage. 
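                # During training, each stage then re-matches these refined boxes
                # against ground truth with its own matcher from `proposal_matchers`,
                # typically at an increasing IoU threshold per stage (e.g. 0.5, 0.6,
                # 0.7 in the common 3-stage setup), before computing that stage's
                # losses.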
- proposals = self._create_proposals_from_boxes(prev_pred_boxes, image_sizes) - if self.training: - proposals = self._match_and_label_boxes(proposals, k, targets) - predictions = self._run_stage(features, proposals, k) - prev_pred_boxes = self.box_predictor[k].predict_boxes(predictions, proposals) - head_outputs.append((self.box_predictor[k], predictions, proposals)) - - if self.training: - losses = {} - storage = get_event_storage() - for stage, (predictor, predictions, proposals) in enumerate(head_outputs): - with storage.name_scope("stage{}".format(stage)): - stage_losses = predictor.losses(predictions, proposals) - losses.update({k + "_stage{}".format(stage): v for k, v in stage_losses.items()}) - return losses - else: - # Each is a list[Tensor] of length #image. Each tensor is Ri x (K+1) - scores_per_stage = [h[0].predict_probs(h[1], h[2]) for h in head_outputs] - - # Average the scores across heads - scores = [ - sum(list(scores_per_image)) * (1.0 / self.num_cascade_stages) - for scores_per_image in zip(*scores_per_stage) - ] - # Use the boxes of the last head - predictor, predictions, proposals = head_outputs[-1] - boxes = predictor.predict_boxes(predictions, proposals) - pred_instances, _ = fast_rcnn_inference( - boxes, - scores, - image_sizes, - predictor.test_score_thresh, - predictor.test_nms_thresh, - predictor.test_topk_per_image, - ) - return pred_instances - - @torch.no_grad() - def _match_and_label_boxes(self, proposals, stage, targets): - """ - Match proposals with groundtruth using the matcher at the given stage. - Label the proposals as foreground or background based on the match. - - Args: - proposals (list[Instances]): One Instances for each image, with - the field "proposal_boxes". - stage (int): the current stage - targets (list[Instances]): the ground truth instances - - Returns: - list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes" - """ - num_fg_samples, num_bg_samples = [], [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - match_quality_matrix = pairwise_iou( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - # proposal_labels are 0 or 1 - matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix) - if len(targets_per_image) > 0: - gt_classes = targets_per_image.gt_classes[matched_idxs] - # Label unmatched proposals (0 label from matcher) as background (label=num_classes) - gt_classes[proposal_labels == 0] = self.num_classes - gt_boxes = targets_per_image.gt_boxes[matched_idxs] - else: - gt_classes = torch.zeros_like(matched_idxs) + self.num_classes - gt_boxes = Boxes( - targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4)) - ) - proposals_per_image.gt_classes = gt_classes - proposals_per_image.gt_boxes = gt_boxes - - num_fg_samples.append((proposal_labels == 1).sum().item()) - num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1]) - - # Log the number of fg/bg samples in each stage - storage = get_event_storage() - storage.put_scalar( - "stage{}/roi_head/num_fg_samples".format(stage), - sum(num_fg_samples) / len(num_fg_samples), - ) - storage.put_scalar( - "stage{}/roi_head/num_bg_samples".format(stage), - sum(num_bg_samples) / len(num_bg_samples), - ) - return proposals - - def _run_stage(self, features, proposals, stage): - """ - Args: - features (list[Tensor]): #lvl input features to ROIHeads - proposals (list[Instances]): #image Instances, with the field "proposal_boxes" - stage (int): the current stage - - Returns: - Same 
output as `FastRCNNOutputLayers.forward()`. - """ - box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) - # The original implementation averages the losses among heads, - # but scale up the parameter gradients of the heads. - # This is equivalent to adding the losses among heads, - # but scale down the gradients on features. - box_features = _ScaleGradient.apply(box_features, 1.0 / self.num_cascade_stages) - box_features = self.box_head[stage](box_features) - return self.box_predictor[stage](box_features) - - def _create_proposals_from_boxes(self, boxes, image_sizes): - """ - Args: - boxes (list[Tensor]): per-image predicted boxes, each of shape Ri x 4 - image_sizes (list[tuple]): list of image shapes in (h, w) - - Returns: - list[Instances]: per-image proposals with the given boxes. - """ - # Just like RPN, the proposals should not have gradients - boxes = [Boxes(b.detach()) for b in boxes] - proposals = [] - for boxes_per_image, image_size in zip(boxes, image_sizes): - boxes_per_image.clip(image_size) - if self.training: - # do not filter empty boxes at inference time, - # because the scores from each stage need to be aligned and added later - boxes_per_image = boxes_per_image[boxes_per_image.nonempty()] - prop = Instances(image_size) - prop.proposal_boxes = boxes_per_image - proposals.append(prop) - return proposals diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py deleted file mode 100644 index ca796ac..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/fast_rcnn.py +++ /dev/null @@ -1,510 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import torch -from fvcore.nn import smooth_l1_loss -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Linear, ShapeSpec, batched_nms, cat -from detectron2.modeling.box_regression import Box2BoxTransform, apply_deltas_broadcast -from detectron2.structures import Boxes, Instances -from detectron2.utils.events import get_event_storage - -__all__ = ["fast_rcnn_inference", "FastRCNNOutputLayers"] - - -logger = logging.getLogger(__name__) - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - R: number of ROIs, combined over all images, in the minibatch - Ri: number of ROIs in image i - K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. - -Naming convention: - - deltas: refers to the 4-d (dx, dy, dw, dh) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransform`). - - pred_class_logits: predicted class scores in [-inf, +inf]; use - softmax(pred_class_logits) to estimate P(class). - - gt_classes: ground-truth classification labels in [0, K], where [0, K) represent - foreground object classes and K represents the background class. - - pred_proposal_deltas: predicted box2box transform deltas for transforming proposals - to detection box predictions. - - gt_proposal_deltas: ground-truth box2box transform deltas -""" - - -def fast_rcnn_inference(boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image): - """ - Call `fast_rcnn_inference_single_image` for all images. - - Args: - boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic - boxes for each image. 
Element i has shape (Ri, K * 4) if doing - class-specific regression, or (Ri, 4) if doing class-agnostic - regression, where Ri is the number of predicted objects for image i. - This is compatible with the output of :meth:`FastRCNNOutputLayers.predict_boxes`. - scores (list[Tensor]): A list of Tensors of predicted class scores for each image. - Element i has shape (Ri, K + 1), where Ri is the number of predicted objects - for image i. Compatible with the output of :meth:`FastRCNNOutputLayers.predict_probs`. - image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. - score_thresh (float): Only return detections with a confidence score exceeding this - threshold. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - instances: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections. - kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates - the corresponding boxes/scores index in [0, Ri) from the input, for image i. - """ - result_per_image = [ - fast_rcnn_inference_single_image( - boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image - ) - for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) - ] - return [x[0] for x in result_per_image], [x[1] for x in result_per_image] - - -def fast_rcnn_inference_single_image( - boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image -): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Args: - Same as `fast_rcnn_inference`, but with boxes, scores, and image shapes - per image. - - Returns: - Same as `fast_rcnn_inference`, but for only one image. - """ - valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) - if not valid_mask.all(): - boxes = boxes[valid_mask] - scores = scores[valid_mask] - - scores = scores[:, :-1] - num_bbox_reg_classes = boxes.shape[1] // 4 - # Convert to Boxes to use the `clip` function ... - boxes = Boxes(boxes.reshape(-1, 4)) - boxes.clip(image_shape) - boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4) # R x C x 4 - - # Filter results based on detection scores - filter_mask = scores > score_thresh # R x K - # R' x 2. First column contains indices of the R predictions; - # Second column contains indices of classes. - filter_inds = filter_mask.nonzero() - if num_bbox_reg_classes == 1: - boxes = boxes[filter_inds[:, 0], 0] - else: - boxes = boxes[filter_mask] - scores = scores[filter_mask] - - # Apply per-class NMS - keep = batched_nms(boxes, scores, filter_inds[:, 1], nms_thresh) - if topk_per_image >= 0: - keep = keep[:topk_per_image] - boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] - - result = Instances(image_shape) - result.pred_boxes = Boxes(boxes) - result.scores = scores - result.pred_classes = filter_inds[:, 1] - return result, filter_inds[:, 0] - - -class FastRCNNOutputs(object): - """ - A class that stores information about outputs of a Fast R-CNN head. - It provides methods that are used to decode the outputs of a Fast R-CNN head. 
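    In particular, `losses()` computes the classification and box-regression
    losses, while `predict_boxes()`, `predict_probs()` and `inference()` are kept
    only for backward compatibility and are marked deprecated below.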
- """ - - def __init__( - self, - box2box_transform, - pred_class_logits, - pred_proposal_deltas, - proposals, - smooth_l1_beta=0, - ): - """ - Args: - box2box_transform (Box2BoxTransform/Box2BoxTransformRotated): - box2box transform instance for proposal-to-detection transformations. - pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class - logits for all R predicted object instances. - Each row corresponds to a predicted object instance. - pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for - class-specific or class-agnostic regression. It stores the predicted deltas that - transform proposals into final box detections. - B is the box dimension (4 or 5). - When B is 4, each row is [dx, dy, dw, dh (, ....)]. - When B is 5, each row is [dx, dy, dw, dh, da (, ....)]. - proposals (list[Instances]): A list of N Instances, where Instances i stores the - proposals for image i, in the field "proposal_boxes". - When training, each Instances must have ground-truth labels - stored in the field "gt_classes" and "gt_boxes". - The total number of all instances must be equal to R. - smooth_l1_beta (float): The transition point between L1 and L2 loss in - the smooth L1 loss function. When set to 0, the loss becomes L1. When - set to +inf, the loss becomes constant 0. - """ - self.box2box_transform = box2box_transform - self.num_preds_per_image = [len(p) for p in proposals] - self.pred_class_logits = pred_class_logits - self.pred_proposal_deltas = pred_proposal_deltas - self.smooth_l1_beta = smooth_l1_beta - self.image_shapes = [x.image_size for x in proposals] - - if len(proposals): - box_type = type(proposals[0].proposal_boxes) - # cat(..., dim=0) concatenates over all images in the batch - self.proposals = box_type.cat([p.proposal_boxes for p in proposals]) - assert ( - not self.proposals.tensor.requires_grad - ), "Proposals should not require gradients!" - - # The following fields should exist only when training. - if proposals[0].has("gt_boxes"): - self.gt_boxes = box_type.cat([p.gt_boxes for p in proposals]) - assert proposals[0].has("gt_classes") - self.gt_classes = cat([p.gt_classes for p in proposals], dim=0) - else: - self.proposals = Boxes(torch.zeros(0, 4, device=self.pred_proposal_deltas.device)) - self._no_instances = len(proposals) == 0 # no instances found - - def _log_accuracy(self): - """ - Log the accuracy metrics to EventStorage. - """ - num_instances = self.gt_classes.numel() - pred_classes = self.pred_class_logits.argmax(dim=1) - bg_class_ind = self.pred_class_logits.shape[1] - 1 - - fg_inds = (self.gt_classes >= 0) & (self.gt_classes < bg_class_ind) - num_fg = fg_inds.nonzero().numel() - fg_gt_classes = self.gt_classes[fg_inds] - fg_pred_classes = pred_classes[fg_inds] - - num_false_negative = (fg_pred_classes == bg_class_ind).nonzero().numel() - num_accurate = (pred_classes == self.gt_classes).nonzero().numel() - fg_num_accurate = (fg_pred_classes == fg_gt_classes).nonzero().numel() - - storage = get_event_storage() - if num_instances > 0: - storage.put_scalar("fast_rcnn/cls_accuracy", num_accurate / num_instances) - if num_fg > 0: - storage.put_scalar("fast_rcnn/fg_cls_accuracy", fg_num_accurate / num_fg) - storage.put_scalar("fast_rcnn/false_negative", num_false_negative / num_fg) - - def softmax_cross_entropy_loss(self): - """ - Compute the softmax cross entropy loss for box classification. 
- - Returns: - scalar Tensor - """ - if self._no_instances: - return 0.0 * self.pred_class_logits.sum() - else: - self._log_accuracy() - return F.cross_entropy(self.pred_class_logits, self.gt_classes, reduction="mean") - - def smooth_l1_loss(self): - """ - Compute the smooth L1 loss for box regression. - - Returns: - scalar Tensor - """ - if self._no_instances: - return 0.0 * self.pred_proposal_deltas.sum() - gt_proposal_deltas = self.box2box_transform.get_deltas( - self.proposals.tensor, self.gt_boxes.tensor - ) - box_dim = gt_proposal_deltas.size(1) # 4 or 5 - cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim - device = self.pred_proposal_deltas.device - - bg_class_ind = self.pred_class_logits.shape[1] - 1 - - # Box delta loss is only computed between the prediction for the gt class k - # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions - # for non-gt classes and background. - # Empty fg_inds produces a valid loss of zero as long as the size_average - # arg to smooth_l1_loss is False (otherwise it uses torch.mean internally - # and would produce a nan loss). - fg_inds = torch.nonzero( - (self.gt_classes >= 0) & (self.gt_classes < bg_class_ind), as_tuple=True - )[0] - if cls_agnostic_bbox_reg: - # pred_proposal_deltas only corresponds to foreground class for agnostic - gt_class_cols = torch.arange(box_dim, device=device) - else: - fg_gt_classes = self.gt_classes[fg_inds] - # pred_proposal_deltas for class k are located in columns [b * k : b * k + b], - # where b is the dimension of box representation (4 or 5) - # Note that compared to Detectron1, - # we do not perform bounding box regression for background classes. - gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(box_dim, device=device) - - loss_box_reg = smooth_l1_loss( - self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols], - gt_proposal_deltas[fg_inds], - self.smooth_l1_beta, - reduction="sum", - ) - # The loss is normalized using the total number of regions (R), not the number - # of foreground regions even though the box regression loss is only defined on - # foreground regions. Why? Because doing so gives equal training influence to - # each foreground example. To see how, consider two different minibatches: - # (1) Contains a single foreground region - # (2) Contains 100 foreground regions - # If we normalize by the number of foreground regions, the single example in - # minibatch (1) will be given 100 times as much influence as each foreground - # example in minibatch (2). Normalizing by the total number of regions, R, - # means that the single example in minibatch (1) and each of the 100 examples - # in minibatch (2) are given equal influence. - loss_box_reg = loss_box_reg / self.gt_classes.numel() - return loss_box_reg - - def _predict_boxes(self): - """ - Returns: - Tensor: A Tensors of predicted class-specific or class-agnostic boxes - for all images in a batch. Element i has shape (Ri, K * B) or (Ri, B), where Ri is - the number of predicted objects for image i and B is the box dimension (4 or 5) - """ - return apply_deltas_broadcast( - self.box2box_transform, self.pred_proposal_deltas, self.proposals.tensor - ) - - """ - A subclass is expected to have the following methods because - they are used to query information about the head predictions. - """ - - def losses(self): - """ - Compute the default losses for box head in Fast(er) R-CNN, - with softmax cross entropy loss and smooth L1 loss. 
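        Both losses are normalized by the total number of sampled regions R
        (cross-entropy via `reduction="mean"`, box regression via the explicit
        division in `smooth_l1_loss`).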
- - Returns: - A dict of losses (scalar tensors) containing keys "loss_cls" and "loss_box_reg". - """ - return { - "loss_cls": self.softmax_cross_entropy_loss(), - "loss_box_reg": self.smooth_l1_loss(), - } - - def predict_boxes(self): - """ - Deprecated - """ - return self._predict_boxes().split(self.num_preds_per_image, dim=0) - - def predict_probs(self): - """ - Deprecated - """ - probs = F.softmax(self.pred_class_logits, dim=-1) - return probs.split(self.num_preds_per_image, dim=0) - - def inference(self, score_thresh, nms_thresh, topk_per_image): - """ - Deprecated - """ - boxes = self.predict_boxes() - scores = self.predict_probs() - image_shapes = self.image_shapes - return fast_rcnn_inference( - boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image - ) - - -class FastRCNNOutputLayers(nn.Module): - """ - Two linear layers for predicting Fast R-CNN outputs: - (1) proposal-to-detection box regression deltas - (2) classification scores - """ - - @configurable - def __init__( - self, - input_shape, - *, - box2box_transform, - num_classes, - cls_agnostic_bbox_reg=False, - smooth_l1_beta=0.0, - test_score_thresh=0.0, - test_nms_thresh=0.5, - test_topk_per_image=100, - ): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature to this module - box2box_transform (Box2BoxTransform or Box2BoxTransformRotated): - num_classes (int): number of foreground classes - cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression - smooth_l1_beta (float): transition point from L1 to L2 loss. - test_score_thresh (float): threshold to filter predictions results. - test_nms_thresh (float): NMS threshold for prediction results. - test_topk_per_image (int): number of top predictions to produce per image. - """ - super().__init__() - if isinstance(input_shape, int): # some backward compatibility - input_shape = ShapeSpec(channels=input_shape) - input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1) - # The prediction layer for num_classes foreground classes and one background class - # (hence + 1) - self.cls_score = Linear(input_size, num_classes + 1) - num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes - box_dim = len(box2box_transform.weights) - self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim) - - nn.init.normal_(self.cls_score.weight, std=0.01) - nn.init.normal_(self.bbox_pred.weight, std=0.001) - for l in [self.cls_score, self.bbox_pred]: - nn.init.constant_(l.bias, 0) - - self.box2box_transform = box2box_transform - self.smooth_l1_beta = smooth_l1_beta - self.test_score_thresh = test_score_thresh - self.test_nms_thresh = test_nms_thresh - self.test_topk_per_image = test_topk_per_image - - @classmethod - def from_config(cls, cfg, input_shape): - return { - "input_shape": input_shape, - "box2box_transform": Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS), - # fmt: off - "num_classes" : cfg.MODEL.ROI_HEADS.NUM_CLASSES, - "cls_agnostic_bbox_reg" : cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, - "smooth_l1_beta" : cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA, - "test_score_thresh" : cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST, - "test_nms_thresh" : cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, - "test_topk_per_image" : cfg.TEST.DETECTIONS_PER_IMAGE - # fmt: on - } - - def forward(self, x): - """ - Returns: - Tensor: Nx(K+1) scores for each box - Tensor: Nx4 or Nx(Kx4) bounding box regression deltas. 
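
# Toy-sized sketch of the two parallel linear heads built in __init__ above and
# used by forward(): one produces K+1 classification scores, the other K*B
# regression deltas. The sizes below are invented for illustration.
import torch
from torch import nn

input_size, num_classes, box_dim = 1024, 80, 4
cls_score = nn.Linear(input_size, num_classes + 1)
bbox_pred = nn.Linear(input_size, num_classes * box_dim)   # class-specific regression

x = torch.randn(2, input_size)              # two pooled, flattened region features
scores, proposal_deltas = cls_score(x), bbox_pred(x)
print(scores.shape, proposal_deltas.shape)  # (2, 81) and (2, 320)
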
- """ - if x.dim() > 2: - x = torch.flatten(x, start_dim=1) - scores = self.cls_score(x) - proposal_deltas = self.bbox_pred(x) - return scores, proposal_deltas - - # TODO: move the implementation to this class. - def losses(self, predictions, proposals): - """ - Args: - predictions: return values of :meth:`forward()`. - proposals (list[Instances]): proposals that match the features - that were used to compute predictions. - """ - scores, proposal_deltas = predictions - return FastRCNNOutputs( - self.box2box_transform, scores, proposal_deltas, proposals, self.smooth_l1_beta - ).losses() - - def inference(self, predictions, proposals): - """ - Returns: - list[Instances]: same as `fast_rcnn_inference`. - list[Tensor]: same as `fast_rcnn_inference`. - """ - boxes = self.predict_boxes(predictions, proposals) - scores = self.predict_probs(predictions, proposals) - image_shapes = [x.image_size for x in proposals] - return fast_rcnn_inference( - boxes, - scores, - image_shapes, - self.test_score_thresh, - self.test_nms_thresh, - self.test_topk_per_image, - ) - - def predict_boxes_for_gt_classes(self, predictions, proposals): - """ - Returns: - list[Tensor]: A list of Tensors of predicted boxes for GT classes in case of - class-specific box head. Element i of the list has shape (Ri, B), where Ri is - the number of predicted objects for image i and B is the box dimension (4 or 5) - """ - if not len(proposals): - return [] - scores, proposal_deltas = predictions - proposal_boxes = [p.proposal_boxes for p in proposals] - proposal_boxes = proposal_boxes[0].cat(proposal_boxes).tensor - N, B = proposal_boxes.shape - predict_boxes = apply_deltas_broadcast( - self.box2box_transform, proposal_deltas, proposal_boxes - ) # Nx(KxB) - - K = predict_boxes.shape[1] // B - if K > 1: - gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0) - # Some proposals are ignored or have a background class. Their gt_classes - # cannot be used as index. - gt_classes = gt_classes.clamp_(0, K - 1) - - predict_boxes = predict_boxes.view(N, K, B)[ - torch.arange(N, dtype=torch.long, device=predict_boxes.device), gt_classes - ] - num_prop_per_image = [len(p) for p in proposals] - return predict_boxes.split(num_prop_per_image) - - def predict_boxes(self, predictions, proposals): - """ - Returns: - list[Tensor]: A list of Tensors of predicted class-specific or class-agnostic boxes - for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is - the number of predicted objects for image i and B is the box dimension (4 or 5) - """ - if not len(proposals): - return [] - _, proposal_deltas = predictions - num_prop_per_image = [len(p) for p in proposals] - proposal_boxes = [p.proposal_boxes for p in proposals] - proposal_boxes = proposal_boxes[0].cat(proposal_boxes).tensor - predict_boxes = apply_deltas_broadcast( - self.box2box_transform, proposal_deltas, proposal_boxes - ) # Nx(KxB) - return predict_boxes.split(num_prop_per_image) - - def predict_probs(self, predictions, proposals): - """ - Returns: - list[Tensor]: A list of Tensors of predicted class probabilities for each image. - Element i has shape (Ri, K + 1), where Ri is the number of predicted objects - for image i. 
- """ - scores, _ = predictions - num_inst_per_image = [len(p) for p in proposals] - probs = F.softmax(scores, dim=-1) - return probs.split(num_inst_per_image, dim=0) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/keypoint_head.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/keypoint_head.py deleted file mode 100644 index c7990c8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/keypoint_head.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from typing import List -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ConvTranspose2d, cat, interpolate -from detectron2.structures import Instances, heatmaps_to_keypoints -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -_TOTAL_SKIPPED = 0 - -ROI_KEYPOINT_HEAD_REGISTRY = Registry("ROI_KEYPOINT_HEAD") -ROI_KEYPOINT_HEAD_REGISTRY.__doc__ = """ -Registry for keypoint heads, which make keypoint predictions from per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -def build_keypoint_head(cfg, input_shape): - """ - Build a keypoint head from `cfg.MODEL.ROI_KEYPOINT_HEAD.NAME`. - """ - name = cfg.MODEL.ROI_KEYPOINT_HEAD.NAME - return ROI_KEYPOINT_HEAD_REGISTRY.get(name)(cfg, input_shape) - - -def keypoint_rcnn_loss(pred_keypoint_logits, instances, normalizer): - """ - Arguments: - pred_keypoint_logits (Tensor): A tensor of shape (N, K, S, S) where N is the total number - of instances in the batch, K is the number of keypoints, and S is the side length - of the keypoint heatmap. The values are spatial logits. - instances (list[Instances]): A list of M Instances, where M is the batch size. - These instances are predictions from the model - that are in 1:1 correspondence with pred_keypoint_logits. - Each Instances should contain a `gt_keypoints` field containing a `structures.Keypoint` - instance. - normalizer (float): Normalize the loss by this amount. - If not specified, we normalize by the number of visible keypoints in the minibatch. - - Returns a scalar tensor containing the loss. 
- """ - heatmaps = [] - valid = [] - - keypoint_side_len = pred_keypoint_logits.shape[2] - for instances_per_image in instances: - if len(instances_per_image) == 0: - continue - keypoints = instances_per_image.gt_keypoints - heatmaps_per_image, valid_per_image = keypoints.to_heatmap( - instances_per_image.proposal_boxes.tensor, keypoint_side_len - ) - heatmaps.append(heatmaps_per_image.view(-1)) - valid.append(valid_per_image.view(-1)) - - if len(heatmaps): - keypoint_targets = cat(heatmaps, dim=0) - valid = cat(valid, dim=0).to(dtype=torch.uint8) - valid = torch.nonzero(valid).squeeze(1) - - # torch.mean (in binary_cross_entropy_with_logits) doesn't - # accept empty tensors, so handle it separately - if len(heatmaps) == 0 or valid.numel() == 0: - global _TOTAL_SKIPPED - _TOTAL_SKIPPED += 1 - storage = get_event_storage() - storage.put_scalar("kpts_num_skipped_batches", _TOTAL_SKIPPED, smoothing_hint=False) - return pred_keypoint_logits.sum() * 0 - - N, K, H, W = pred_keypoint_logits.shape - pred_keypoint_logits = pred_keypoint_logits.view(N * K, H * W) - - keypoint_loss = F.cross_entropy( - pred_keypoint_logits[valid], keypoint_targets[valid], reduction="sum" - ) - - # If a normalizer isn't specified, normalize by the number of visible keypoints in the minibatch - if normalizer is None: - normalizer = valid.numel() - keypoint_loss /= normalizer - - return keypoint_loss - - -def keypoint_rcnn_inference(pred_keypoint_logits, pred_instances): - """ - Post process each predicted keypoint heatmap in `pred_keypoint_logits` into (x, y, score) - and add it to the `pred_instances` as a `pred_keypoints` field. - - Args: - pred_keypoint_logits (Tensor): A tensor of shape (R, K, S, S) where R is the total number - of instances in the batch, K is the number of keypoints, and S is the side length of - the keypoint heatmap. The values are spatial logits. - pred_instances (list[Instances]): A list of N Instances, where N is the number of images. - - Returns: - None. Each element in pred_instances will contain an extra "pred_keypoints" field. - The field is a tensor of shape (#instance, K, 3) where the last - dimension corresponds to (x, y, score). - The scores are larger than 0. - """ - # flatten all bboxes from all images together (list[Boxes] -> Rx4 tensor) - bboxes_flat = cat([b.pred_boxes.tensor for b in pred_instances], dim=0) - - keypoint_results = heatmaps_to_keypoints(pred_keypoint_logits.detach(), bboxes_flat.detach()) - num_instances_per_image = [len(i) for i in pred_instances] - keypoint_results = keypoint_results[:, :, [0, 1, 3]].split(num_instances_per_image, dim=0) - - for keypoint_results_per_image, instances_per_image in zip(keypoint_results, pred_instances): - # keypoint_results_per_image is (num instances)x(num keypoints)x(x, y, score) - instances_per_image.pred_keypoints = keypoint_results_per_image - - -class BaseKeypointRCNNHead(nn.Module): - """ - Implement the basic Keypoint R-CNN losses and inference logic described in :paper:`Mask R-CNN`. - """ - - @configurable - def __init__(self, *, num_keypoints, loss_weight=1.0, loss_normalizer=1.0): - """ - NOTE: this interface is experimental. - - Args: - num_keypoints (int): number of keypoints to predict - loss_weight (float): weight to multiple on the keypoint loss - loss_normalizer (float or str): - If float, divide the loss by `loss_normalizer * #images`. - If 'visible', the loss is normalized by the total number of - visible keypoints across images. 
- """ - super().__init__() - self.num_keypoints = num_keypoints - self.loss_weight = loss_weight - assert loss_normalizer == "visible" or isinstance(loss_normalizer, float), loss_normalizer - self.loss_normalizer = loss_normalizer - - @classmethod - def from_config(cls, cfg, input_shape): - ret = { - "loss_weight": cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT, - "num_keypoints": cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS, - } - normalize_by_visible = ( - cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS - ) # noqa - if not normalize_by_visible: - batch_size_per_image = cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE - positive_sample_fraction = cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION - ret["loss_normalizer"] = ( - ret["num_keypoints"] * batch_size_per_image * positive_sample_fraction - ) - else: - ret["loss_normalizer"] = "visible" - return ret - - def forward(self, x, instances: List[Instances]): - """ - Args: - x: input region feature(s) provided by :class:`ROIHeads`. - instances (list[Instances]): contains the boxes & labels corresponding - to the input features. - Exact format is up to its caller to decide. - Typically, this is the foreground instances in training, with - "proposal_boxes" field and other gt annotations. - In inference, it contains boxes that are already predicted. - - Returns: - A dict of losses if in training. The predicted "instances" if in inference. - """ - x = self.layers(x) - if self.training: - num_images = len(instances) - normalizer = ( - None if self.loss_normalizer == "visible" else num_images * self.loss_normalizer - ) - return { - "loss_keypoint": keypoint_rcnn_loss(x, instances, normalizer=normalizer) - * self.loss_weight - } - else: - keypoint_rcnn_inference(x, instances) - return instances - - def layers(self, x): - """ - Neural network layers that makes predictions from regional input features. - """ - raise NotImplementedError - - -@ROI_KEYPOINT_HEAD_REGISTRY.register() -class KRCNNConvDeconvUpsampleHead(BaseKeypointRCNNHead): - """ - A standard keypoint head containing a series of 3x3 convs, followed by - a transpose convolution and bilinear interpolation for upsampling. - """ - - @configurable - def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature - conv_dims: an iterable of output channel counts for each conv in the head - e.g. (512, 512, 512) for three convs outputting 512 channels. 
- """ - super().__init__(num_keypoints=num_keypoints, **kwargs) - - # default up_scale to 2 (this can be made an option) - up_scale = 2 - in_channels = input_shape.channels - - self.blocks = [] - for idx, layer_channels in enumerate(conv_dims, 1): - module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1) - self.add_module("conv_fcn{}".format(idx), module) - self.blocks.append(module) - in_channels = layer_channels - - deconv_kernel = 4 - self.score_lowres = ConvTranspose2d( - in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1 - ) - self.up_scale = up_scale - - for name, param in self.named_parameters(): - if "bias" in name: - nn.init.constant_(param, 0) - elif "weight" in name: - # Caffe2 implementation uses MSRAFill, which in fact - # corresponds to kaiming_normal_ in PyTorch - nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - ret["input_shape"] = input_shape - ret["conv_dims"] = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS - return ret - - def layers(self, x): - for layer in self.blocks: - x = F.relu(layer(x)) - x = self.score_lowres(x) - x = interpolate(x, scale_factor=self.up_scale, mode="bilinear", align_corners=False) - return x diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/mask_head.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/mask_head.py deleted file mode 100644 index 5209722..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/mask_head.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from typing import List -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import configurable -from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm -from detectron2.structures import Instances -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -ROI_MASK_HEAD_REGISTRY = Registry("ROI_MASK_HEAD") -ROI_MASK_HEAD_REGISTRY.__doc__ = """ -Registry for mask heads, which predicts instance masks given -per-region features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -def mask_rcnn_loss(pred_mask_logits, instances, vis_period=0): - """ - Compute the mask prediction loss defined in the Mask R-CNN paper. - - Args: - pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) - for class-specific or class-agnostic, where B is the total number of predicted masks - in all images, C is the number of foreground classes, and Hmask, Wmask are the height - and width of the mask predictions. The values are logits. - instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. These instances are in 1:1 - correspondence with the pred_mask_logits. The ground-truth labels (class, box, mask, - ...) associated with each instance are stored in fields. - vis_period (int): the period (in steps) to dump visualization. - - Returns: - mask_loss (Tensor): A scalar tensor containing the loss. 
- """ - cls_agnostic_mask = pred_mask_logits.size(1) == 1 - total_num_masks = pred_mask_logits.size(0) - mask_side_len = pred_mask_logits.size(2) - assert pred_mask_logits.size(2) == pred_mask_logits.size(3), "Mask prediction must be square!" - - gt_classes = [] - gt_masks = [] - for instances_per_image in instances: - if len(instances_per_image) == 0: - continue - if not cls_agnostic_mask: - gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) - gt_classes.append(gt_classes_per_image) - - gt_masks_per_image = instances_per_image.gt_masks.crop_and_resize( - instances_per_image.proposal_boxes.tensor, mask_side_len - ).to(device=pred_mask_logits.device) - # A tensor of shape (N, M, M), N=#instances in the image; M=mask_side_len - gt_masks.append(gt_masks_per_image) - - if len(gt_masks) == 0: - return pred_mask_logits.sum() * 0 - - gt_masks = cat(gt_masks, dim=0) - - if cls_agnostic_mask: - pred_mask_logits = pred_mask_logits[:, 0] - else: - indices = torch.arange(total_num_masks) - gt_classes = cat(gt_classes, dim=0) - pred_mask_logits = pred_mask_logits[indices, gt_classes] - - if gt_masks.dtype == torch.bool: - gt_masks_bool = gt_masks - else: - # Here we allow gt_masks to be float as well (depend on the implementation of rasterize()) - gt_masks_bool = gt_masks > 0.5 - gt_masks = gt_masks.to(dtype=torch.float32) - - # Log the training accuracy (using gt classes and 0.5 threshold) - mask_incorrect = (pred_mask_logits > 0.0) != gt_masks_bool - mask_accuracy = 1 - (mask_incorrect.sum().item() / max(mask_incorrect.numel(), 1.0)) - num_positive = gt_masks_bool.sum().item() - false_positive = (mask_incorrect & ~gt_masks_bool).sum().item() / max( - gt_masks_bool.numel() - num_positive, 1.0 - ) - false_negative = (mask_incorrect & gt_masks_bool).sum().item() / max(num_positive, 1.0) - - storage = get_event_storage() - storage.put_scalar("mask_rcnn/accuracy", mask_accuracy) - storage.put_scalar("mask_rcnn/false_positive", false_positive) - storage.put_scalar("mask_rcnn/false_negative", false_negative) - if vis_period > 0 and storage.iter % vis_period == 0: - pred_masks = pred_mask_logits.sigmoid() - vis_masks = torch.cat([pred_masks, gt_masks], axis=2) - name = "Left: mask prediction; Right: mask GT" - for idx, vis_mask in enumerate(vis_masks): - vis_mask = torch.stack([vis_mask] * 3, axis=0) - storage.put_image(name + f" ({idx})", vis_mask) - - mask_loss = F.binary_cross_entropy_with_logits(pred_mask_logits, gt_masks, reduction="mean") - return mask_loss - - -def mask_rcnn_inference(pred_mask_logits, pred_instances): - """ - Convert pred_mask_logits to estimated foreground probability masks while also - extracting only the masks for the predicted classes in pred_instances. For each - predicted box, the mask of the same class is attached to the instance by adding a - new "pred_masks" field to pred_instances. - - Args: - pred_mask_logits (Tensor): A tensor of shape (B, C, Hmask, Wmask) or (B, 1, Hmask, Wmask) - for class-specific or class-agnostic, where B is the total number of predicted masks - in all images, C is the number of foreground classes, and Hmask, Wmask are the height - and width of the mask predictions. The values are logits. - pred_instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. Each Instances must have field "pred_classes". - - Returns: - None. pred_instances will contain an extra "pred_masks" field storing a mask of size (Hmask, - Wmask) for predicted class. 
Note that the masks are returned as a soft (non-quantized) - masks the resolution predicted by the network; post-processing steps, such as resizing - the predicted masks to the original image resolution and/or binarizing them, is left - to the caller. - """ - cls_agnostic_mask = pred_mask_logits.size(1) == 1 - - if cls_agnostic_mask: - mask_probs_pred = pred_mask_logits.sigmoid() - else: - # Select masks corresponding to the predicted classes - num_masks = pred_mask_logits.shape[0] - class_pred = cat([i.pred_classes for i in pred_instances]) - indices = torch.arange(num_masks, device=class_pred.device) - mask_probs_pred = pred_mask_logits[indices, class_pred][:, None].sigmoid() - # mask_probs_pred.shape: (B, 1, Hmask, Wmask) - - num_boxes_per_image = [len(i) for i in pred_instances] - mask_probs_pred = mask_probs_pred.split(num_boxes_per_image, dim=0) - - for prob, instances in zip(mask_probs_pred, pred_instances): - instances.pred_masks = prob # (1, Hmask, Wmask) - - -class BaseMaskRCNNHead(nn.Module): - """ - Implement the basic Mask R-CNN losses and inference logic described in :paper:`Mask R-CNN` - """ - - @configurable - def __init__(self, *, vis_period=0): - """ - NOTE: this interface is experimental. - - Args: - vis_period (int): visualization period - """ - super().__init__() - self.vis_period = vis_period - - @classmethod - def from_config(cls, cfg, input_shape): - return {"vis_period": cfg.VIS_PERIOD} - - def forward(self, x, instances: List[Instances]): - """ - Args: - x: input region feature(s) provided by :class:`ROIHeads`. - instances (list[Instances]): contains the boxes & labels corresponding - to the input features. - Exact format is up to its caller to decide. - Typically, this is the foreground instances in training, with - "proposal_boxes" field and other gt annotations. - In inference, it contains boxes that are already predicted. - - Returns: - A dict of losses in training. The predicted "instances" in inference. - """ - x = self.layers(x) - if self.training: - return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period)} - else: - mask_rcnn_inference(x, instances) - return instances - - def layers(self, x): - """ - Neural network layers that makes predictions from input features. - """ - raise NotImplementedError - - -@ROI_MASK_HEAD_REGISTRY.register() -class MaskRCNNConvUpsampleHead(BaseMaskRCNNHead): - """ - A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). - Predictions are made with a final 1x1 conv layer. - """ - - @configurable - def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", **kwargs): - """ - NOTE: this interface is experimental. - - Args: - input_shape (ShapeSpec): shape of the input feature - num_classes (int): the number of classes. 1 if using class agnostic prediction. - conv_dims (list[int]): a list of N>0 integers representing the output dimensions - of N-1 conv layers and the last upsample layer. - conv_norm (str or callable): normalization for the conv layers. - See :func:`detectron2.layers.get_norm` for supported types. - """ - super().__init__(**kwargs) - assert len(conv_dims) >= 1, "conv_dims have to be non-empty!" 
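
# Hedged sketch (toy shapes) of the per-class probability selection performed in
# mask_rcnn_inference above: keep, for each box, only the sigmoid mask of its
# predicted class, then split the flat result back into per-image chunks.
import torch

B, C, M = 5, 80, 28
pred_mask_logits = torch.randn(B, C, M, M)
pred_classes = torch.tensor([0, 7, 7, 12, 3])
num_boxes_per_image = [2, 3]                    # two images contributed 2 and 3 boxes

idx = torch.arange(B)
mask_probs = pred_mask_logits[idx, pred_classes][:, None].sigmoid()   # (B, 1, M, M)
per_image = mask_probs.split(num_boxes_per_image, dim=0)
print([p.shape for p in per_image])             # [(2, 1, 28, 28), (3, 1, 28, 28)]
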
- - self.conv_norm_relus = [] - - cur_channels = input_shape.channels - for k, conv_dim in enumerate(conv_dims[:-1]): - conv = Conv2d( - cur_channels, - conv_dim, - kernel_size=3, - stride=1, - padding=1, - bias=not conv_norm, - norm=get_norm(conv_norm, conv_dim), - activation=F.relu, - ) - self.add_module("mask_fcn{}".format(k + 1), conv) - self.conv_norm_relus.append(conv) - cur_channels = conv_dim - - self.deconv = ConvTranspose2d( - cur_channels, conv_dims[-1], kernel_size=2, stride=2, padding=0 - ) - cur_channels = conv_dims[-1] - - self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1, stride=1, padding=0) - - for layer in self.conv_norm_relus + [self.deconv]: - weight_init.c2_msra_fill(layer) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.predictor.weight, std=0.001) - if self.predictor.bias is not None: - nn.init.constant_(self.predictor.bias, 0) - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg, input_shape) - conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM - num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV - ret.update( - conv_dims=[conv_dim] * (num_conv + 1), # +1 for ConvTranspose - conv_norm=cfg.MODEL.ROI_MASK_HEAD.NORM, - input_shape=input_shape, - ) - if cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK: - ret["num_classes"] = 1 - else: - ret["num_classes"] = cfg.MODEL.ROI_HEADS.NUM_CLASSES - return ret - - def layers(self, x): - for layer in self.conv_norm_relus: - x = layer(x) - x = F.relu(self.deconv(x)) - return self.predictor(x) - - -def build_mask_head(cfg, input_shape): - """ - Build a mask head defined by `cfg.MODEL.ROI_MASK_HEAD.NAME`. - """ - name = cfg.MODEL.ROI_MASK_HEAD.NAME - return ROI_MASK_HEAD_REGISTRY.get(name)(cfg, input_shape) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/roi_heads.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/roi_heads.py deleted file mode 100644 index f35588e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/roi_heads.py +++ /dev/null @@ -1,812 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import inspect -import logging -import numpy as np -from typing import Dict, List, Optional, Tuple, Union -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec -from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -from ..backbone.resnet import BottleneckBlock, make_stage -from ..matcher import Matcher -from ..poolers import ROIPooler -from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals -from ..sampling import subsample_labels -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers -from .keypoint_head import build_keypoint_head -from .mask_head import build_mask_head - -ROI_HEADS_REGISTRY = Registry("ROI_HEADS") -ROI_HEADS_REGISTRY.__doc__ = """ -Registry for ROI heads in a generalized R-CNN model. -ROIHeads take feature maps and region proposals, and -perform per-region computation. - -The registered object will be called with `obj(cfg, input_shape)`. -The call is expected to return an :class:`ROIHeads`. -""" - -logger = logging.getLogger(__name__) - - -def build_roi_heads(cfg, input_shape): - """ - Build ROIHeads defined by `cfg.MODEL.ROI_HEADS.NAME`. 
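
# Minimal stand-in (not detectron2's Registry class) for the registry/build
# pattern used by build_mask_head above and build_roi_heads below: heads register
# themselves under a name, and the builder looks the configured name up. The
# config dict and class name here are invented.
class Registry:
    def __init__(self):
        self._map = {}
    def register(self, cls):
        self._map[cls.__name__] = cls
        return cls
    def get(self, name):
        return self._map[name]

ROI_MASK_HEAD_REGISTRY = Registry()

@ROI_MASK_HEAD_REGISTRY.register
class MyMaskHead:
    def __init__(self, cfg, input_shape):
        self.cfg, self.input_shape = cfg, input_shape

def build_head(cfg, input_shape):
    return ROI_MASK_HEAD_REGISTRY.get(cfg["NAME"])(cfg, input_shape)

head = build_head({"NAME": "MyMaskHead"}, input_shape=None)
print(type(head).__name__)                      # MyMaskHead
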
- """ - name = cfg.MODEL.ROI_HEADS.NAME - return ROI_HEADS_REGISTRY.get(name)(cfg, input_shape) - - -def select_foreground_proposals( - proposals: List[Instances], bg_label: int -) -> Tuple[List[Instances], List[torch.Tensor]]: - """ - Given a list of N Instances (for N images), each containing a `gt_classes` field, - return a list of Instances that contain only instances with `gt_classes != -1 && - gt_classes != bg_label`. - - Args: - proposals (list[Instances]): A list of N Instances, where N is the number of - images in the batch. - bg_label: label index of background class. - - Returns: - list[Instances]: N Instances, each contains only the selected foreground instances. - list[Tensor]: N boolean vector, correspond to the selection mask of - each Instances object. True for selected instances. - """ - assert isinstance(proposals, (list, tuple)) - assert isinstance(proposals[0], Instances) - assert proposals[0].has("gt_classes") - fg_proposals = [] - fg_selection_masks = [] - for proposals_per_image in proposals: - gt_classes = proposals_per_image.gt_classes - fg_selection_mask = (gt_classes != -1) & (gt_classes != bg_label) - fg_idxs = fg_selection_mask.nonzero().squeeze(1) - fg_proposals.append(proposals_per_image[fg_idxs]) - fg_selection_masks.append(fg_selection_mask) - return fg_proposals, fg_selection_masks - - -def select_proposals_with_visible_keypoints(proposals: List[Instances]) -> List[Instances]: - """ - Args: - proposals (list[Instances]): a list of N Instances, where N is the - number of images. - - Returns: - proposals: only contains proposals with at least one visible keypoint. - - Note that this is still slightly different from Detectron. - In Detectron, proposals for training keypoint head are re-sampled from - all the proposals with IOU>threshold & >=1 visible keypoint. - - Here, the proposals are first sampled from all proposals with - IOU>threshold, then proposals with no visible keypoint are filtered out. - This strategy seems to make no difference on Detectron and is easier to implement. - """ - ret = [] - all_num_fg = [] - for proposals_per_image in proposals: - # If empty/unannotated image (hard negatives), skip filtering for train - if len(proposals_per_image) == 0: - ret.append(proposals_per_image) - continue - gt_keypoints = proposals_per_image.gt_keypoints.tensor - # #fg x K x 3 - vis_mask = gt_keypoints[:, :, 2] >= 1 - xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1] - proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1) # #fg x 1 x 4 - kp_in_box = ( - (xs >= proposal_boxes[:, :, 0]) - & (xs <= proposal_boxes[:, :, 2]) - & (ys >= proposal_boxes[:, :, 1]) - & (ys <= proposal_boxes[:, :, 3]) - ) - selection = (kp_in_box & vis_mask).any(dim=1) - selection_idxs = torch.nonzero(selection, as_tuple=True)[0] - all_num_fg.append(selection_idxs.numel()) - ret.append(proposals_per_image[selection_idxs]) - - storage = get_event_storage() - storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg)) - return ret - - -class ROIHeads(torch.nn.Module): - """ - ROIHeads perform all per-region computation in an R-CNN. - - It typically contains logic to - 1. (in training only) match proposals with ground truth and sample them - 2. crop the regions and extract per-region features using proposals - 3. make per-region predictions with different heads - - It can have many variants, implemented as subclasses of this class. - This base class contains the logic to match/sample proposals. 
- But it is not necessary to inherit this class if the sampling logic is not needed. - """ - - @configurable - def __init__( - self, - *, - num_classes, - batch_size_per_image, - positive_sample_fraction, - proposal_matcher, - proposal_append_gt=True - ): - """ - NOTE: this interface is experimental. - - Args: - num_classes (int): number of classes. Used to label background proposals. - batch_size_per_image (int): number of proposals to use for training - positive_sample_fraction (float): fraction of positive (foreground) proposals - to use for training. - proposal_matcher (Matcher): matcher that matches proposals and ground truth - proposal_append_gt (bool): whether to include ground truth as proposals as well - """ - super().__init__() - self.batch_size_per_image = batch_size_per_image - self.positive_sample_fraction = positive_sample_fraction - self.num_classes = num_classes - self.proposal_matcher = proposal_matcher - self.proposal_append_gt = proposal_append_gt - - @classmethod - def from_config(cls, cfg): - return { - "batch_size_per_image": cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE, - "positive_sample_fraction": cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION, - "num_classes": cfg.MODEL.ROI_HEADS.NUM_CLASSES, - "proposal_append_gt": cfg.MODEL.ROI_HEADS.PROPOSAL_APPEND_GT, - # Matcher to assign box proposals to gt boxes - "proposal_matcher": Matcher( - cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS, - cfg.MODEL.ROI_HEADS.IOU_LABELS, - allow_low_quality_matches=False, - ), - } - - def _sample_proposals( - self, matched_idxs: torch.Tensor, matched_labels: torch.Tensor, gt_classes: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Based on the matching between N proposals and M groundtruth, - sample the proposals and set their classification labels. - - Args: - matched_idxs (Tensor): a vector of length N, each is the best-matched - gt index in [0, M) for each proposal. - matched_labels (Tensor): a vector of length N, the matcher's label - (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal. - gt_classes (Tensor): a vector of length M. - - Returns: - Tensor: a vector of indices of sampled proposals. Each is in [0, N). - Tensor: a vector of the same length, the classification label for - each sampled proposal. Each sample is labeled as either a category in - [0, num_classes) or the background (num_classes). - """ - has_gt = gt_classes.numel() > 0 - # Get the corresponding GT for each proposal - if has_gt: - gt_classes = gt_classes[matched_idxs] - # Label unmatched proposals (0 label from matcher) as background (label=num_classes) - gt_classes[matched_labels == 0] = self.num_classes - # Label ignore proposals (-1 label) - gt_classes[matched_labels == -1] = -1 - else: - gt_classes = torch.zeros_like(matched_idxs) + self.num_classes - - sampled_fg_idxs, sampled_bg_idxs = subsample_labels( - gt_classes, self.batch_size_per_image, self.positive_sample_fraction, self.num_classes - ) - - sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0) - return sampled_idxs, gt_classes[sampled_idxs] - - @torch.no_grad() - def label_and_sample_proposals( - self, proposals: List[Instances], targets: List[Instances] - ) -> List[Instances]: - """ - Prepare some proposals to be used to train the ROI heads. - It performs box matching between `proposals` and `targets`, and assigns - training labels to the proposals. - It returns ``self.batch_size_per_image`` random samples from proposals and groundtruth - boxes, with a fraction of positives that is no larger than - ``self.positive_sample_fraction``. 
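
# Self-contained sketch (toy tensors, no Matcher or subsample_labels) of the
# labeling rule in _sample_proposals above: each proposal takes the class of its
# matched gt box, matcher label 0 turns it into background (= num_classes), and
# label -1 marks it as ignored.
import torch

num_classes = 80
matched_idxs = torch.tensor([2, 0, 1, 0])        # best-matched gt index per proposal
matched_labels = torch.tensor([1, 0, 1, -1])     # 1: fg match, 0: bg, -1: ignore
gt_classes_per_box = torch.tensor([15, 3, 57])   # class of each gt box

gt_classes = gt_classes_per_box[matched_idxs]    # class of the matched gt
gt_classes[matched_labels == 0] = num_classes    # background label
gt_classes[matched_labels == -1] = -1            # ignored proposals
print(gt_classes)                                # tensor([57, 80,  3, -1])
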
- - Args: - See :meth:`ROIHeads.forward` - - Returns: - list[Instances]: - length `N` list of `Instances`s containing the proposals - sampled for training. Each `Instances` has the following fields: - - - proposal_boxes: the proposal boxes - - gt_boxes: the ground-truth box that the proposal is assigned to - (this is only meaningful if the proposal has a label > 0; if label = 0 - then the ground-truth box is random) - - Other fields such as "gt_classes", "gt_masks", that's included in `targets`. - """ - gt_boxes = [x.gt_boxes for x in targets] - # Augment proposals with ground-truth boxes. - # In the case of learned proposals (e.g., RPN), when training starts - # the proposals will be low quality due to random initialization. - # It's possible that none of these initial - # proposals have high enough overlap with the gt objects to be used - # as positive examples for the second stage components (box head, - # cls head, mask head). Adding the gt boxes to the set of proposals - # ensures that the second stage components will have some positive - # examples from the start of training. For RPN, this augmentation improves - # convergence and empirically improves box AP on COCO by about 0.5 - # points (under one tested configuration). - if self.proposal_append_gt: - proposals = add_ground_truth_to_proposals(gt_boxes, proposals) - - proposals_with_gt = [] - - num_fg_samples = [] - num_bg_samples = [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - has_gt = len(targets_per_image) > 0 - match_quality_matrix = pairwise_iou( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) - sampled_idxs, gt_classes = self._sample_proposals( - matched_idxs, matched_labels, targets_per_image.gt_classes - ) - - # Set target attributes of the sampled proposals: - proposals_per_image = proposals_per_image[sampled_idxs] - proposals_per_image.gt_classes = gt_classes - - # We index all the attributes of targets that start with "gt_" - # and have not been added to proposals yet (="gt_classes"). - if has_gt: - sampled_targets = matched_idxs[sampled_idxs] - # NOTE: here the indexing waste some compute, because heads - # like masks, keypoints, etc, will filter the proposals again, - # (by foreground/background, or number of keypoints in the image, etc) - # so we essentially index the data twice. 
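
# Self-contained sketch of the pairwise IoU matrix that label_and_sample_proposals
# hands to the matcher above (detectron2's pairwise_iou does this on Boxes
# objects); the xyxy coordinates below are invented.
import torch

def pairwise_iou(a, b):
    # a: (M, 4) gt boxes, b: (N, 4) proposals, both as (x1, y1, x2, y2)
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(a[:, None, :2], b[None, :, :2])           # (M, N, 2)
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])           # (M, N, 2)
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter)

gt = torch.tensor([[0., 0., 10., 10.]])
proposals = torch.tensor([[0., 0., 10., 10.],
                          [5., 5., 15., 15.],
                          [20., 20., 30., 30.]])
print(pairwise_iou(gt, proposals))               # tensor([[1.0000, 0.1429, 0.0000]])
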
- for (trg_name, trg_value) in targets_per_image.get_fields().items(): - if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name): - proposals_per_image.set(trg_name, trg_value[sampled_targets]) - else: - gt_boxes = Boxes( - targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 4)) - ) - proposals_per_image.gt_boxes = gt_boxes - - num_bg_samples.append((gt_classes == self.num_classes).sum().item()) - num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) - proposals_with_gt.append(proposals_per_image) - - # Log the number of fg/bg samples that are selected for training ROI heads - storage = get_event_storage() - storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) - storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) - - return proposals_with_gt - - def forward( - self, - images: ImageList, - features: Dict[str, torch.Tensor], - proposals: List[Instances], - targets: Optional[List[Instances]] = None, - ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: - """ - Args: - images (ImageList): - features (dict[str,Tensor]): input data as a mapping from feature - map name to tensor. Axis 0 represents the number of images `N` in - the input data; axes 1-3 are channels, height, and width, which may - vary between feature maps (e.g., if a feature pyramid is used). - proposals (list[Instances]): length `N` list of `Instances`. The i-th - `Instances` contains object proposals for the i-th input image, - with fields "proposal_boxes" and "objectness_logits". - targets (list[Instances], optional): length `N` list of `Instances`. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. Specify `targets` during training only. - It may have the following fields: - - - gt_boxes: the bounding box of each instance. - - gt_classes: the label for each instance with a category ranging in [0, #class]. - - gt_masks: PolygonMasks or BitMasks, the ground-truth masks of each instance. - - gt_keypoints: NxKx3, the groud-truth keypoints for each instance. - - Returns: - list[Instances]: length `N` list of `Instances` containing the - detected instances. Returned during inference only; may be [] during training. - - dict[str->Tensor]: - mapping from a named loss to a tensor storing the loss. Used during training only. - """ - raise NotImplementedError() - - -@ROI_HEADS_REGISTRY.register() -class Res5ROIHeads(ROIHeads): - """ - The ROIHeads in a typical "C4" R-CNN model, where - the box and mask head share the cropping and - the per-region feature computation by a Res5 block. 
- """ - - def __init__(self, cfg, input_shape): - super().__init__(cfg) - - # fmt: off - self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - pooler_scales = (1.0 / input_shape[self.in_features[0]].stride, ) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - self.mask_on = cfg.MODEL.MASK_ON - # fmt: on - assert not cfg.MODEL.KEYPOINT_ON - assert len(self.in_features) == 1 - - self.pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - - self.res5, out_channels = self._build_res5_block(cfg) - self.box_predictor = FastRCNNOutputLayers( - cfg, ShapeSpec(channels=out_channels, height=1, width=1) - ) - - if self.mask_on: - self.mask_head = build_mask_head( - cfg, - ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution), - ) - - def _build_res5_block(self, cfg): - # fmt: off - stage_channel_factor = 2 ** 3 # res5 is 8x res2 - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group * stage_channel_factor - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - norm = cfg.MODEL.RESNETS.NORM - assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ - "Deformable conv is not yet supported in res5 head." - # fmt: on - - blocks = make_stage( - BottleneckBlock, - 3, - first_stride=2, - in_channels=out_channels // 2, - bottleneck_channels=bottleneck_channels, - out_channels=out_channels, - num_groups=num_groups, - norm=norm, - stride_in_1x1=stride_in_1x1, - ) - return nn.Sequential(*blocks), out_channels - - def _shared_roi_transform(self, features, boxes): - x = self.pooler(features, boxes) - return self.res5(x) - - def forward(self, images, features, proposals, targets=None): - """ - See :meth:`ROIHeads.forward`. - """ - del images - - if self.training: - assert targets - proposals = self.label_and_sample_proposals(proposals, targets) - del targets - - proposal_boxes = [x.proposal_boxes for x in proposals] - box_features = self._shared_roi_transform( - [features[f] for f in self.in_features], proposal_boxes - ) - predictions = self.box_predictor(box_features.mean(dim=[2, 3])) - - if self.training: - del features - losses = self.box_predictor.losses(predictions, proposals) - if self.mask_on: - proposals, fg_selection_masks = select_foreground_proposals( - proposals, self.num_classes - ) - # Since the ROI feature transform is shared between boxes and masks, - # we don't need to recompute features. The mask loss is only defined - # on foreground proposals, so we need to select out the foreground - # features. - mask_features = box_features[torch.cat(fg_selection_masks, dim=0)] - del box_features - losses.update(self.mask_head(mask_features, proposals)) - return [], losses - else: - pred_instances, _ = self.box_predictor.inference(predictions, proposals) - pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def forward_with_given_boxes(self, features, instances): - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. 
- - Returns: - instances (Instances): - the same `Instances` object, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - assert not self.training - assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") - - if self.mask_on: - features = [features[f] for f in self.in_features] - x = self._shared_roi_transform(features, [x.pred_boxes for x in instances]) - return self.mask_head(x, instances) - else: - return instances - - -@ROI_HEADS_REGISTRY.register() -class StandardROIHeads(ROIHeads): - """ - It's "standard" in a sense that there is no ROI transform sharing - or feature sharing between tasks. - Each head independently processes the input features by each head's - own pooler and head. - - This class is used by most models, such as FPN and C5. - To implement more models, you can subclass it and implement a different - :meth:`forward()` or a head. - """ - - @configurable - def __init__( - self, - *, - box_in_features: List[str], - box_pooler: ROIPooler, - box_head: nn.Module, - box_predictor: nn.Module, - mask_in_features: Optional[List[str]] = None, - mask_pooler: Optional[ROIPooler] = None, - mask_head: Optional[nn.Module] = None, - keypoint_in_features: Optional[List[str]] = None, - keypoint_pooler: Optional[ROIPooler] = None, - keypoint_head: Optional[nn.Module] = None, - train_on_pred_boxes: bool = False, - **kwargs - ): - """ - NOTE: this interface is experimental. - - Args: - box_in_features (list[str]): list of feature names to use for the box head. - box_pooler (ROIPooler): pooler to extra region features for box head - box_head (nn.Module): transform features to make box predictions - box_predictor (nn.Module): make box predictions from the feature. - Should have the same interface as :class:`FastRCNNOutputLayers`. - mask_in_features (list[str]): list of feature names to use for the mask head. - None if not using mask head. - mask_pooler (ROIPooler): pooler to extra region features for mask head - mask_head (nn.Module): transform features to make mask predictions - keypoint_in_features, keypoint_pooler, keypoint_head: similar to ``mask*``. - train_on_pred_boxes (bool): whether to use proposal boxes or - predicted boxes from the box head to train other heads. - """ - super().__init__(**kwargs) - # keep self.in_features for backward compatibility - self.in_features = self.box_in_features = box_in_features - self.box_pooler = box_pooler - self.box_head = box_head - self.box_predictor = box_predictor - - self.mask_on = mask_in_features is not None - if self.mask_on: - self.mask_in_features = mask_in_features - self.mask_pooler = mask_pooler - self.mask_head = mask_head - self.keypoint_on = keypoint_in_features is not None - if self.keypoint_on: - self.keypoint_in_features = keypoint_in_features - self.keypoint_pooler = keypoint_pooler - self.keypoint_head = keypoint_head - - self.train_on_pred_boxes = train_on_pred_boxes - - @classmethod - def from_config(cls, cfg, input_shape): - ret = super().from_config(cfg) - ret["train_on_pred_boxes"] = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES - # Subclasses that have not been updated to use from_config style construction - # may have overridden _init_*_head methods. In this case, those overridden methods - # will not be classmethods and we need to avoid trying to call them here. - # We test for this with ismethod which only returns True for bound methods of cls. - # Such subclasses will need to handle calling their overridden _init_*_head methods. 
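
# Worked example (assumed, standard FPN strides) of the pooler scales that
# _init_box_head derives below: one scale per input feature map, equal to
# 1 / stride of that map.
strides = {"p2": 4, "p3": 8, "p4": 16, "p5": 32}
in_features = ["p2", "p3", "p4", "p5"]
pooler_scales = tuple(1.0 / strides[k] for k in in_features)
print(pooler_scales)          # (0.25, 0.125, 0.0625, 0.03125)
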
- if inspect.ismethod(cls._init_box_head): - ret.update(cls._init_box_head(cfg, input_shape)) - if inspect.ismethod(cls._init_mask_head): - ret.update(cls._init_mask_head(cfg, input_shape)) - if inspect.ismethod(cls._init_keypoint_head): - ret.update(cls._init_keypoint_head(cfg, input_shape)) - return ret - - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - # fmt: on - - # If StandardROIHeads is applied on multiple feature maps (as in FPN), - # then we share the same predictors and therefore the channel counts must be the same - in_channels = [input_shape[f].channels for f in in_features] - # Check all channel counts are equal - assert len(set(in_channels)) == 1, in_channels - in_channels = in_channels[0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - # Here we split "box head" and "box predictor", which is mainly due to historical reasons. - # They are used together so the "box predictor" layers should be part of the "box head". - # New subclasses of ROIHeads do not need "box predictor"s. - box_head = build_box_head( - cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) - ) - box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_head": box_head, - "box_predictor": box_predictor, - } - - @classmethod - def _init_mask_head(cls, cfg, input_shape): - if not cfg.MODEL.MASK_ON: - return {} - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features][0] - - ret = {"mask_in_features": in_features} - ret["mask_pooler"] = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - ret["mask_head"] = build_mask_head( - cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) - ) - return ret - - @classmethod - def _init_keypoint_head(cls, cfg, input_shape): - if not cfg.MODEL.KEYPOINT_ON: - return {} - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) # noqa - sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE - # fmt: on - - in_channels = [input_shape[f].channels for f in in_features][0] - - ret = {"keypoint_in_features": in_features} - ret["keypoint_pooler"] = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - ret["keypoint_head"] = build_keypoint_head( - cfg, ShapeSpec(channels=in_channels, width=pooler_resolution, height=pooler_resolution) - ) - return ret - - def forward( - self, - images: ImageList, - features: Dict[str, 
torch.Tensor], - proposals: List[Instances], - targets: Optional[List[Instances]] = None, - ) -> Tuple[List[Instances], Dict[str, torch.Tensor]]: - """ - See :class:`ROIHeads.forward`. - """ - del images - if self.training: - assert targets - proposals = self.label_and_sample_proposals(proposals, targets) - del targets - - if self.training: - losses = self._forward_box(features, proposals) - # Usually the original proposals used by the box head are used by the mask, keypoint - # heads. But when `self.train_on_pred_boxes is True`, proposals will contain boxes - # predicted by the box head. - losses.update(self._forward_mask(features, proposals)) - losses.update(self._forward_keypoint(features, proposals)) - return proposals, losses - else: - pred_instances = self._forward_box(features, proposals) - # During inference cascaded prediction is used: the mask and keypoints heads are only - # applied to the top scoring box detections. - pred_instances = self.forward_with_given_boxes(features, pred_instances) - return pred_instances, {} - - def forward_with_given_boxes( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ) -> List[Instances]: - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - This is useful for downstream tasks where a box is known, but need to obtain - other attributes (outputs of other heads). - Test-time augmentation also uses this. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. - - Returns: - instances (list[Instances]): - the same `Instances` objects, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - assert not self.training - assert instances[0].has("pred_boxes") and instances[0].has("pred_classes") - - instances = self._forward_mask(features, instances) - instances = self._forward_keypoint(features, instances) - return instances - - def _forward_box( - self, features: Dict[str, torch.Tensor], proposals: List[Instances] - ) -> Union[Dict[str, torch.Tensor], List[Instances]]: - """ - Forward logic of the box prediction branch. If `self.train_on_pred_boxes is True`, - the function puts predicted boxes in the `proposal_boxes` field of `proposals` argument. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - proposals (list[Instances]): the per-image object proposals with - their matching ground truth. - Each has fields "proposal_boxes", and "objectness_logits", - "gt_classes", "gt_boxes". - - Returns: - In training, a dict of losses. - In inference, a list of `Instances`, the predicted instances. - """ - features = [features[f] for f in self.box_in_features] - box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals]) - box_features = self.box_head(box_features) - predictions = self.box_predictor(box_features) - del box_features - - if self.training: - losses = self.box_predictor.losses(predictions, proposals) - # proposals is modified in-place below, so losses must be computed first. 
- if self.train_on_pred_boxes: - with torch.no_grad(): - pred_boxes = self.box_predictor.predict_boxes_for_gt_classes( - predictions, proposals - ) - for proposals_per_image, pred_boxes_per_image in zip(proposals, pred_boxes): - proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image) - return losses - else: - pred_instances, _ = self.box_predictor.inference(predictions, proposals) - return pred_instances - - def _forward_mask( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ) -> Union[Dict[str, torch.Tensor], List[Instances]]: - """ - Forward logic of the mask prediction branch. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - instances (list[Instances]): the per-image instances to train/predict masks. - In training, they can be the proposals. - In inference, they can be the predicted boxes. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "pred_masks" and return it. - """ - if not self.mask_on: - return {} if self.training else instances - - features = [features[f] for f in self.mask_in_features] - - if self.training: - # The loss is only defined on positive proposals. - proposals, _ = select_foreground_proposals(instances, self.num_classes) - proposal_boxes = [x.proposal_boxes for x in proposals] - mask_features = self.mask_pooler(features, proposal_boxes) - return self.mask_head(mask_features, proposals) - else: - pred_boxes = [x.pred_boxes for x in instances] - mask_features = self.mask_pooler(features, pred_boxes) - return self.mask_head(mask_features, instances) - - def _forward_keypoint( - self, features: Dict[str, torch.Tensor], instances: List[Instances] - ) -> Union[Dict[str, torch.Tensor], List[Instances]]: - """ - Forward logic of the keypoint prediction branch. - - Args: - features (dict[str, Tensor]): mapping from feature map names to tensor. - Same as in :meth:`ROIHeads.forward`. - instances (list[Instances]): the per-image instances to train/predict keypoints. - In training, they can be the proposals. - In inference, they can be the predicted boxes. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "pred_keypoints" and return it. - """ - if not self.keypoint_on: - return {} if self.training else instances - - features = [features[f] for f in self.keypoint_in_features] - - if self.training: - # The loss is defined on positive proposals with >=1 visible keypoints. - proposals, _ = select_foreground_proposals(instances, self.num_classes) - proposals = select_proposals_with_visible_keypoints(proposals) - proposal_boxes = [x.proposal_boxes for x in proposals] - - keypoint_features = self.keypoint_pooler(features, proposal_boxes) - return self.keypoint_head(keypoint_features, proposals) - else: - pred_boxes = [x.pred_boxes for x in instances] - keypoint_features = self.keypoint_pooler(features, pred_boxes) - return self.keypoint_head(keypoint_features, instances) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py deleted file mode 100644 index 3d7362d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/roi_heads/rotated_fast_rcnn.py +++ /dev/null @@ -1,276 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import logging -import numpy as np -import torch - -from detectron2.config import configurable -from detectron2.layers import ShapeSpec, batched_nms_rotated -from detectron2.structures import Instances, RotatedBoxes, pairwise_iou_rotated -from detectron2.utils.events import get_event_storage - -from ..box_regression import Box2BoxTransformRotated -from ..poolers import ROIPooler -from ..proposal_generator.proposal_utils import add_ground_truth_to_proposals -from .box_head import build_box_head -from .fast_rcnn import FastRCNNOutputLayers -from .roi_heads import ROI_HEADS_REGISTRY, StandardROIHeads - -logger = logging.getLogger(__name__) - -""" -Shape shorthand in this module: - - N: number of images in the minibatch - R: number of ROIs, combined over all images, in the minibatch - Ri: number of ROIs in image i - K: number of foreground classes. E.g.,there are 80 foreground classes in COCO. - -Naming convention: - - deltas: refers to the 5-d (dx, dy, dw, dh, da) deltas that parameterize the box2box - transform (see :class:`box_regression.Box2BoxTransformRotated`). - - pred_class_logits: predicted class scores in [-inf, +inf]; use - softmax(pred_class_logits) to estimate P(class). - - gt_classes: ground-truth classification labels in [0, K], where [0, K) represent - foreground object classes and K represents the background class. - - pred_proposal_deltas: predicted rotated box2box transform deltas for transforming proposals - to detection box predictions. - - gt_proposal_deltas: ground-truth rotated box2box transform deltas -""" - - -def fast_rcnn_inference_rotated( - boxes, scores, image_shapes, score_thresh, nms_thresh, topk_per_image -): - """ - Call `fast_rcnn_inference_single_image_rotated` for all images. - - Args: - boxes (list[Tensor]): A list of Tensors of predicted class-specific or class-agnostic - boxes for each image. Element i has shape (Ri, K * 5) if doing - class-specific regression, or (Ri, 5) if doing class-agnostic - regression, where Ri is the number of predicted objects for image i. - This is compatible with the output of :meth:`FastRCNNOutputs.predict_boxes`. - scores (list[Tensor]): A list of Tensors of predicted class scores for each image. - Element i has shape (Ri, K + 1), where Ri is the number of predicted objects - for image i. Compatible with the output of :meth:`FastRCNNOutputs.predict_probs`. - image_shapes (list[tuple]): A list of (width, height) tuples for each image in the batch. - score_thresh (float): Only return detections with a confidence score exceeding this - threshold. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - instances: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections. - kept_indices: (list[Tensor]): A list of 1D tensor of length of N, each element indicates - the corresponding boxes/scores index in [0, Ri) from the input, for image i. 
- """ - result_per_image = [ - fast_rcnn_inference_single_image_rotated( - boxes_per_image, scores_per_image, image_shape, score_thresh, nms_thresh, topk_per_image - ) - for scores_per_image, boxes_per_image, image_shape in zip(scores, boxes, image_shapes) - ] - return [x[0] for x in result_per_image], [x[1] for x in result_per_image] - - -def fast_rcnn_inference_single_image_rotated( - boxes, scores, image_shape, score_thresh, nms_thresh, topk_per_image -): - """ - Single-image inference. Return rotated bounding-box detection results by thresholding - on scores and applying rotated non-maximum suppression (Rotated NMS). - - Args: - Same as `fast_rcnn_inference_rotated`, but with rotated boxes, scores, and image shapes - per image. - - Returns: - Same as `fast_rcnn_inference_rotated`, but for only one image. - """ - valid_mask = torch.isfinite(boxes).all(dim=1) & torch.isfinite(scores).all(dim=1) - if not valid_mask.all(): - boxes = boxes[valid_mask] - scores = scores[valid_mask] - - B = 5 # box dimension - scores = scores[:, :-1] - num_bbox_reg_classes = boxes.shape[1] // B - # Convert to Boxes to use the `clip` function ... - boxes = RotatedBoxes(boxes.reshape(-1, B)) - boxes.clip(image_shape) - boxes = boxes.tensor.view(-1, num_bbox_reg_classes, B) # R x C x B - # Filter results based on detection scores - filter_mask = scores > score_thresh # R x K - # R' x 2. First column contains indices of the R predictions; - # Second column contains indices of classes. - filter_inds = filter_mask.nonzero() - if num_bbox_reg_classes == 1: - boxes = boxes[filter_inds[:, 0], 0] - else: - boxes = boxes[filter_mask] - scores = scores[filter_mask] - - # Apply per-class Rotated NMS - keep = batched_nms_rotated(boxes, scores, filter_inds[:, 1], nms_thresh) - if topk_per_image >= 0: - keep = keep[:topk_per_image] - boxes, scores, filter_inds = boxes[keep], scores[keep], filter_inds[keep] - - result = Instances(image_shape) - result.pred_boxes = RotatedBoxes(boxes) - result.scores = scores - result.pred_classes = filter_inds[:, 1] - - return result, filter_inds[:, 0] - - -class RotatedFastRCNNOutputLayers(FastRCNNOutputLayers): - """ - Two linear layers for predicting Rotated Fast R-CNN outputs. - """ - - @classmethod - def from_config(cls, cfg, input_shape): - args = super().from_config(cfg, input_shape) - args["box2box_transform"] = Box2BoxTransformRotated( - weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS - ) - return args - - def inference(self, predictions, proposals): - """ - Returns: - list[Instances]: same as `fast_rcnn_inference_rotated`. - list[Tensor]: same as `fast_rcnn_inference_rotated`. - """ - boxes = self.predict_boxes(predictions, proposals) - scores = self.predict_probs(predictions, proposals) - image_shapes = [x.image_size for x in proposals] - - return fast_rcnn_inference_rotated( - boxes, - scores, - image_shapes, - self.test_score_thresh, - self.test_nms_thresh, - self.test_topk_per_image, - ) - - -@ROI_HEADS_REGISTRY.register() -class RROIHeads(StandardROIHeads): - """ - This class is used by Rotated Fast R-CNN to detect rotated boxes. - For now, it only supports box predictions but not mask or keypoints. - """ - - @configurable - def __init__(self, **kwargs): - """ - NOTE: this interface is experimental. - """ - super().__init__(**kwargs) - assert ( - not self.mask_on and not self.keypoint_on - ), "Mask/Keypoints not supported in Rotated ROIHeads." - assert not self.train_on_pred_boxes, "train_on_pred_boxes not implemented for RROIHeads!" 
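# Editor's sketch (not part of the original diff): a minimal, assumption-laden config for
# exercising the RROIHeads registered above. Assumes detectron2 is installed; the rotated
# proposal-generator and anchor-generator names come from detectron2's rotated Faster R-CNN
# setup and are not defined in this file.
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"                  # registered via @ROI_HEADS_REGISTRY.register() above
cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"              # rotated proposals expected upstream (assumption)
cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"  # the only pooler accepted by _init_box_head below
cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 1.0)  # 5-d (dx, dy, dw, dh, da) deltas
cfg.MODEL.MASK_ON = False                               # masks/keypoints unsupported, per the asserts above
cfg.MODEL.KEYPOINT_ON = False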
- - @classmethod - def _init_box_head(cls, cfg, input_shape): - # fmt: off - in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES - pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION - pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) - sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO - pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE - # fmt: on - assert pooler_type in ["ROIAlignRotated"], pooler_type - # assume all channel counts are equal - in_channels = [input_shape[f].channels for f in in_features][0] - - box_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type=pooler_type, - ) - box_head = build_box_head( - cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution) - ) - # This line is the only difference v.s. StandardROIHeads - box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape) - return { - "box_in_features": in_features, - "box_pooler": box_pooler, - "box_head": box_head, - "box_predictor": box_predictor, - } - - @torch.no_grad() - def label_and_sample_proposals(self, proposals, targets): - """ - Prepare some proposals to be used to train the RROI heads. - It performs box matching between `proposals` and `targets`, and assigns - training labels to the proposals. - It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes, - with a fraction of positives that is no larger than `self.positive_sample_fraction. - - Args: - See :meth:`StandardROIHeads.forward` - - Returns: - list[Instances]: length `N` list of `Instances`s containing the proposals - sampled for training. Each `Instances` has the following fields: - - proposal_boxes: the rotated proposal boxes - - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to - (this is only meaningful if the proposal has a label > 0; if label = 0 - then the ground-truth box is random) - - gt_classes: the ground-truth classification lable for each proposal - """ - gt_boxes = [x.gt_boxes for x in targets] - if self.proposal_append_gt: - proposals = add_ground_truth_to_proposals(gt_boxes, proposals) - - proposals_with_gt = [] - - num_fg_samples = [] - num_bg_samples = [] - for proposals_per_image, targets_per_image in zip(proposals, targets): - has_gt = len(targets_per_image) > 0 - match_quality_matrix = pairwise_iou_rotated( - targets_per_image.gt_boxes, proposals_per_image.proposal_boxes - ) - matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix) - sampled_idxs, gt_classes = self._sample_proposals( - matched_idxs, matched_labels, targets_per_image.gt_classes - ) - - proposals_per_image = proposals_per_image[sampled_idxs] - proposals_per_image.gt_classes = gt_classes - - if has_gt: - sampled_targets = matched_idxs[sampled_idxs] - proposals_per_image.gt_boxes = targets_per_image.gt_boxes[sampled_targets] - else: - gt_boxes = RotatedBoxes( - targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 5)) - ) - proposals_per_image.gt_boxes = gt_boxes - - num_bg_samples.append((gt_classes == self.num_classes).sum().item()) - num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1]) - proposals_with_gt.append(proposals_per_image) - - # Log the number of fg/bg samples that are selected for training ROI heads - storage = get_event_storage() - storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples)) - storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples)) - - return proposals_with_gt diff 
--git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/sampling.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/sampling.py deleted file mode 100644 index ecf251a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/sampling.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -__all__ = ["subsample_labels"] - - -def subsample_labels(labels, num_samples, positive_fraction, bg_label): - """ - Return `num_samples` (or fewer, if not enough found) - random samples from `labels` which is a mixture of positives & negatives. - It will try to return as many positives as possible without - exceeding `positive_fraction * num_samples`, and then try to - fill the remaining slots with negatives. - - Args: - labels (Tensor): (N, ) label vector with values: - * -1: ignore - * bg_label: background ("negative") class - * otherwise: one or more foreground ("positive") classes - num_samples (int): The total number of labels with value >= 0 to return. - Values that are not sampled will be filled with -1 (ignore). - positive_fraction (float): The number of subsampled labels with values > 0 - is `min(num_positives, int(positive_fraction * num_samples))`. The number - of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. - In order words, if there are not enough positives, the sample is filled with - negatives. If there are also not enough negatives, then as many elements are - sampled as is possible. - bg_label (int): label index of background ("negative") class. - - Returns: - pos_idx, neg_idx (Tensor): - 1D vector of indices. The total length of both is `num_samples` or fewer. - """ - positive = torch.nonzero((labels != -1) & (labels != bg_label), as_tuple=True)[0] - negative = torch.nonzero(labels == bg_label, as_tuple=True)[0] - - num_pos = int(num_samples * positive_fraction) - # protect against not enough positive examples - num_pos = min(positive.numel(), num_pos) - num_neg = num_samples - num_pos - # protect against not enough negative examples - num_neg = min(negative.numel(), num_neg) - - # randomly select positive and negative examples - perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] - perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] - - pos_idx = positive[perm1] - neg_idx = negative[perm2] - return pos_idx, neg_idx diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/test_time_augmentation.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/test_time_augmentation.py deleted file mode 100644 index 1e5bcf0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/modeling/test_time_augmentation.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import copy -import numpy as np -from contextlib import contextmanager -from itertools import count -import torch -from torch import nn -from torch.nn.parallel import DistributedDataParallel - -from detectron2.data.detection_utils import read_image -from detectron2.data.transforms import ResizeShortestEdge -from detectron2.structures import Instances - -from .meta_arch import GeneralizedRCNN -from .postprocessing import detector_postprocess -from .roi_heads.fast_rcnn import fast_rcnn_inference_single_image - -__all__ = ["DatasetMapperTTA", "GeneralizedRCNNWithTTA"] - - -class DatasetMapperTTA: - """ - Implement test-time augmentation for detection data. - It is a callable which takes a dataset dict from a detection dataset, - and returns a list of dataset dicts where the images - are augmented from the input image by the transformations defined in the config. - This is used for test-time augmentation. - """ - - def __init__(self, cfg): - self.min_sizes = cfg.TEST.AUG.MIN_SIZES - self.max_size = cfg.TEST.AUG.MAX_SIZE - self.flip = cfg.TEST.AUG.FLIP - self.image_format = cfg.INPUT.FORMAT - - def __call__(self, dataset_dict): - """ - Args: - dict: a detection dataset dict - - Returns: - list[dict]: - a list of dataset dicts, which contain augmented version of the input image. - The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. - """ - ret = [] - if "image" not in dataset_dict: - numpy_image = read_image(dataset_dict["file_name"], self.image_format) - else: - numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy().astype("uint8") - for min_size in self.min_sizes: - image = np.copy(numpy_image) - tfm = ResizeShortestEdge(min_size, self.max_size).get_transform(image) - resized = tfm.apply_image(image) - resized = torch.as_tensor(resized.transpose(2, 0, 1).astype("float32")) - - dic = copy.deepcopy(dataset_dict) - dic["horiz_flip"] = False - dic["image"] = resized - ret.append(dic) - - if self.flip: - dic = copy.deepcopy(dataset_dict) - dic["horiz_flip"] = True - dic["image"] = torch.flip(resized, dims=[2]) - ret.append(dic) - return ret - - -class GeneralizedRCNNWithTTA(nn.Module): - """ - A GeneralizedRCNN with test-time augmentation enabled. - Its :meth:`__call__` method has the same interface as :meth:`GeneralizedRCNN.forward`. - """ - - def __init__(self, cfg, model, tta_mapper=None, batch_size=3): - """ - Args: - cfg (CfgNode): - model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. - tta_mapper (callable): takes a dataset dict and returns a list of - augmented versions of the dataset dict. Defaults to - `DatasetMapperTTA(cfg)`. - batch_size (int): batch the augmented images into this batch size for inference. - """ - super().__init__() - if isinstance(model, DistributedDataParallel): - model = model.module - assert isinstance( - model, GeneralizedRCNN - ), "TTA is only supported on GeneralizedRCNN. Got a model of type {}".format(type(model)) - self.cfg = cfg.clone() - assert not self.cfg.MODEL.KEYPOINT_ON, "TTA for keypoint is not supported yet" - assert ( - not self.cfg.MODEL.LOAD_PROPOSALS - ), "TTA for pre-computed proposals is not supported yet" - - self.model = model - - if tta_mapper is None: - tta_mapper = DatasetMapperTTA(cfg) - self.tta_mapper = tta_mapper - self.batch_size = batch_size - - @contextmanager - def _turn_off_roi_heads(self, attrs): - """ - Open a context where some heads in `model.roi_heads` are temporarily turned off. 
- Args: - attr (list[str]): the attribute in `model.roi_heads` which can be used - to turn off a specific head, e.g., "mask_on", "keypoint_on". - """ - roi_heads = self.model.roi_heads - old = {} - for attr in attrs: - try: - old[attr] = getattr(roi_heads, attr) - except AttributeError: - # The head may not be implemented in certain ROIHeads - pass - - if len(old.keys()) == 0: - yield - else: - for attr in old.keys(): - setattr(roi_heads, attr, False) - yield - for attr in old.keys(): - setattr(roi_heads, attr, old[attr]) - - def _batch_inference(self, batched_inputs, detected_instances=None, do_postprocess=True): - """ - Execute inference on a list of inputs, - using batch size = self.batch_size, instead of the length of the list. - - Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` - """ - if detected_instances is None: - detected_instances = [None] * len(batched_inputs) - - outputs = [] - inputs, instances = [], [] - for idx, input, instance in zip(count(), batched_inputs, detected_instances): - inputs.append(input) - instances.append(instance) - if len(inputs) == self.batch_size or idx == len(batched_inputs) - 1: - outputs.extend( - self.model.inference( - inputs, - instances if instances[0] is not None else None, - do_postprocess=do_postprocess, - ) - ) - inputs, instances = [], [] - return outputs - - def __call__(self, batched_inputs): - """ - Same input/output format as :meth:`GeneralizedRCNN.forward` - """ - return [self._inference_one_image(x) for x in batched_inputs] - - def _detector_postprocess(self, outputs, aug_vars): - return detector_postprocess(outputs, aug_vars["height"], aug_vars["width"]) - - def _inference_one_image(self, input): - """ - Args: - input (dict): one dataset dict - - Returns: - dict: one output dict - """ - - augmented_inputs, aug_vars = self._get_augmented_inputs(input) - # Detect boxes from all augmented versions - with self._turn_off_roi_heads(["mask_on", "keypoint_on"]): - # temporarily disable roi heads - all_boxes, all_scores, all_classes = self._get_augmented_boxes( - augmented_inputs, aug_vars - ) - merged_instances = self._merge_detections( - all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"]) - ) - - if self.cfg.MODEL.MASK_ON: - # Use the detected boxes to obtain new fields - augmented_instances = self._rescale_detected_boxes( - augmented_inputs, merged_instances, aug_vars - ) - # run forward on the detected boxes - outputs = self._batch_inference( - augmented_inputs, augmented_instances, do_postprocess=False - ) - # Delete now useless variables to avoid being out of memory - del augmented_inputs, augmented_instances, merged_instances - # average the predictions - outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars) - # postprocess - output = self._detector_postprocess(outputs[0], aug_vars) - return {"instances": output} - else: - return {"instances": merged_instances} - - def _get_augmented_inputs(self, input): - augmented_inputs = self.tta_mapper(input) - - do_hflip = [k.pop("horiz_flip", False) for k in augmented_inputs] - heights = [k["height"] for k in augmented_inputs] - widths = [k["width"] for k in augmented_inputs] - assert ( - len(set(heights)) == 1 and len(set(widths)) == 1 - ), "Augmented version of the inputs should have the same original resolution!" 
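# Editor's sketch (not part of the original diff): a typical way the TTA wrapper above is used.
# Assumes detectron2 is installed; the config and checkpoint paths are placeholders.
from detectron2.config import get_cfg
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model, GeneralizedRCNNWithTTA

cfg = get_cfg()
cfg.merge_from_file("path/to/config.yaml")    # placeholder config
cfg.TEST.AUG.MIN_SIZES = (400, 500, 600)      # resize scales read by DatasetMapperTTA
cfg.TEST.AUG.FLIP = True

model = build_model(cfg)
DetectionCheckpointer(model).load("path/to/model_final.pth")  # placeholder weights
model.eval()

tta_model = GeneralizedRCNNWithTTA(cfg, model, batch_size=3)
# outputs = tta_model(batched_inputs)  # same list-of-dict format as GeneralizedRCNN.forward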
- height = heights[0] - width = widths[0] - aug_vars = {"height": height, "width": width, "do_hflip": do_hflip} - - return augmented_inputs, aug_vars - - def _get_augmented_boxes(self, augmented_inputs, aug_vars): - # 1: forward with all augmented images - outputs = self._batch_inference(augmented_inputs, do_postprocess=False) - # 2: union the results - all_boxes = [] - all_scores = [] - all_classes = [] - for idx, output in enumerate(outputs): - rescaled_output = self._detector_postprocess(output, aug_vars) - pred_boxes = rescaled_output.pred_boxes.tensor - if aug_vars["do_hflip"][idx]: - pred_boxes[:, [0, 2]] = aug_vars["width"] - pred_boxes[:, [2, 0]] - all_boxes.append(pred_boxes) - all_scores.extend(rescaled_output.scores) - all_classes.extend(rescaled_output.pred_classes) - all_boxes = torch.cat(all_boxes, dim=0).cpu() - return all_boxes, all_scores, all_classes - - def _merge_detections(self, all_boxes, all_scores, all_classes, shape_hw): - # select from the union of all results - num_boxes = len(all_boxes) - num_classes = self.cfg.MODEL.ROI_HEADS.NUM_CLASSES - # +1 because fast_rcnn_inference expects background scores as well - all_scores_2d = torch.zeros(num_boxes, num_classes + 1, device=all_boxes.device) - for idx, cls, score in zip(count(), all_classes, all_scores): - all_scores_2d[idx, cls] = score - - merged_instances, _ = fast_rcnn_inference_single_image( - all_boxes, - all_scores_2d, - shape_hw, - 1e-8, - self.cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST, - self.cfg.TEST.DETECTIONS_PER_IMAGE, - ) - - return merged_instances - - def _rescale_detected_boxes(self, augmented_inputs, merged_instances, aug_vars): - augmented_instances = [] - for idx, input in enumerate(augmented_inputs): - actual_height, actual_width = input["image"].shape[1:3] - scale_x = actual_width * 1.0 / aug_vars["width"] - scale_y = actual_height * 1.0 / aug_vars["height"] - pred_boxes = merged_instances.pred_boxes.clone() - pred_boxes.tensor[:, 0::2] *= scale_x - pred_boxes.tensor[:, 1::2] *= scale_y - if aug_vars["do_hflip"][idx]: - pred_boxes.tensor[:, [0, 2]] = actual_width - pred_boxes.tensor[:, [2, 0]] - - aug_instances = Instances( - image_size=(actual_height, actual_width), - pred_boxes=pred_boxes, - pred_classes=merged_instances.pred_classes, - scores=merged_instances.scores, - ) - augmented_instances.append(aug_instances) - return augmented_instances - - def _reduce_pred_masks(self, outputs, aug_vars): - for idx, output in enumerate(outputs): - if aug_vars["do_hflip"][idx]: - output.pred_masks = output.pred_masks.flip(dims=[3]) - all_pred_masks = torch.stack([o.pred_masks for o in outputs], dim=0) - avg_pred_masks = torch.mean(all_pred_masks, dim=0) - return avg_pred_masks diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/__init__.py deleted file mode 100644 index 10f84e1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .build import build_lr_scheduler, build_optimizer -from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/build.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/build.py deleted file mode 100644 index 6d9d0ee..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/build.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from enum import Enum -from typing import Any, Callable, Dict, Iterable, List, Set, Type, Union -import torch - -from detectron2.config import CfgNode - -from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR - -_GradientClipperInput = Union[torch.Tensor, Iterable[torch.Tensor]] -_GradientClipper = Callable[[_GradientClipperInput], None] - - -class GradientClipType(Enum): - VALUE = "value" - NORM = "norm" - - -def _create_gradient_clipper(cfg: CfgNode) -> _GradientClipper: - """ - Creates gradient clipping closure to clip by value or by norm, - according to the provided config. - """ - cfg = cfg.clone() - - def clip_grad_norm(p: _GradientClipperInput): - torch.nn.utils.clip_grad_norm_(p, cfg.CLIP_VALUE, cfg.NORM_TYPE) - - def clip_grad_value(p: _GradientClipperInput): - torch.nn.utils.clip_grad_value_(p, cfg.CLIP_VALUE) - - _GRADIENT_CLIP_TYPE_TO_CLIPPER = { - GradientClipType.VALUE: clip_grad_value, - GradientClipType.NORM: clip_grad_norm, - } - return _GRADIENT_CLIP_TYPE_TO_CLIPPER[GradientClipType(cfg.CLIP_TYPE)] - - -def _generate_optimizer_class_with_gradient_clipping( - optimizer_type: Type[torch.optim.Optimizer], gradient_clipper: _GradientClipper -) -> Type[torch.optim.Optimizer]: - """ - Dynamically creates a new type that inherits the type of a given instance - and overrides the `step` method to add gradient clipping - """ - - def optimizer_wgc_step(self, closure=None): - for group in self.param_groups: - for p in group["params"]: - gradient_clipper(p) - super(type(self), self).step(closure) - - OptimizerWithGradientClip = type( - optimizer_type.__name__ + "WithGradientClip", - (optimizer_type,), - {"step": optimizer_wgc_step}, - ) - return OptimizerWithGradientClip - - -def maybe_add_gradient_clipping( - cfg: CfgNode, optimizer: torch.optim.Optimizer -) -> torch.optim.Optimizer: - """ - If gradient clipping is enabled through config options, wraps the existing - optimizer instance of some type OptimizerType to become an instance - of the new dynamically created class OptimizerTypeWithGradientClip - that inherits OptimizerType and overrides the `step` method to - include gradient clipping. - - Args: - cfg: CfgNode - configuration options - optimizer: torch.optim.Optimizer - existing optimizer instance - - Return: - optimizer: torch.optim.Optimizer - either the unmodified optimizer instance (if gradient clipping is - disabled), or the same instance with adjusted __class__ to override - the `step` method and include gradient clipping - """ - if not cfg.SOLVER.CLIP_GRADIENTS.ENABLED: - return optimizer - grad_clipper = _create_gradient_clipper(cfg.SOLVER.CLIP_GRADIENTS) - OptimizerWithGradientClip = _generate_optimizer_class_with_gradient_clipping( - type(optimizer), grad_clipper - ) - optimizer.__class__ = OptimizerWithGradientClip - return optimizer - - -def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: - """ - Build an optimizer from config. 
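# Editor's sketch (not part of the original diff): how the gradient-clipping wrapper defined
# above is typically driven from the config. The key names mirror the cfg.SOLVER.CLIP_GRADIENTS
# fields read by _create_gradient_clipper / maybe_add_gradient_clipping.
#
#   cfg.SOLVER.CLIP_GRADIENTS.ENABLED = True
#   cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "norm"    # or "value"
#   cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
#   cfg.SOLVER.CLIP_GRADIENTS.NORM_TYPE = 2.0
#   optimizer = build_optimizer(cfg, model)         # returns e.g. an "SGDWithGradientClip" instance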
- """ - norm_module_types = ( - torch.nn.BatchNorm1d, - torch.nn.BatchNorm2d, - torch.nn.BatchNorm3d, - torch.nn.SyncBatchNorm, - # NaiveSyncBatchNorm inherits from BatchNorm2d - torch.nn.GroupNorm, - torch.nn.InstanceNorm1d, - torch.nn.InstanceNorm2d, - torch.nn.InstanceNorm3d, - torch.nn.LayerNorm, - torch.nn.LocalResponseNorm, - ) - params: List[Dict[str, Any]] = [] - memo: Set[torch.nn.parameter.Parameter] = set() - for module in model.modules(): - for key, value in module.named_parameters(recurse=False): - if not value.requires_grad: - continue - # Avoid duplicating parameters - if value in memo: - continue - memo.add(value) - lr = cfg.SOLVER.BASE_LR - weight_decay = cfg.SOLVER.WEIGHT_DECAY - if isinstance(module, norm_module_types): - weight_decay = cfg.SOLVER.WEIGHT_DECAY_NORM - elif key == "bias": - # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 - # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer - # hyperparameters are by default exactly the same as for regular - # weights. - lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR - weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS - params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] - - optimizer = torch.optim.SGD( - params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV - ) - optimizer = maybe_add_gradient_clipping(cfg, optimizer) - return optimizer - - -def build_lr_scheduler( - cfg: CfgNode, optimizer: torch.optim.Optimizer -) -> torch.optim.lr_scheduler._LRScheduler: - """ - Build a LR scheduler from config. - """ - name = cfg.SOLVER.LR_SCHEDULER_NAME - if name == "WarmupMultiStepLR": - return WarmupMultiStepLR( - optimizer, - cfg.SOLVER.STEPS, - cfg.SOLVER.GAMMA, - warmup_factor=cfg.SOLVER.WARMUP_FACTOR, - warmup_iters=cfg.SOLVER.WARMUP_ITERS, - warmup_method=cfg.SOLVER.WARMUP_METHOD, - ) - elif name == "WarmupCosineLR": - return WarmupCosineLR( - optimizer, - cfg.SOLVER.MAX_ITER, - warmup_factor=cfg.SOLVER.WARMUP_FACTOR, - warmup_iters=cfg.SOLVER.WARMUP_ITERS, - warmup_method=cfg.SOLVER.WARMUP_METHOD, - ) - else: - raise ValueError("Unknown LR scheduler: {}".format(name)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/lr_scheduler.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/lr_scheduler.py deleted file mode 100644 index 6148d86..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/solver/lr_scheduler.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from bisect import bisect_right -from typing import List -import torch - -# NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes -# only on epoch boundaries. We typically use iteration based schedules instead. -# As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean -# "iteration" instead. - -# FIXME: ideally this would be achieved with a CombinedLRScheduler, separating -# MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. - - -class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): - def __init__( - self, - optimizer: torch.optim.Optimizer, - milestones: List[int], - gamma: float = 0.1, - warmup_factor: float = 0.001, - warmup_iters: int = 1000, - warmup_method: str = "linear", - last_epoch: int = -1, - ): - if not list(milestones) == sorted(milestones): - raise ValueError( - "Milestones should be a list of" " increasing integers. 
Got {}", milestones - ) - self.milestones = milestones - self.gamma = gamma - self.warmup_factor = warmup_factor - self.warmup_iters = warmup_iters - self.warmup_method = warmup_method - super().__init__(optimizer, last_epoch) - - def get_lr(self) -> List[float]: - warmup_factor = _get_warmup_factor_at_iter( - self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor - ) - return [ - base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) - for base_lr in self.base_lrs - ] - - def _compute_values(self) -> List[float]: - # The new interface - return self.get_lr() - - -class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): - def __init__( - self, - optimizer: torch.optim.Optimizer, - max_iters: int, - warmup_factor: float = 0.001, - warmup_iters: int = 1000, - warmup_method: str = "linear", - last_epoch: int = -1, - ): - self.max_iters = max_iters - self.warmup_factor = warmup_factor - self.warmup_iters = warmup_iters - self.warmup_method = warmup_method - super().__init__(optimizer, last_epoch) - - def get_lr(self) -> List[float]: - warmup_factor = _get_warmup_factor_at_iter( - self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor - ) - # Different definitions of half-cosine with warmup are possible. For - # simplicity we multiply the standard half-cosine schedule by the warmup - # factor. An alternative is to start the period of the cosine at warmup_iters - # instead of at 0. In the case that warmup_iters << max_iters the two are - # very close to each other. - return [ - base_lr - * warmup_factor - * 0.5 - * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) - for base_lr in self.base_lrs - ] - - def _compute_values(self) -> List[float]: - # The new interface - return self.get_lr() - - -def _get_warmup_factor_at_iter( - method: str, iter: int, warmup_iters: int, warmup_factor: float -) -> float: - """ - Return the learning rate warmup factor at a specific iteration. - See :paper:`in1k1h` for more details. - - Args: - method (str): warmup method; either "constant" or "linear". - iter (int): iteration at which to calculate the warmup factor. - warmup_iters (int): the number of warmup iterations. - warmup_factor (float): the base warmup factor (the meaning changes according - to the method used). - - Returns: - float: the effective warmup factor at the given iteration. - """ - if iter >= warmup_iters: - return 1.0 - - if method == "constant": - return warmup_factor - elif method == "linear": - alpha = iter / warmup_iters - return warmup_factor * (1 - alpha) + alpha - else: - raise ValueError("Unknown warmup method: {}".format(method)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/__init__.py deleted file mode 100644 index 618f526..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .boxes import Boxes, BoxMode, pairwise_iou -from .image_list import ImageList - -from .instances import Instances -from .keypoints import Keypoints, heatmaps_to_keypoints -from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box, polygons_to_bitmask -from .rotated_boxes import RotatedBoxes -from .rotated_boxes import pairwise_iou as pairwise_iou_rotated - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/boxes.py deleted file mode 100644 index e625803..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/boxes.py +++ /dev/null @@ -1,367 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -import numpy as np -from enum import IntEnum, unique -from typing import Iterator, List, Tuple, Union -import torch - -_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray] - - -@unique -class BoxMode(IntEnum): - """ - Enum of different ways to represent a box. - """ - - XYXY_ABS = 0 - """ - (x0, y0, x1, y1) in absolute floating points coordinates. - The coordinates in range [0, width or height]. - """ - XYWH_ABS = 1 - """ - (x0, y0, w, h) in absolute floating points coordinates. - """ - XYXY_REL = 2 - """ - Not yet supported! - (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image. - """ - XYWH_REL = 3 - """ - Not yet supported! - (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image. - """ - XYWHA_ABS = 4 - """ - (xc, yc, w, h, a) in absolute floating points coordinates. - (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw. - """ - - @staticmethod - def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType: - """ - Args: - box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5 - from_mode, to_mode (BoxMode) - - Returns: - The converted box of the same type. - """ - if from_mode == to_mode: - return box - - original_type = type(box) - is_numpy = isinstance(box, np.ndarray) - single_box = isinstance(box, (list, tuple)) - if single_box: - assert len(box) == 4 or len(box) == 5, ( - "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor," - " where k == 4 or 5" - ) - arr = torch.tensor(box)[None, :] - else: - # avoid modifying the input box - if is_numpy: - arr = torch.from_numpy(np.asarray(box)).clone() - else: - arr = box.clone() - - assert to_mode.value not in [ - BoxMode.XYXY_REL, - BoxMode.XYWH_REL, - ] and from_mode.value not in [ - BoxMode.XYXY_REL, - BoxMode.XYWH_REL, - ], "Relative mode not yet supported!" 
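# Editor's note (not part of the original diff): a worked example of the XYWHA -> XYXY branch
# below. For the rotated box (xc, yc, w, h, a) = (10, 10, 4, 2, 90):
#
#   BoxMode.convert([10, 10, 4, 2, 90], BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS)
#   # -> [9.0, 8.0, 11.0, 12.0]
#
# since |cos 90| = 0 and |sin 90| = 1, the horizontal bounding rectangle has
# new_w = 2 and new_h = 4, centered at (10, 10).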
- - if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS: - assert ( - arr.shape[-1] == 5 - ), "The last dimension of input shape must be 5 for XYWHA format" - original_dtype = arr.dtype - arr = arr.double() - - w = arr[:, 2] - h = arr[:, 3] - a = arr[:, 4] - c = torch.abs(torch.cos(a * math.pi / 180.0)) - s = torch.abs(torch.sin(a * math.pi / 180.0)) - # This basically computes the horizontal bounding rectangle of the rotated box - new_w = c * w + s * h - new_h = c * h + s * w - - # convert center to top-left corner - arr[:, 0] -= new_w / 2.0 - arr[:, 1] -= new_h / 2.0 - # bottom-right corner - arr[:, 2] = arr[:, 0] + new_w - arr[:, 3] = arr[:, 1] + new_h - - arr = arr[:, :4].to(dtype=original_dtype) - elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS: - original_dtype = arr.dtype - arr = arr.double() - arr[:, 0] += arr[:, 2] / 2.0 - arr[:, 1] += arr[:, 3] / 2.0 - angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype) - arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype) - else: - if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS: - arr[:, 2] += arr[:, 0] - arr[:, 3] += arr[:, 1] - elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS: - arr[:, 2] -= arr[:, 0] - arr[:, 3] -= arr[:, 1] - else: - raise NotImplementedError( - "Conversion from BoxMode {} to {} is not supported yet".format( - from_mode, to_mode - ) - ) - - if single_box: - return original_type(arr.flatten().tolist()) - if is_numpy: - return arr.numpy() - else: - return arr - - -class Boxes: - """ - This structure stores a list of boxes as a Nx4 torch.Tensor. - It supports some common methods about boxes - (`area`, `clip`, `nonempty`, etc), - and also behaves like a Tensor - (support indexing, `to(device)`, `.device`, and iteration over all boxes) - - Attributes: - tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). - """ - - BoxSizeType = Union[List[int], Tuple[int, int]] - - def __init__(self, tensor: torch.Tensor): - """ - Args: - tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). - """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) - if tensor.numel() == 0: - # Use reshape, so we don't end up creating a new tensor that does not depend on - # the inputs (and consequently confuses jit) - tensor = tensor.reshape((0, 4)).to(dtype=torch.float32, device=device) - assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size() - - self.tensor = tensor - - def clone(self) -> "Boxes": - """ - Clone the Boxes. - - Returns: - Boxes - """ - return Boxes(self.tensor.clone()) - - def to(self, device: str) -> "Boxes": - return Boxes(self.tensor.to(device)) - - def area(self) -> torch.Tensor: - """ - Computes the area of all the boxes. - - Returns: - torch.Tensor: a vector with areas of each box. - """ - box = self.tensor - area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]) - return area - - def clip(self, box_size: BoxSizeType) -> None: - """ - Clip (in place) the boxes by limiting x coordinates to the range [0, width] - and y coordinates to the range [0, height]. - - Args: - box_size (height, width): The clipping box's size. - """ - assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!" 
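# Editor's sketch (not part of the original diff): minimal use of the Boxes container defined
# above; the numbers are made up for illustration.
#
#   boxes = Boxes(torch.tensor([[10., 20., 50., 80.], [-5., 0., 30., 999.]]))
#   boxes.clip((100, 100))      # box_size is (height, width); clamps the second box into the image
#   boxes.nonempty()            # tensor([True, True])
#   boxes.area()                # tensor([2400., 3000.]) after clipping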
- h, w = box_size - self.tensor[:, 0].clamp_(min=0, max=w) - self.tensor[:, 1].clamp_(min=0, max=h) - self.tensor[:, 2].clamp_(min=0, max=w) - self.tensor[:, 3].clamp_(min=0, max=h) - - def nonempty(self, threshold: float = 0.0) -> torch.Tensor: - """ - Find boxes that are non-empty. - A box is considered empty, if either of its side is no larger than threshold. - - Returns: - Tensor: - a binary vector which represents whether each box is empty - (False) or non-empty (True). - """ - box = self.tensor - widths = box[:, 2] - box[:, 0] - heights = box[:, 3] - box[:, 1] - keep = (widths > threshold) & (heights > threshold) - return keep - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Boxes": - """ - Returns: - Boxes: Create a new :class:`Boxes` by indexing. - - The following usage are allowed: - - 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box. - 2. `new_boxes = boxes[2:10]`: return a slice of boxes. - 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor - with `length = len(boxes)`. Nonzero elements in the vector will be selected. - - Note that the returned Boxes might share storage with this Boxes, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return Boxes(self.tensor[item].view(1, -1)) - b = self.tensor[item] - assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) - return Boxes(b) - - def __len__(self) -> int: - return self.tensor.shape[0] - - def __repr__(self) -> str: - return "Boxes(" + str(self.tensor) + ")" - - def inside_box(self, box_size: BoxSizeType, boundary_threshold: int = 0) -> torch.Tensor: - """ - Args: - box_size (height, width): Size of the reference box. - boundary_threshold (int): Boxes that extend beyond the reference box - boundary by more than boundary_threshold are considered "outside". - - Returns: - a binary vector, indicating whether each box is inside the reference box. - """ - height, width = box_size - inds_inside = ( - (self.tensor[..., 0] >= -boundary_threshold) - & (self.tensor[..., 1] >= -boundary_threshold) - & (self.tensor[..., 2] < width + boundary_threshold) - & (self.tensor[..., 3] < height + boundary_threshold) - ) - return inds_inside - - def get_centers(self) -> torch.Tensor: - """ - Returns: - The box centers in a Nx2 array of (x, y). - """ - return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2 - - def scale(self, scale_x: float, scale_y: float) -> None: - """ - Scale the box with horizontal and vertical scaling factors - """ - self.tensor[:, 0::2] *= scale_x - self.tensor[:, 1::2] *= scale_y - - @classmethod - def cat(cls, boxes_list: List["Boxes"]) -> "Boxes": - """ - Concatenates a list of Boxes into a single Boxes - - Arguments: - boxes_list (list[Boxes]) - - Returns: - Boxes: the concatenated Boxes - """ - assert isinstance(boxes_list, (list, tuple)) - if len(boxes_list) == 0: - return cls(torch.empty(0)) - assert all(isinstance(box, Boxes) for box in boxes_list) - - # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input - cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0)) - return cat_boxes - - @property - def device(self) -> torch.device: - return self.tensor.device - - def __iter__(self) -> Iterator[torch.Tensor]: - """ - Yield a box as a Tensor of shape (4,) at a time. 
- """ - yield from self.tensor - - -# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py -# with slight modifications -def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: - """ - Given two lists of boxes of size N and M, - compute the IoU (intersection over union) - between __all__ N x M pairs of boxes. - The box order must be (xmin, ymin, xmax, ymax). - - Args: - boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively. - - Returns: - Tensor: IoU, sized [N,M]. - """ - area1 = boxes1.area() - area2 = boxes2.area() - - boxes1, boxes2 = boxes1.tensor, boxes2.tensor - - width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max( - boxes1[:, None, :2], boxes2[:, :2] - ) # [N,M,2] - - width_height.clamp_(min=0) # [N,M,2] - inter = width_height.prod(dim=2) # [N,M] - del width_height - - # handle empty boxes - iou = torch.where( - inter > 0, - inter / (area1[:, None] + area2 - inter), - torch.zeros(1, dtype=inter.dtype, device=inter.device), - ) - return iou - - -def matched_boxlist_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor: - """ - Compute pairwise intersection over union (IOU) of two sets of matched - boxes. The box order must be (xmin, ymin, xmax, ymax). - Similar to boxlist_iou, but computes only diagonal elements of the matrix - Arguments: - boxes1: (Boxes) bounding boxes, sized [N,4]. - boxes2: (Boxes) bounding boxes, sized [N,4]. - Returns: - (tensor) iou, sized [N]. - """ - assert len(boxes1) == len( - boxes2 - ), "boxlists should have the same" "number of entries, got {}, {}".format( - len(boxes1), len(boxes2) - ) - area1 = boxes1.area() # [N] - area2 = boxes2.area() # [N] - box1, box2 = boxes1.tensor, boxes2.tensor - lt = torch.max(box1[:, :2], box2[:, :2]) # [N,2] - rb = torch.min(box1[:, 2:], box2[:, 2:]) # [N,2] - wh = (rb - lt).clamp(min=0) # [N,2] - inter = wh[:, 0] * wh[:, 1] # [N] - iou = inter / (area1 + area2 - inter) # [N] - return iou diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/image_list.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/image_list.py deleted file mode 100644 index 2d89224..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/image_list.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -from __future__ import division -from typing import Any, List, Sequence, Tuple, Union -import torch -from torch.nn import functional as F - - -class ImageList(object): - """ - Structure that holds a list of images (of possibly - varying sizes) as a single tensor. - This works by padding the images to the same size, - and storing in a field the original sizes of each image - - Attributes: - image_sizes (list[tuple[int, int]]): each tuple is (h, w) - """ - - def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): - """ - Arguments: - tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 - image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can - be smaller than (H, W) due to padding. - """ - self.tensor = tensor - self.image_sizes = image_sizes - - def __len__(self) -> int: - return len(self.image_sizes) - - def __getitem__(self, idx: Union[int, slice]) -> torch.Tensor: - """ - Access the individual image in its original size. 
- - Returns: - Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 - """ - size = self.image_sizes[idx] - return self.tensor[idx, ..., : size[0], : size[1]] # type: ignore - - def to(self, *args: Any, **kwargs: Any) -> "ImageList": - cast_tensor = self.tensor.to(*args, **kwargs) - return ImageList(cast_tensor, self.image_sizes) - - @property - def device(self) -> torch.device: - return self.tensor.device - - @staticmethod - def from_tensors( - tensors: Sequence[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 - ) -> "ImageList": - """ - Args: - tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or - (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded - to the same shape with `pad_value`. - size_divisibility (int): If `size_divisibility > 0`, add padding to ensure - the common height and width is divisible by `size_divisibility`. - This depends on the model and many models need a divisibility of 32. - pad_value (float): value to pad - - Returns: - an `ImageList`. - """ - assert len(tensors) > 0 - assert isinstance(tensors, (tuple, list)) - for t in tensors: - assert isinstance(t, torch.Tensor), type(t) - assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape - # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors - max_size = ( - # In tracing mode, x.shape[i] is Tensor, and should not be converted - # to int: this will cause the traced graph to have hard-coded shapes. - # Instead we should make max_size a Tensor that depends on these tensors. - # Using torch.stack twice seems to be the best way to convert - # list[list[ScalarTensor]] to a Tensor - torch.stack( - [ - torch.stack([torch.as_tensor(dim) for dim in size]) - for size in [tuple(img.shape) for img in tensors] - ] - ) - .max(0) - .values - ) - - if size_divisibility > 0: - stride = size_divisibility - # the last two dims are H,W, both subject to divisibility requirement - max_size = torch.cat([max_size[:-2], (max_size[-2:] + (stride - 1)) // stride * stride]) - - image_sizes = [tuple(im.shape[-2:]) for im in tensors] - - if len(tensors) == 1: - # This seems slightly (2%) faster. - # TODO: check whether it's faster for multiple images as well - image_size = image_sizes[0] - padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] - if all(x == 0 for x in padding_size): # https://github.com/pytorch/pytorch/issues/31734 - batched_imgs = tensors[0].unsqueeze(0) - else: - padded = F.pad(tensors[0], padding_size, value=pad_value) - batched_imgs = padded.unsqueeze_(0) - else: - # max_size can be a tensor in tracing mode, therefore use tuple() - batch_shape = (len(tensors),) + tuple(max_size) - batched_imgs = tensors[0].new_full(batch_shape, pad_value) - for img, pad_img in zip(tensors, batched_imgs): - pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) - - return ImageList(batched_imgs.contiguous(), image_sizes) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/instances.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/instances.py deleted file mode 100644 index 373de08..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/instances.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import itertools -from typing import Any, Dict, List, Tuple, Union -import torch - - -class Instances: - """ - This class represents a list of instances in an image. 
- It stores the attributes of instances (e.g., boxes, masks, labels, scores) as "fields". - All fields must have the same ``__len__`` which is the number of instances. - - All other (non-field) attributes of this class are considered private: - they must start with '_' and are not modifiable by a user. - - Some basic usage: - - 1. Set/Get a field: - - .. code-block:: python - - instances.gt_boxes = Boxes(...) - print(instances.pred_masks) # a tensor of shape (N, H, W) - print('gt_masks' in instances) - - 2. ``len(instances)`` returns the number of instances - 3. Indexing: ``instances[indices]`` will apply the indexing on all the fields - and returns a new :class:`Instances`. - Typically, ``indices`` is a integer vector of indices, - or a binary mask of length ``num_instances``, - """ - - def __init__(self, image_size: Tuple[int, int], **kwargs: Any): - """ - Args: - image_size (height, width): the spatial size of the image. - kwargs: fields to add to this `Instances`. - """ - self._image_size = image_size - self._fields: Dict[str, Any] = {} - for k, v in kwargs.items(): - self.set(k, v) - - @property - def image_size(self) -> Tuple[int, int]: - """ - Returns: - tuple: height, width - """ - return self._image_size - - def __setattr__(self, name: str, val: Any) -> None: - if name.startswith("_"): - super().__setattr__(name, val) - else: - self.set(name, val) - - def __getattr__(self, name: str) -> Any: - if name == "_fields" or name not in self._fields: - raise AttributeError("Cannot find field '{}' in the given Instances!".format(name)) - return self._fields[name] - - def set(self, name: str, value: Any) -> None: - """ - Set the field named `name` to `value`. - The length of `value` must be the number of instances, - and must agree with other existing fields in this object. - """ - data_len = len(value) - if len(self._fields): - assert ( - len(self) == data_len - ), "Adding a field of length {} to a Instances of length {}".format(data_len, len(self)) - self._fields[name] = value - - def has(self, name: str) -> bool: - """ - Returns: - bool: whether the field called `name` exists. - """ - return name in self._fields - - def remove(self, name: str) -> None: - """ - Remove the field called `name`. - """ - del self._fields[name] - - def get(self, name: str) -> Any: - """ - Returns the field called `name`. - """ - return self._fields[name] - - def get_fields(self) -> Dict[str, Any]: - """ - Returns: - dict: a dict which maps names (str) to data of the fields - - Modifying the returned dict will modify this instance. - """ - return self._fields - - # Tensor-like methods - def to(self, device: str) -> "Instances": - """ - Returns: - Instances: all fields are called with a `to(device)`, if the field has this method. - """ - ret = Instances(self._image_size) - for k, v in self._fields.items(): - if hasattr(v, "to"): - v = v.to(device) - ret.set(k, v) - return ret - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Instances": - """ - Args: - item: an index-like object and will be used to index all the fields. - - Returns: - If `item` is a string, return the data in the corresponding field. - Otherwise, returns an `Instances` where all fields are indexed by `item`. 
- """ - if type(item) == int: - if item >= len(self) or item < -len(self): - raise IndexError("Instances index out of range!") - else: - item = slice(item, None, len(self)) - - ret = Instances(self._image_size) - for k, v in self._fields.items(): - ret.set(k, v[item]) - return ret - - def __len__(self) -> int: - for v in self._fields.values(): - return len(v) - raise NotImplementedError("Empty Instances does not support __len__!") - - def __iter__(self): - raise NotImplementedError("`Instances` object is not iterable!") - - @staticmethod - def cat(instance_lists: List["Instances"]) -> "Instances": - """ - Args: - instance_lists (list[Instances]) - - Returns: - Instances - """ - assert all(isinstance(i, Instances) for i in instance_lists) - assert len(instance_lists) > 0 - if len(instance_lists) == 1: - return instance_lists[0] - - image_size = instance_lists[0].image_size - for i in instance_lists[1:]: - assert i.image_size == image_size - ret = Instances(image_size) - for k in instance_lists[0]._fields.keys(): - values = [i.get(k) for i in instance_lists] - v0 = values[0] - if isinstance(v0, torch.Tensor): - values = torch.cat(values, dim=0) - elif isinstance(v0, list): - values = list(itertools.chain(*values)) - elif hasattr(type(v0), "cat"): - values = type(v0).cat(values) - else: - raise ValueError("Unsupported type {} for concatenation".format(type(v0))) - ret.set(k, values) - return ret - - def __str__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={}, ".format(len(self)) - s += "image_height={}, ".format(self._image_size[0]) - s += "image_width={}, ".format(self._image_size[1]) - s += "fields=[{}])".format(", ".join((f"{k}: {v}" for k, v in self._fields.items()))) - return s - - __repr__ = __str__ diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/keypoints.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/keypoints.py deleted file mode 100644 index 2242815..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/keypoints.py +++ /dev/null @@ -1,209 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -from typing import Any, List, Tuple, Union -import torch - -from detectron2.layers import interpolate - - -class Keypoints: - """ - Stores keypoint annotation data. GT Instances have a `gt_keypoints` property - containing the x,y location and visibility flag of each keypoint. This tensor has shape - (N, K, 3) where N is the number of instances and K is the number of keypoints per instance. - - The visibility flag follows the COCO format and must be one of three integers: - * v=0: not labeled (in which case x=y=0) - * v=1: labeled but not visible - * v=2: labeled and visible - """ - - def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]): - """ - Arguments: - keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint. - The shape should be (N, K, 3) where N is the number of - instances, and K is the number of keypoints per instance. 
- """ - device = keypoints.device if isinstance(keypoints, torch.Tensor) else torch.device("cpu") - keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device) - assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape - self.tensor = keypoints - - def __len__(self) -> int: - return self.tensor.size(0) - - def to(self, *args: Any, **kwargs: Any) -> "Keypoints": - return type(self)(self.tensor.to(*args, **kwargs)) - - @property - def device(self) -> torch.device: - return self.tensor.device - - def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor: - """ - Arguments: - boxes: Nx4 tensor, the boxes to draw the keypoints to - - Returns: - heatmaps: - A tensor of shape (N, K) containing an integer spatial label - in the range [0, heatmap_size**2 - 1] for each keypoint in the input. - valid: - A tensor of shape (N, K) containing whether each keypoint is in the roi or not. - """ - return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size) - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints": - """ - Create a new `Keypoints` by indexing on this `Keypoints`. - - The following usage are allowed: - - 1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance. - 2. `new_kpts = kpts[2:10]`: return a slice of key points. - 3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor - with `length = len(kpts)`. Nonzero elements in the vector will be selected. - - Note that the returned Keypoints might share storage with this Keypoints, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return Keypoints([self.tensor[item]]) - return Keypoints(self.tensor[item]) - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - -# TODO make this nicer, this is a direct translation from C2 (but removing the inner loop) -def _keypoints_to_heatmap( - keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space. - - Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the - closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the - continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"): - d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. - - Arguments: - keypoints: tensor of keypoint locations in of shape (N, K, 3). - rois: Nx4 tensor of rois in xyxy format - heatmap_size: integer side length of square heatmap. - - Returns: - heatmaps: A tensor of shape (N, K) containing an integer spatial label - in the range [0, heatmap_size**2 - 1] for each keypoint in the input. - valid: A tensor of shape (N, K) containing whether each keypoint is in - the roi or not. 
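# Editor's note (not part of the original diff): a small worked example of the continuous ->
# discrete mapping implemented below. For a ROI with x1 = 10, x2 = 74 and heatmap_size = 56,
# scale_x = 56 / 64 = 0.875, so a keypoint at x = 30.0 lands in bin
# floor((30.0 - 10) * 0.875) = 17; points exactly on the right/bottom ROI edge are snapped to
# index heatmap_size - 1, and keypoints outside the ROI or with visibility 0 are marked invalid.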
- """ - - if rois.numel() == 0: - return rois.new().long(), rois.new().long() - offset_x = rois[:, 0] - offset_y = rois[:, 1] - scale_x = heatmap_size / (rois[:, 2] - rois[:, 0]) - scale_y = heatmap_size / (rois[:, 3] - rois[:, 1]) - - offset_x = offset_x[:, None] - offset_y = offset_y[:, None] - scale_x = scale_x[:, None] - scale_y = scale_y[:, None] - - x = keypoints[..., 0] - y = keypoints[..., 1] - - x_boundary_inds = x == rois[:, 2][:, None] - y_boundary_inds = y == rois[:, 3][:, None] - - x = (x - offset_x) * scale_x - x = x.floor().long() - y = (y - offset_y) * scale_y - y = y.floor().long() - - x[x_boundary_inds] = heatmap_size - 1 - y[y_boundary_inds] = heatmap_size - 1 - - valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size) - vis = keypoints[..., 2] > 0 - valid = (valid_loc & vis).long() - - lin_ind = y * heatmap_size + x - heatmaps = lin_ind * valid - - return heatmaps, valid - - -@torch.no_grad() -def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor: - """ - Extract predicted keypoint locations from heatmaps. - - Args: - maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for - each ROI and each keypoint. - rois (Tensor): (#ROIs, 4). The box of each ROI. - - Returns: - Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to - (x, y, logit, score) for each keypoint. - - When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate, - we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from - Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate. - """ - offset_x = rois[:, 0] - offset_y = rois[:, 1] - - widths = (rois[:, 2] - rois[:, 0]).clamp(min=1) - heights = (rois[:, 3] - rois[:, 1]).clamp(min=1) - widths_ceil = widths.ceil() - heights_ceil = heights.ceil() - - num_rois, num_keypoints = maps.shape[:2] - xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4) - - width_corrections = widths / widths_ceil - height_corrections = heights / heights_ceil - - keypoints_idx = torch.arange(num_keypoints, device=maps.device) - - for i in range(num_rois): - outsize = (int(heights_ceil[i]), int(widths_ceil[i])) - roi_map = interpolate(maps[[i]], size=outsize, mode="bicubic", align_corners=False).squeeze( - 0 - ) # #keypoints x H x W - - # softmax over the spatial region - max_score, _ = roi_map.view(num_keypoints, -1).max(1) - max_score = max_score.view(num_keypoints, 1, 1) - tmp_full_resolution = (roi_map - max_score).exp_() - tmp_pool_resolution = (maps[i] - max_score).exp_() - # Produce scores over the region H x W, but normalize with POOL_H x POOL_W, - # so that the scores of objects of different absolute sizes will be more comparable - roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum((1, 2), keepdim=True) - - w = roi_map.shape[2] - pos = roi_map.view(num_keypoints, -1).argmax(1) - - x_int = pos % w - y_int = (pos - x_int) // w - - assert ( - roi_map_scores[keypoints_idx, y_int, x_int] - == roi_map_scores.view(num_keypoints, -1).max(1)[0] - ).all() - - x = (x_int.float() + 0.5) * width_corrections[i] - y = (y_int.float() + 0.5) * height_corrections[i] - - xy_preds[i, :, 0] = x + offset_x[i] - xy_preds[i, :, 1] = y + offset_y[i] - xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, x_int] - xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int] - - return xy_preds diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/masks.py 
b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/masks.py deleted file mode 100644 index e363baf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/masks.py +++ /dev/null @@ -1,424 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import itertools -import numpy as np -from typing import Any, Iterator, List, Union -import pycocotools.mask as mask_utils -import torch - -from detectron2.layers.roi_align import ROIAlign - -from .boxes import Boxes - - -def polygon_area(x, y): - # Using the shoelace formula - # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1))) - - -def polygons_to_bitmask(polygons: List[np.ndarray], height: int, width: int) -> np.ndarray: - """ - Args: - polygons (list[ndarray]): each array has shape (Nx2,) - height, width (int) - - Returns: - ndarray: a bool mask of shape (height, width) - """ - assert len(polygons) > 0, "COCOAPI does not support empty polygons" - rles = mask_utils.frPyObjects(polygons, height, width) - rle = mask_utils.merge(rles) - return mask_utils.decode(rle).astype(np.bool) - - -def rasterize_polygons_within_box( - polygons: List[np.ndarray], box: np.ndarray, mask_size: int -) -> torch.Tensor: - """ - Rasterize the polygons into a mask image and - crop the mask content in the given box. - The cropped mask is resized to (mask_size, mask_size). - - This function is used when generating training targets for mask head in Mask R-CNN. - Given original ground-truth masks for an image, new ground-truth mask - training targets in the size of `mask_size x mask_size` - must be provided for each predicted box. This function will be called to - produce such targets. - - Args: - polygons (list[ndarray[float]]): a list of polygons, which represents an instance. - box: 4-element numpy array - mask_size (int): - - Returns: - Tensor: BoolTensor of shape (mask_size, mask_size) - """ - # 1. Shift the polygons w.r.t the boxes - w, h = box[2] - box[0], box[3] - box[1] - - polygons = copy.deepcopy(polygons) - for p in polygons: - p[0::2] = p[0::2] - box[0] - p[1::2] = p[1::2] - box[1] - - # 2. Rescale the polygons to the new box size - # max() to avoid division by small number - ratio_h = mask_size / max(h, 0.1) - ratio_w = mask_size / max(w, 0.1) - - if ratio_h == ratio_w: - for p in polygons: - p *= ratio_h - else: - for p in polygons: - p[0::2] *= ratio_w - p[1::2] *= ratio_h - - # 3. Rasterize the polygons with coco api - mask = polygons_to_bitmask(polygons, mask_size, mask_size) - mask = torch.from_numpy(mask) - return mask - - -class BitMasks: - """ - This class stores the segmentation masks for all objects in one image, in - the form of bitmaps. - - Attributes: - tensor: bool Tensor of N,H,W, representing N instances in the image. - """ - - def __init__(self, tensor: Union[torch.Tensor, np.ndarray]): - """ - Args: - tensor: bool Tensor of N,H,W, representing N instances in the image. 
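Stepping back to the polygon helpers at the top of this file: the shoelace-based polygon_area can be sanity-checked on a shape whose area is known. A minimal sketch, assuming detectron2 is importable; the rectangle coordinates are arbitrary.

import numpy as np
from detectron2.structures.masks import polygon_area

# A 4 x 2 axis-aligned rectangle with corners (1,1), (5,1), (5,3), (1,3).
x = np.array([1.0, 5.0, 5.0, 1.0])
y = np.array([1.0, 1.0, 3.0, 3.0])
print(polygon_area(x, y))   # 8.0, matching width * height = 4 * 2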
- """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.bool, device=device) - assert tensor.dim() == 3, tensor.size() - self.image_size = tensor.shape[1:] - self.tensor = tensor - - def to(self, device: str) -> "BitMasks": - return BitMasks(self.tensor.to(device)) - - @property - def device(self) -> torch.device: - return self.tensor.device - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks": - """ - Returns: - BitMasks: Create a new :class:`BitMasks` by indexing. - - The following usage are allowed: - - 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask. - 2. `new_masks = masks[2:10]`: return a slice of masks. - 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor - with `length = len(masks)`. Nonzero elements in the vector will be selected. - - Note that the returned object might share storage with this object, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return BitMasks(self.tensor[item].view(1, -1)) - m = self.tensor[item] - assert m.dim() == 3, "Indexing on BitMasks with {} returns a tensor with shape {}!".format( - item, m.shape - ) - return BitMasks(m) - - def __iter__(self) -> torch.Tensor: - yield from self.tensor - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.tensor)) - return s - - def __len__(self) -> int: - return self.tensor.shape[0] - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: a BoolTensor which represents - whether each mask is empty (False) or non-empty (True). - """ - return self.tensor.flatten(1).any(dim=1) - - @staticmethod - def from_polygon_masks( - polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]], height: int, width: int - ) -> "BitMasks": - """ - Args: - polygon_masks (list[list[ndarray]] or PolygonMasks) - height, width (int) - """ - if isinstance(polygon_masks, PolygonMasks): - polygon_masks = polygon_masks.polygons - masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks] - return BitMasks(torch.stack([torch.from_numpy(x) for x in masks])) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each bitmask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - It has less reconstruction error compared to rasterization with polygons. - However we observe no difference in accuracy, - but BitMasks requires more memory to store all the masks. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: - A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. 
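A usage sketch of crop_and_resize as a mask-target preparation step, assuming detectron2 (including its compiled ROIAlign op) is available; the masks and boxes below are random placeholders.

import torch
from detectron2.structures import BitMasks

masks = BitMasks(torch.rand(3, 64, 64) > 0.5)           # 3 instances on a 64x64 image
boxes = torch.tensor([[ 0.0,  0.0, 32.0, 32.0],
                      [16.0, 16.0, 48.0, 48.0],
                      [ 8.0,  0.0, 40.0, 64.0]])        # xyxy, one box per mask
targets = masks.crop_and_resize(boxes, mask_size=28)    # bool tensor of shape (3, 28, 28)
print(targets.shape, targets.dtype)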
- """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - device = self.tensor.device - - batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[:, None] - rois = torch.cat([batch_inds, boxes], dim=1) # Nx5 - - bit_masks = self.tensor.to(dtype=torch.float32) - rois = rois.to(device=device) - output = ( - ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True) - .forward(bit_masks[:, None, :, :], rois) - .squeeze(1) - ) - output = output >= 0.5 - return output - - def get_bounding_boxes(self) -> None: - # not needed now - raise NotImplementedError - - @staticmethod - def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks": - """ - Concatenates a list of BitMasks into a single BitMasks - - Arguments: - bitmasks_list (list[BitMasks]) - - Returns: - BitMasks: the concatenated BitMasks - """ - assert isinstance(bitmasks_list, (list, tuple)) - assert len(bitmasks_list) > 0 - assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list) - - cat_bitmasks = type(bitmasks_list[0])(torch.cat([bm.tensor for bm in bitmasks_list], dim=0)) - return cat_bitmasks - - -class PolygonMasks: - """ - This class stores the segmentation masks for all objects in one image, in the form of polygons. - - Attributes: - polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon. - """ - - def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]): - """ - Arguments: - polygons (list[list[np.ndarray]]): The first - level of the list correspond to individual instances, - the second level to all the polygons that compose the - instance, and the third level to the polygon coordinates. - The third level array should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - """ - assert isinstance(polygons, list), ( - "Cannot create PolygonMasks: Expect a list of list of polygons per image. " - "Got '{}' instead.".format(type(polygons)) - ) - - def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray: - # Use float64 for higher precision, because why not? - # Always put polygons on CPU (self.to is a no-op) since they - # are supposed to be small tensors. - # May need to change this assumption if GPU placement becomes useful - if isinstance(t, torch.Tensor): - t = t.cpu().numpy() - return np.asarray(t).astype("float64") - - def process_polygons( - polygons_per_instance: List[Union[torch.Tensor, np.ndarray]] - ) -> List[np.ndarray]: - assert isinstance(polygons_per_instance, list), ( - "Cannot create polygons: Expect a list of polygons per instance. " - "Got '{}' instead.".format(type(polygons_per_instance)) - ) - # transform the polygon to a tensor - polygons_per_instance = [_make_array(p) for p in polygons_per_instance] - for polygon in polygons_per_instance: - assert len(polygon) % 2 == 0 and len(polygon) >= 6 - return polygons_per_instance - - self.polygons: List[List[np.ndarray]] = [ - process_polygons(polygons_per_instance) for polygons_per_instance in polygons - ] - - def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks": - return self - - @property - def device(self) -> torch.device: - return torch.device("cpu") - - def get_bounding_boxes(self) -> Boxes: - """ - Returns: - Boxes: tight bounding boxes around polygon masks. 
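A minimal sketch of the polygon representation and the tight boxes it yields, assuming detectron2 is importable; the triangle coordinates are arbitrary.

import numpy as np
from detectron2.structures import PolygonMasks

# One instance made of a single triangle, flattened as [x0, y0, x1, y1, x2, y2].
pm = PolygonMasks([[np.array([2.0, 2.0, 10.0, 2.0, 6.0, 8.0])]])
boxes = pm.get_bounding_boxes()
print(boxes.tensor)   # tensor([[ 2.,  2., 10.,  8.]]) -- xyxy of the tight box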
- """ - boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32) - for idx, polygons_per_instance in enumerate(self.polygons): - minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32) - maxxy = torch.zeros(2, dtype=torch.float32) - for polygon in polygons_per_instance: - coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32) - minxy = torch.min(minxy, torch.min(coords, dim=0).values) - maxxy = torch.max(maxxy, torch.max(coords, dim=0).values) - boxes[idx, :2] = minxy - boxes[idx, 2:] = maxxy - return Boxes(boxes) - - def nonempty(self) -> torch.Tensor: - """ - Find masks that are non-empty. - - Returns: - Tensor: - a BoolTensor which represents whether each mask is empty (False) or not (True). - """ - keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons] - return torch.from_numpy(np.asarray(keep, dtype=np.bool)) - - def __getitem__(self, item: Union[int, slice, List[int], torch.BoolTensor]) -> "PolygonMasks": - """ - Support indexing over the instances and return a `PolygonMasks` object. - `item` can be: - - 1. An integer. It will return an object with only one instance. - 2. A slice. It will return an object with the selected instances. - 3. A list[int]. It will return an object with the selected instances, - correpsonding to the indices in the list. - 4. A vector mask of type BoolTensor, whose length is num_instances. - It will return an object with the instances whose mask is nonzero. - """ - if isinstance(item, int): - selected_polygons = [self.polygons[item]] - elif isinstance(item, slice): - selected_polygons = self.polygons[item] - elif isinstance(item, list): - selected_polygons = [self.polygons[i] for i in item] - elif isinstance(item, torch.Tensor): - # Polygons is a list, so we have to move the indices back to CPU. - if item.dtype == torch.bool: - assert item.dim() == 1, item.shape - item = item.nonzero().squeeze(1).cpu().numpy().tolist() - elif item.dtype in [torch.int32, torch.int64]: - item = item.cpu().numpy().tolist() - else: - raise ValueError("Unsupported tensor dtype={} for indexing!".format(item.dtype)) - selected_polygons = [self.polygons[i] for i in item] - return PolygonMasks(selected_polygons) - - def __iter__(self) -> Iterator[List[np.ndarray]]: - """ - Yields: - list[ndarray]: the polygons for one instance. - Each Tensor is a float64 vector representing a polygon. - """ - return iter(self.polygons) - - def __repr__(self) -> str: - s = self.__class__.__name__ + "(" - s += "num_instances={})".format(len(self.polygons)) - return s - - def __len__(self) -> int: - return len(self.polygons) - - def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor: - """ - Crop each mask by the given box, and resize results to (mask_size, mask_size). - This can be used to prepare training targets for Mask R-CNN. - - Args: - boxes (Tensor): Nx4 tensor storing the boxes for each mask - mask_size (int): the size of the rasterized mask. - - Returns: - Tensor: A bool tensor of shape (N, mask_size, mask_size), where - N is the number of predicted boxes for this image. 
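The indexing modes documented above (int, slice, list of indices, boolean mask) behave like tensor indexing over instances. A short sketch with three single-polygon instances, assuming detectron2 is importable; the triangles are made up.

import numpy as np
import torch
from detectron2.structures import PolygonMasks

def tri(dx):
    # One instance: a single triangle shifted horizontally by dx.
    return [np.array([0.0 + dx, 0.0, 4.0 + dx, 0.0, 2.0 + dx, 3.0])]

pm = PolygonMasks([tri(0), tri(10), tri(20)])          # three instances

print(len(pm[1]))                                      # 1  (single instance)
print(len(pm[0:2]))                                    # 2  (slice)
print(len(pm[[0, 2]]))                                 # 2  (list of indices)
print(len(pm[torch.tensor([True, False, True])]))      # 2  (boolean mask)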
- """ - assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self)) - - device = boxes.device - # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise - # (several small tensors for representing a single instance mask) - boxes = boxes.to(torch.device("cpu")) - - results = [ - rasterize_polygons_within_box(poly, box.numpy(), mask_size) - for poly, box in zip(self.polygons, boxes) - ] - """ - poly: list[list[float]], the polygons for one instance - box: a tensor of shape (4,) - """ - if len(results) == 0: - return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device) - return torch.stack(results, dim=0).to(device=device) - - def area(self): - """ - Computes area of the mask. - Only works with Polygons, using the shoelace formula: - https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates - - Returns: - Tensor: a vector, area for each instance - """ - - area = [] - for polygons_per_instance in self.polygons: - area_per_instance = 0 - for p in polygons_per_instance: - area_per_instance += polygon_area(p[0::2], p[1::2]) - area.append(area_per_instance) - - return torch.tensor(area) - - @staticmethod - def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks": - """ - Concatenates a list of PolygonMasks into a single PolygonMasks - - Arguments: - polymasks_list (list[PolygonMasks]) - - Returns: - PolygonMasks: the concatenated PolygonMasks - """ - assert isinstance(polymasks_list, (list, tuple)) - assert len(polymasks_list) > 0 - assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list) - - cat_polymasks = type(polymasks_list[0])( - list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list)) - ) - return cat_polymasks diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/rotated_boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/rotated_boxes.py deleted file mode 100644 index 823cfb6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/structures/rotated_boxes.py +++ /dev/null @@ -1,481 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from typing import Iterator, Union -import torch - -from detectron2.layers.rotated_boxes import pairwise_iou_rotated - -from .boxes import Boxes - - -class RotatedBoxes(Boxes): - """ - This structure stores a list of rotated boxes as a Nx5 torch.Tensor. - It supports some common methods about boxes - (`area`, `clip`, `nonempty`, etc), - and also behaves like a Tensor - (support indexing, `to(device)`, `.device`, and iteration over all boxes) - """ - - def __init__(self, tensor: torch.Tensor): - """ - Args: - tensor (Tensor[float]): a Nx5 matrix. Each row is - (x_center, y_center, width, height, angle), - in which angle is represented in degrees. - While there's no strict range restriction for it, - the recommended principal range is between [-180, 180) degrees. - - Assume we have a horizontal box B = (x_center, y_center, width, height), - where width is along the x-axis and height is along the y-axis. - The rotated box B_rot (x_center, y_center, width, height, angle) - can be seen as: - - 1. When angle == 0: - B_rot == B - 2. When angle > 0: - B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW; - 3. When angle < 0: - B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW. 
- - Mathematically, since the right-handed coordinate system for image space - is (y, x), where y is top->down and x is left->right, the 4 vertices of the - rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from - the vertices of the horizontal rectangle (y_i, x_i) (i = 1, 2, 3, 4) - in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians, - (y_c, x_c) is the center of the rectangle): - - .. math:: - - yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c, - - xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c, - - which is the standard rigid-body rotation transformation. - - Intuitively, the angle is - (1) the rotation angle from y-axis in image space - to the height vector (top->down in the box's local coordinate system) - of the box in CCW, and - (2) the rotation angle from x-axis in image space - to the width vector (left->right in the box's local coordinate system) - of the box in CCW. - - More intuitively, consider the following horizontal box ABCD represented - in (x1, y1, x2, y2): (3, 2, 7, 4), - covering the [3, 7] x [2, 4] region of the continuous coordinate system - which looks like this: - - .. code:: none - - O--------> x - | - | A---B - | | | - | D---C - | - v y - - Note that each capital letter represents one 0-dimensional geometric point - instead of a 'square pixel' here. - - In the example above, using (x, y) to represent a point we have: - - .. math:: - - O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4) - - We name vector AB = vector DC as the width vector in box's local coordinate system, and - vector AD = vector BC as the height vector in box's local coordinate system. Initially, - when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis - in the image space, respectively. - - For better illustration, we denote the center of the box as E, - - .. code:: none - - O--------> x - | - | A---B - | | E | - | D---C - | - v y - - where the center E = ((3+7)/2, (2+4)/2) = (5, 3). - - Also, - - .. math:: - - width = |AB| = |CD| = 7 - 3 = 4, - height = |AD| = |BC| = 4 - 2 = 2. - - Therefore, the corresponding representation for the same shape in rotated box in - (x_center, y_center, width, height, angle) format is: - - (5, 3, 4, 2, 0), - - Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees - CCW (counter-clockwise) by definition. It looks like this: - - .. code:: none - - O--------> x - | B-C - | | | - | |E| - | | | - | A-D - v y - - The center E is still located at the same point (5, 3), while the vertices - ABCD are rotated by 90 degrees CCW with regard to E: - A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5) - - Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to - vector AD or vector BC (the top->down height vector in box's local coordinate system), - or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right - width vector in box's local coordinate system). - - .. math:: - - width = |AB| = |CD| = 5 - 1 = 4, - height = |AD| = |BC| = 6 - 4 = 2. - - Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise) - by definition? It looks like this: - - .. code:: none - - O--------> x - | D-A - | | | - | |E| - | | | - | C-B - v y - - The center E is still located at the same point (5, 3), while the vertices - ABCD are rotated by 90 degrees CW with regard to E: - A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1) - - .. 
math:: - - width = |AB| = |CD| = 5 - 1 = 4, - height = |AD| = |BC| = 6 - 4 = 2. - - This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU - will be 1. However, these two will generate different RoI Pooling results and - should not be treated as an identical box. - - On the other hand, it's easy to see that (X, Y, W, H, A) is identical to - (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be - identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is - equivalent to rotating the same shape 90 degrees CW. - - We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180): - - .. code:: none - - O--------> x - | - | C---D - | | E | - | B---A - | - v y - - .. math:: - - A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2), - - width = |AB| = |CD| = 7 - 3 = 4, - height = |AD| = |BC| = 4 - 2 = 2. - - Finally, this is a very inaccurate (heavily quantized) illustration of - how (5, 3, 4, 2, 60) looks like in case anyone wonders: - - .. code:: none - - O--------> x - | B\ - | / C - | /E / - | A / - | `D - v y - - It's still a rectangle with center of (5, 3), width of 4 and height of 2, - but its angle (and thus orientation) is somewhere between - (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90). - """ - device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") - tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) - if tensor.numel() == 0: - # Use reshape, so we don't end up creating a new tensor that does not depend on - # the inputs (and consequently confuses jit) - tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device) - assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size() - - self.tensor = tensor - - def clone(self) -> "RotatedBoxes": - """ - Clone the RotatedBoxes. - - Returns: - RotatedBoxes - """ - return RotatedBoxes(self.tensor.clone()) - - def to(self, device: str) -> "RotatedBoxes": - return RotatedBoxes(self.tensor.to(device)) - - def area(self) -> torch.Tensor: - """ - Computes the area of all the boxes. - - Returns: - torch.Tensor: a vector with areas of each box. - """ - box = self.tensor - area = box[:, 2] * box[:, 3] - return area - - def normalize_angles(self) -> None: - """ - Restrict angles to the range of [-180, 180) degrees - """ - self.tensor[:, 4] = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0 - - def clip(self, box_size: Boxes.BoxSizeType, clip_angle_threshold: float = 1.0) -> None: - """ - Clip (in place) the boxes by limiting x coordinates to the range [0, width] - and y coordinates to the range [0, height]. - - For RRPN: - Only clip boxes that are almost horizontal with a tolerance of - clip_angle_threshold to maintain backward compatibility. - - Rotated boxes beyond this threshold are not clipped for two reasons: - - 1. There are potentially multiple ways to clip a rotated box to make it - fit within the image. - 2. It's tricky to make the entire rectangular box fit within the image - and still be able to not leave out pixels of interest. - - Therefore we rely on ops like RoIAlignRotated to safely handle this. - - Args: - box_size (height, width): The clipping box's size. - clip_angle_threshold: - Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees), - we do the clipping as horizontal boxes. 
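The worked example in the docstring above, where (5, 3, 4, 2, 90) rotates ABCD to (4, 5), (4, 1), (6, 1), (6, 5), can be reproduced directly from the stated rotation formula. A small numpy check, independent of detectron2.

import numpy as np

def rotate_box_corners(xc, yc, w, h, angle_deg):
    # Corners of (x_center, y_center, width, height, angle) under the image-space
    # convention above: positive angles rotate the box CCW about its center.
    theta = np.deg2rad(angle_deg)
    c, s = np.cos(theta), np.sin(theta)
    # A, B, C, D of the axis-aligned box, as (x, y) points.
    corners = np.array([[xc - w / 2, yc - h / 2],
                        [xc + w / 2, yc - h / 2],
                        [xc + w / 2, yc + h / 2],
                        [xc - w / 2, yc + h / 2]])
    x, y = corners[:, 0], corners[:, 1]
    xr = s * (y - yc) + c * (x - xc) + xc
    yr = c * (y - yc) - s * (x - xc) + yc
    return np.stack([xr, yr], axis=1)

print(np.round(rotate_box_corners(5, 3, 4, 2, 90), 3))
# [[4. 5.]
#  [4. 1.]
#  [6. 1.]
#  [6. 5.]]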
- """ - h, w = box_size - - # normalize angles to be within (-180, 180] degrees - self.normalize_angles() - - idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0] - - # convert to (x1, y1, x2, y2) - x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0 - y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0 - x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0 - y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0 - - # clip - x1.clamp_(min=0, max=w) - y1.clamp_(min=0, max=h) - x2.clamp_(min=0, max=w) - y2.clamp_(min=0, max=h) - - # convert back to (xc, yc, w, h) - self.tensor[idx, 0] = (x1 + x2) / 2.0 - self.tensor[idx, 1] = (y1 + y2) / 2.0 - # make sure widths and heights do not increase due to numerical errors - self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1) - self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1) - - def nonempty(self, threshold: float = 0.0) -> torch.Tensor: - """ - Find boxes that are non-empty. - A box is considered empty, if either of its side is no larger than threshold. - - Returns: - Tensor: a binary vector which represents - whether each box is empty (False) or non-empty (True). - """ - box = self.tensor - widths = box[:, 2] - heights = box[:, 3] - keep = (widths > threshold) & (heights > threshold) - return keep - - def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "RotatedBoxes": - """ - Returns: - RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing. - - The following usage are allowed: - - 1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box. - 2. `new_boxes = boxes[2:10]`: return a slice of boxes. - 3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor - with `length = len(boxes)`. Nonzero elements in the vector will be selected. - - Note that the returned RotatedBoxes might share storage with this RotatedBoxes, - subject to Pytorch's indexing semantics. - """ - if isinstance(item, int): - return RotatedBoxes(self.tensor[item].view(1, -1)) - b = self.tensor[item] - assert b.dim() == 2, "Indexing on RotatedBoxes with {} failed to return a matrix!".format( - item - ) - return RotatedBoxes(b) - - def __len__(self) -> int: - return self.tensor.shape[0] - - def __repr__(self) -> str: - return "RotatedBoxes(" + str(self.tensor) + ")" - - def inside_box(self, box_size: Boxes.BoxSizeType, boundary_threshold: int = 0) -> torch.Tensor: - """ - Args: - box_size (height, width): Size of the reference box covering - [0, width] x [0, height] - boundary_threshold (int): Boxes that extend beyond the reference box - boundary by more than boundary_threshold are considered "outside". - - For RRPN, it might not be necessary to call this function since it's common - for rotated box to extend to outside of the image boundaries - (the clip function only clips the near-horizontal boxes) - - Returns: - a binary vector, indicating whether each box is inside the reference box. 
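A sketch of the clipping behaviour just described, assuming detectron2 is importable: only the near-horizontal box (within the default clip_angle_threshold of 1 degree) is pulled back inside the image, while the 45-degree box is left for downstream ops such as RoIAlignRotated to handle. The box values are made up.

import torch
from detectron2.structures import RotatedBoxes

boxes = RotatedBoxes(torch.tensor([
    [95.0, 50.0, 20.0, 10.0, 0.0],    # near-horizontal, sticks out past x = 100
    [95.0, 50.0, 20.0, 10.0, 45.0],   # rotated well beyond the threshold
]))
boxes.clip(box_size=(100, 100))        # (height, width)
print(boxes.tensor)
# row 0 is clamped so that x_center + width/2 <= 100; row 1 is unchanged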
- """ - height, width = box_size - - cnt_x = self.tensor[..., 0] - cnt_y = self.tensor[..., 1] - half_w = self.tensor[..., 2] / 2.0 - half_h = self.tensor[..., 3] / 2.0 - a = self.tensor[..., 4] - c = torch.abs(torch.cos(a * math.pi / 180.0)) - s = torch.abs(torch.sin(a * math.pi / 180.0)) - # This basically computes the horizontal bounding rectangle of the rotated box - max_rect_dx = c * half_w + s * half_h - max_rect_dy = c * half_h + s * half_w - - inds_inside = ( - (cnt_x - max_rect_dx >= -boundary_threshold) - & (cnt_y - max_rect_dy >= -boundary_threshold) - & (cnt_x + max_rect_dx < width + boundary_threshold) - & (cnt_y + max_rect_dy < height + boundary_threshold) - ) - - return inds_inside - - def get_centers(self) -> torch.Tensor: - """ - Returns: - The box centers in a Nx2 array of (x, y). - """ - return self.tensor[:, :2] - - def scale(self, scale_x: float, scale_y: float) -> None: - """ - Scale the rotated box with horizontal and vertical scaling factors - Note: when scale_factor_x != scale_factor_y, - the rotated box does not preserve the rectangular shape when the angle - is not a multiple of 90 degrees under resize transformation. - Instead, the shape is a parallelogram (that has skew) - Here we make an approximation by fitting a rotated rectangle to the parallelogram. - """ - self.tensor[:, 0] *= scale_x - self.tensor[:, 1] *= scale_y - theta = self.tensor[:, 4] * math.pi / 180.0 - c = torch.cos(theta) - s = torch.sin(theta) - - # In image space, y is top->down and x is left->right - # Consider the local coordintate system for the rotated box, - # where the box center is located at (0, 0), and the four vertices ABCD are - # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2) - # the midpoint of the left edge AD of the rotated box E is: - # E = (A+D)/2 = (-w / 2, 0) - # the midpoint of the top edge AB of the rotated box F is: - # F(0, -h / 2) - # To get the old coordinates in the global system, apply the rotation transformation - # (Note: the right-handed coordinate system for image space is yOx): - # (old_x, old_y) = (s * y + c * x, c * y - s * x) - # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2) - # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2) - # After applying the scaling factor (sfx, sfy): - # E(new) = (-sfx * c * w / 2, sfy * s * w / 2) - # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2) - # The new width after scaling tranformation becomes: - - # w(new) = |E(new) - O| * 2 - # = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2 - # = sqrt[(sfx * c)^2 + (sfy * s)^2] * w - # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2] - # - # For example, - # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x; - # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y - self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2) - - # h(new) = |F(new) - O| * 2 - # = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2 - # = sqrt[(sfx * s)^2 + (sfy * c)^2] * h - # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2] - # - # For example, - # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y; - # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x - self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2) - - # The angle is the rotation angle from y-axis in image space to the height - # vector (top->down in the box's local coordinate system) of the box in CCW. 
- # - # angle(new) = angle_yOx(O - F(new)) - # = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) ) - # = atan2(sfx * s * h / 2, sfy * c * h / 2) - # = atan2(sfx * s, sfy * c) - # - # For example, - # when sfx == sfy, angle(new) == atan2(s, c) == angle(old) - self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi - - @property - def device(self) -> str: - return self.tensor.device - - def __iter__(self) -> Iterator[torch.Tensor]: - """ - Yield a box as a Tensor of shape (5,) at a time. - """ - yield from self.tensor - - -def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> None: - """ - Given two lists of rotated boxes of size N and M, - compute the IoU (intersection over union) - between __all__ N x M pairs of boxes. - The box order must be (x_center, y_center, width, height, angle). - - Args: - boxes1, boxes2 (RotatedBoxes): - two `RotatedBoxes`. Contains N & M rotated boxes, respectively. - - Returns: - Tensor: IoU, sized [N,M]. - """ - - return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/README.md b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/README.md deleted file mode 100644 index 9765b24..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Utility functions - -This folder contain utility functions that are not used in the -core library, but are useful for building models or training -code using the config system. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/__init__.py deleted file mode 100644 index 168f997..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/analysis.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/analysis.py deleted file mode 100644 index c48e376..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/analysis.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -# -*- coding: utf-8 -*- - -import logging -import typing -import torch -from fvcore.nn import activation_count, flop_count, parameter_count, parameter_count_table -from torch import nn - -from detectron2.structures import BitMasks, Boxes, ImageList, Instances - -from .logger import log_first_n - -__all__ = [ - "activation_count_operators", - "flop_count_operators", - "parameter_count_table", - "parameter_count", -] - -FLOPS_MODE = "flops" -ACTIVATIONS_MODE = "activations" - - -# some extra ops to ignore from counting. -_IGNORED_OPS = [ - "aten::add", - "aten::add_", - "aten::batch_norm", - "aten::constant_pad_nd", - "aten::div", - "aten::div_", - "aten::exp", - "aten::log2", - "aten::max_pool2d", - "aten::meshgrid", - "aten::mul", - "aten::mul_", - "aten::nonzero_numpy", - "aten::relu", - "aten::relu_", - "aten::rsub", - "aten::sigmoid", - "aten::sigmoid_", - "aten::softmax", - "aten::sort", - "aten::sqrt", - "aten::sub", - "aten::upsample_nearest2d", - "prim::PythonOp", - "torchvision::nms", -] - - -def flop_count_operators( - model: nn.Module, inputs: list, **kwargs -) -> typing.DefaultDict[str, float]: - """ - Implement operator-level flops counting using jit. 
- This is a wrapper of fvcore.nn.flop_count, that supports standard detection models - in detectron2. - - Note: - The function runs the input through the model to compute flops. - The flops of a detection model is often input-dependent, for example, - the flops of box & mask head depends on the number of proposals & - the number of detected objects. - Therefore, the flops counting using a single input may not accurately - reflect the computation cost of a model. - - Args: - model: a detectron2 model that takes `list[dict]` as input. - inputs (list[dict]): inputs to model, in detectron2's standard format. - """ - return _wrapper_count_operators(model=model, inputs=inputs, mode=FLOPS_MODE, **kwargs) - - -def activation_count_operators( - model: nn.Module, inputs: list, **kwargs -) -> typing.DefaultDict[str, float]: - """ - Implement operator-level activations counting using jit. - This is a wrapper of fvcore.nn.activation_count, that supports standard detection models - in detectron2. - - Note: - The function runs the input through the model to compute activations. - The activations of a detection model is often input-dependent, for example, - the activations of box & mask head depends on the number of proposals & - the number of detected objects. - - Args: - model: a detectron2 model that takes `list[dict]` as input. - inputs (list[dict]): inputs to model, in detectron2's standard format. - """ - return _wrapper_count_operators(model=model, inputs=inputs, mode=ACTIVATIONS_MODE, **kwargs) - - -def _flatten_to_tuple(outputs): - result = [] - if isinstance(outputs, torch.Tensor): - result.append(outputs) - elif isinstance(outputs, (list, tuple)): - for v in outputs: - result.extend(_flatten_to_tuple(v)) - elif isinstance(outputs, dict): - for _, v in outputs.items(): - result.extend(_flatten_to_tuple(v)) - elif isinstance(outputs, Instances): - result.extend(_flatten_to_tuple(outputs.get_fields())) - elif isinstance(outputs, (Boxes, BitMasks, ImageList)): - result.append(outputs.tensor) - else: - log_first_n( - logging.WARN, - f"Output of type {type(outputs)} not included in flops/activations count.", - n=10, - ) - return tuple(result) - - -def _wrapper_count_operators( - model: nn.Module, inputs: list, mode: str, **kwargs -) -> typing.DefaultDict[str, float]: - - # ignore some ops - supported_ops = {k: lambda *args, **kwargs: {} for k in _IGNORED_OPS} - supported_ops.update(kwargs.pop("supported_ops", {})) - kwargs["supported_ops"] = supported_ops - - assert len(inputs) == 1, "Please use batch size=1" - tensor_input = inputs[0]["image"] - - class WrapModel(nn.Module): - def __init__(self, model): - super().__init__() - if isinstance( - model, (nn.parallel.distributed.DistributedDataParallel, nn.DataParallel) - ): - self.model = model.module - else: - self.model = model - - def forward(self, image): - # jit requires the input/output to be Tensors - inputs = [{"image": image}] - outputs = self.model.forward(inputs) - # Only the subgraph that computes the returned tuple of tensor will be - # counted. So we flatten everything we found to tuple of tensors. 
- return _flatten_to_tuple(outputs) - - old_train = model.training - with torch.no_grad(): - if mode == FLOPS_MODE: - ret = flop_count(WrapModel(model).train(False), (tensor_input,), **kwargs) - elif mode == ACTIVATIONS_MODE: - ret = activation_count(WrapModel(model).train(False), (tensor_input,), **kwargs) - else: - raise NotImplementedError("Count for mode {} is not supported yet.".format(mode)) - # compatible with change in fvcore - if isinstance(ret, tuple): - ret = ret[0] - model.train(old_train) - return ret diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/collect_env.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/collect_env.py deleted file mode 100644 index c25b99c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/collect_env.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import importlib -import numpy as np -import os -import re -import subprocess -import sys -from collections import defaultdict -import PIL -import torch -import torchvision -from tabulate import tabulate - -__all__ = ["collect_env_info"] - - -def collect_torch_env(): - try: - import torch.__config__ - - return torch.__config__.show() - except ImportError: - # compatible with older versions of pytorch - from torch.utils.collect_env import get_pretty_env_info - - return get_pretty_env_info() - - -def get_env_module(): - var_name = "DETECTRON2_ENV_MODULE" - return var_name, os.environ.get(var_name, "") - - -def detect_compute_compatibility(CUDA_HOME, so_file): - try: - cuobjdump = os.path.join(CUDA_HOME, "bin", "cuobjdump") - if os.path.isfile(cuobjdump): - output = subprocess.check_output( - "'{}' --list-elf '{}'".format(cuobjdump, so_file), shell=True - ) - output = output.decode("utf-8").strip().split("\n") - sm = [] - for line in output: - line = re.findall(r"\.sm_[0-9]*\.", line)[0] - sm.append(line.strip(".")) - sm = sorted(set(sm)) - return ", ".join(sm) - else: - return so_file + "; cannot find cuobjdump" - except Exception: - # unhandled failure - return so_file - - -def collect_env_info(): - has_cuda = torch.cuda.is_available() - # NOTE: the use of CUDA_HOME requires the CUDA build deps, though in - # theory detectron2 should be made runnable with only the CUDA runtime - from torch.utils.cpp_extension import CUDA_HOME - - data = [] - data.append(("sys.platform", sys.platform)) - data.append(("Python", sys.version.replace("\n", ""))) - data.append(("numpy", np.__version__)) - - try: - import detectron2 # noqa - - data.append( - ("detectron2", detectron2.__version__ + " @" + os.path.dirname(detectron2.__file__)) - ) - except ImportError: - data.append(("detectron2", "failed to import")) - else: - try: - from detectron2 import _C - except ImportError: - data.append(("detectron2._C", "failed to import")) - else: - data.append(("detectron2 compiler", _C.get_compiler_version())) - data.append(("detectron2 CUDA compiler", _C.get_cuda_version())) - if has_cuda: - data.append( - ("detectron2 arch flags", detect_compute_compatibility(CUDA_HOME, _C.__file__)) - ) - - data.append(get_env_module()) - data.append(("PyTorch", torch.__version__ + " @" + os.path.dirname(torch.__file__))) - data.append(("PyTorch debug build", torch.version.debug)) - - data.append(("CUDA available", has_cuda)) - if has_cuda: - devices = defaultdict(list) - for k in range(torch.cuda.device_count()): - devices[torch.cuda.get_device_name(k)].append(str(k)) - for name, devids in devices.items(): - 
data.append(("GPU " + ",".join(devids), name)) - - from torch.utils.cpp_extension import CUDA_HOME - - data.append(("CUDA_HOME", str(CUDA_HOME))) - - if CUDA_HOME is not None and os.path.isdir(CUDA_HOME): - try: - nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") - nvcc = subprocess.check_output("'{}' -V | tail -n1".format(nvcc), shell=True) - nvcc = nvcc.decode("utf-8").strip() - except subprocess.SubprocessError: - nvcc = "Not Available" - data.append(("NVCC", nvcc)) - - cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) - if cuda_arch_list: - data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) - data.append(("Pillow", PIL.__version__)) - - try: - data.append( - ( - "torchvision", - str(torchvision.__version__) + " @" + os.path.dirname(torchvision.__file__), - ) - ) - if has_cuda: - try: - torchvision_C = importlib.util.find_spec("torchvision._C").origin - msg = detect_compute_compatibility(CUDA_HOME, torchvision_C) - data.append(("torchvision arch flags", msg)) - except ImportError: - data.append(("torchvision._C", "failed to find")) - except AttributeError: - data.append(("torchvision", "unknown")) - - try: - import fvcore - - data.append(("fvcore", fvcore.__version__)) - except ImportError: - pass - - try: - import cv2 - - data.append(("cv2", cv2.__version__)) - except ImportError: - pass - env_str = tabulate(data) + "\n" - env_str += collect_torch_env() - return env_str - - -if __name__ == "__main__": - try: - import detectron2 # noqa - except ImportError: - print(collect_env_info()) - else: - from detectron2.utils.collect_env import collect_env_info - - print(collect_env_info()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/colormap.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/colormap.py deleted file mode 100644 index 1bf1455..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/colormap.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -""" -An awesome colormap for really neat visualizations. -Copied from Detectron, and removed gray colors. 
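The colormap module whose docstring appears above exposes two small helpers, colormap() and random_color(). A usage sketch, assuming detectron2 is importable; note that the BGR default matches OpenCV's channel order, so rgb=True is only needed for RGB pipelines.

import numpy as np
from detectron2.utils.colormap import colormap, random_color

colors = colormap(rgb=True, maximum=255)   # (num_colors, 3) float32 array in RGB, range [0, 255]
print(colors.shape, colors[0])             # first entry is roughly [0, 114, 189]

color = random_color(rgb=True, maximum=1)  # a single random color in [0, 1]
print(color)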
-""" - -import numpy as np - -__all__ = ["colormap", "random_color"] - -# fmt: off -# RGB: -_COLORS = np.array( - [ - 0.000, 0.447, 0.741, - 0.850, 0.325, 0.098, - 0.929, 0.694, 0.125, - 0.494, 0.184, 0.556, - 0.466, 0.674, 0.188, - 0.301, 0.745, 0.933, - 0.635, 0.078, 0.184, - 0.300, 0.300, 0.300, - 0.600, 0.600, 0.600, - 1.000, 0.000, 0.000, - 1.000, 0.500, 0.000, - 0.749, 0.749, 0.000, - 0.000, 1.000, 0.000, - 0.000, 0.000, 1.000, - 0.667, 0.000, 1.000, - 0.333, 0.333, 0.000, - 0.333, 0.667, 0.000, - 0.333, 1.000, 0.000, - 0.667, 0.333, 0.000, - 0.667, 0.667, 0.000, - 0.667, 1.000, 0.000, - 1.000, 0.333, 0.000, - 1.000, 0.667, 0.000, - 1.000, 1.000, 0.000, - 0.000, 0.333, 0.500, - 0.000, 0.667, 0.500, - 0.000, 1.000, 0.500, - 0.333, 0.000, 0.500, - 0.333, 0.333, 0.500, - 0.333, 0.667, 0.500, - 0.333, 1.000, 0.500, - 0.667, 0.000, 0.500, - 0.667, 0.333, 0.500, - 0.667, 0.667, 0.500, - 0.667, 1.000, 0.500, - 1.000, 0.000, 0.500, - 1.000, 0.333, 0.500, - 1.000, 0.667, 0.500, - 1.000, 1.000, 0.500, - 0.000, 0.333, 1.000, - 0.000, 0.667, 1.000, - 0.000, 1.000, 1.000, - 0.333, 0.000, 1.000, - 0.333, 0.333, 1.000, - 0.333, 0.667, 1.000, - 0.333, 1.000, 1.000, - 0.667, 0.000, 1.000, - 0.667, 0.333, 1.000, - 0.667, 0.667, 1.000, - 0.667, 1.000, 1.000, - 1.000, 0.000, 1.000, - 1.000, 0.333, 1.000, - 1.000, 0.667, 1.000, - 0.333, 0.000, 0.000, - 0.500, 0.000, 0.000, - 0.667, 0.000, 0.000, - 0.833, 0.000, 0.000, - 1.000, 0.000, 0.000, - 0.000, 0.167, 0.000, - 0.000, 0.333, 0.000, - 0.000, 0.500, 0.000, - 0.000, 0.667, 0.000, - 0.000, 0.833, 0.000, - 0.000, 1.000, 0.000, - 0.000, 0.000, 0.167, - 0.000, 0.000, 0.333, - 0.000, 0.000, 0.500, - 0.000, 0.000, 0.667, - 0.000, 0.000, 0.833, - 0.000, 0.000, 1.000, - 0.000, 0.000, 0.000, - 0.143, 0.143, 0.143, - 0.857, 0.857, 0.857, - 1.000, 1.000, 1.000 - ] -).astype(np.float32).reshape(-1, 3) -# fmt: on - - -def colormap(rgb=False, maximum=255): - """ - Args: - rgb (bool): whether to return RGB colors or BGR colors. - maximum (int): either 255 or 1 - - Returns: - ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] - """ - assert maximum in [255, 1], maximum - c = _COLORS * maximum - if not rgb: - c = c[:, ::-1] - return c - - -def random_color(rgb=False, maximum=255): - """ - Args: - rgb (bool): whether to return RGB colors or BGR colors. - maximum (int): either 255 or 1 - - Returns: - ndarray: a vector of 3 numbers - """ - idx = np.random.randint(0, len(_COLORS)) - ret = _COLORS[idx] * maximum - if not rgb: - ret = ret[::-1] - return ret - - -if __name__ == "__main__": - import cv2 - - size = 100 - H, W = 10, 10 - canvas = np.random.rand(H * size, W * size, 3).astype("float32") - for h in range(H): - for w in range(W): - idx = h * W + w - if idx >= len(_COLORS): - break - canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] - cv2.imshow("a", canvas) - cv2.waitKey(0) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/comm.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/comm.py deleted file mode 100644 index 8cc7b3d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/comm.py +++ /dev/null @@ -1,263 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -This file contains primitives for multi-gpu communication. -This is useful when doing distributed training. 
-""" - -import functools -import logging -import numpy as np -import pickle -import torch -import torch.distributed as dist - -_LOCAL_PROCESS_GROUP = None -""" -A torch process group which only includes processes that on the same machine as the current process. -This variable is set when processes are spawned by `launch()` in "engine/launch.py". -""" - - -def get_world_size() -> int: - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size() - - -def get_rank() -> int: - if not dist.is_available(): - return 0 - if not dist.is_initialized(): - return 0 - return dist.get_rank() - - -def get_local_rank() -> int: - """ - Returns: - The rank of the current process within the local (per-machine) process group. - """ - if not dist.is_available(): - return 0 - if not dist.is_initialized(): - return 0 - assert _LOCAL_PROCESS_GROUP is not None - return dist.get_rank(group=_LOCAL_PROCESS_GROUP) - - -def get_local_size() -> int: - """ - Returns: - The size of the per-machine process group, - i.e. the number of processes per machine. - """ - if not dist.is_available(): - return 1 - if not dist.is_initialized(): - return 1 - return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) - - -def is_main_process() -> bool: - return get_rank() == 0 - - -def synchronize(): - """ - Helper function to synchronize (barrier) among all processes when - using distributed training - """ - if not dist.is_available(): - return - if not dist.is_initialized(): - return - world_size = dist.get_world_size() - if world_size == 1: - return - dist.barrier() - - -@functools.lru_cache() -def _get_global_gloo_group(): - """ - Return a process group based on gloo backend, containing all the ranks - The result is cached. - """ - if dist.get_backend() == "nccl": - return dist.new_group(backend="gloo") - else: - return dist.group.WORLD - - -def _serialize_to_tensor(data, group): - backend = dist.get_backend(group) - assert backend in ["gloo", "nccl"] - device = torch.device("cpu" if backend == "gloo" else "cuda") - - buffer = pickle.dumps(data) - if len(buffer) > 1024 ** 3: - logger = logging.getLogger(__name__) - logger.warning( - "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( - get_rank(), len(buffer) / (1024 ** 3), device - ) - ) - storage = torch.ByteStorage.from_buffer(buffer) - tensor = torch.ByteTensor(storage).to(device=device) - return tensor - - -def _pad_to_largest_tensor(tensor, group): - """ - Returns: - list[int]: size of the tensor, on each rank - Tensor: padded tensor that has the max size - """ - world_size = dist.get_world_size(group=group) - assert ( - world_size >= 1 - ), "comm.gather/all_gather must be called from ranks within the given group!" - local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) - size_list = [ - torch.zeros([1], dtype=torch.int64, device=tensor.device) for _ in range(world_size) - ] - dist.all_gather(size_list, local_size, group=group) - size_list = [int(size.item()) for size in size_list] - - max_size = max(size_list) - - # we pad the tensor because torch all_gather does not support - # gathering tensors of different shapes - if local_size != max_size: - padding = torch.zeros((max_size - local_size,), dtype=torch.uint8, device=tensor.device) - tensor = torch.cat((tensor, padding), dim=0) - return size_list, tensor - - -def all_gather(data, group=None): - """ - Run all_gather on arbitrary picklable data (not necessarily tensors). 
- - Args: - data: any picklable object - group: a torch process group. By default, will use a group which - contains all ranks on gloo backend. - - Returns: - list[data]: list of data gathered from each rank - """ - if get_world_size() == 1: - return [data] - if group is None: - group = _get_global_gloo_group() - if dist.get_world_size(group) == 1: - return [data] - - tensor = _serialize_to_tensor(data, group) - - size_list, tensor = _pad_to_largest_tensor(tensor, group) - max_size = max(size_list) - - # receiving Tensor from all ranks - tensor_list = [ - torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list - ] - dist.all_gather(tensor_list, tensor, group=group) - - data_list = [] - for size, tensor in zip(size_list, tensor_list): - buffer = tensor.cpu().numpy().tobytes()[:size] - data_list.append(pickle.loads(buffer)) - - return data_list - - -def gather(data, dst=0, group=None): - """ - Run gather on arbitrary picklable data (not necessarily tensors). - - Args: - data: any picklable object - dst (int): destination rank - group: a torch process group. By default, will use a group which - contains all ranks on gloo backend. - - Returns: - list[data]: on dst, a list of data gathered from each rank. Otherwise, - an empty list. - """ - if get_world_size() == 1: - return [data] - if group is None: - group = _get_global_gloo_group() - if dist.get_world_size(group=group) == 1: - return [data] - rank = dist.get_rank(group=group) - - tensor = _serialize_to_tensor(data, group) - size_list, tensor = _pad_to_largest_tensor(tensor, group) - - # receiving Tensor from all ranks - if rank == dst: - max_size = max(size_list) - tensor_list = [ - torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) for _ in size_list - ] - dist.gather(tensor, tensor_list, dst=dst, group=group) - - data_list = [] - for size, tensor in zip(size_list, tensor_list): - buffer = tensor.cpu().numpy().tobytes()[:size] - data_list.append(pickle.loads(buffer)) - return data_list - else: - dist.gather(tensor, [], dst=dst, group=group) - return [] - - -def shared_random_seed(): - """ - Returns: - int: a random number that is the same across all workers. - If workers need a shared RNG, they can use this shared seed to - create one. - - All workers must call this function, otherwise it will deadlock. - """ - ints = np.random.randint(2 ** 31) - all_ints = all_gather(ints) - return all_ints[0] - - -def reduce_dict(input_dict, average=True): - """ - Reduce the values in the dictionary from all processes so that process with rank - 0 has the reduced results. - - Args: - input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. - average (bool): whether to do average or sum - - Returns: - a dict with the same keys as input_dict, after reduction. 
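A sketch of how these primitives are typically combined in a training loop, assuming a CUDA device and a process group initialized by detectron2's launch(); with a single process the calls degrade to no-ops that return the inputs. The loss values are placeholders.

import torch
from detectron2.utils import comm

loss_dict = {"loss_cls": torch.tensor(0.7, device="cuda"),
             "loss_box_reg": torch.tensor(0.3, device="cuda")}

# Average the scalar losses across workers; only rank 0 receives the reduced values.
reduced = comm.reduce_dict(loss_dict, average=True)

# Gather arbitrary picklable objects (here, a per-rank count) on every rank.
counts = comm.all_gather(len(loss_dict))

if comm.is_main_process():
    print(reduced, counts)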
- """ - world_size = get_world_size() - if world_size < 2: - return input_dict - with torch.no_grad(): - names = [] - values = [] - # sort the keys so that they are consistent across processes - for k in sorted(input_dict.keys()): - names.append(k) - values.append(input_dict[k]) - values = torch.stack(values, dim=0) - dist.reduce(values, dst=0) - if dist.get_rank() == 0 and average: - # only main process gets accumulated, so only divide by - # world_size in this case - values /= world_size - reduced_dict = {k: v for k, v in zip(names, values)} - return reduced_dict diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/env.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/env.py deleted file mode 100644 index 6769cae..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/env.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import importlib -import importlib.util -import logging -import numpy as np -import os -import random -import sys -from datetime import datetime -import torch - -__all__ = ["seed_all_rng"] - - -def seed_all_rng(seed=None): - """ - Set the random seed for the RNG in torch, numpy and python. - - Args: - seed (int): if None, will use a strong random seed. - """ - if seed is None: - seed = ( - os.getpid() - + int(datetime.now().strftime("%S%f")) - + int.from_bytes(os.urandom(2), "big") - ) - logger = logging.getLogger(__name__) - logger.info("Using a generated random seed {}".format(seed)) - np.random.seed(seed) - torch.set_rng_state(torch.manual_seed(seed).get_state()) - random.seed(seed) - - -# from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path -def _import_file(module_name, file_path, make_importable=False): - spec = importlib.util.spec_from_file_location(module_name, file_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - if make_importable: - sys.modules[module_name] = module - return module - - -def _configure_libraries(): - """ - Configurations for some libraries. - """ - # An environment option to disable `import cv2` globally, - # in case it leads to negative performance impact - disable_cv2 = int(os.environ.get("DETECTRON2_DISABLE_CV2", False)) - if disable_cv2: - sys.modules["cv2"] = None - else: - # Disable opencl in opencv since its interaction with cuda often has negative effects - # This envvar is supported after OpenCV 3.4.0 - os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" - try: - import cv2 - - if int(cv2.__version__.split(".")[0]) >= 3: - cv2.ocl.setUseOpenCL(False) - except ImportError: - pass - - def get_version(module, digit=2): - return tuple(map(int, module.__version__.split(".")[:digit])) - - # fmt: off - assert get_version(torch) >= (1, 4), "Requires torch>=1.4" - import fvcore - assert get_version(fvcore, 3) >= (0, 1, 1), "Requires fvcore>=0.1.1" - import yaml - assert get_version(yaml) >= (5, 1), "Requires pyyaml>=5.1" - # fmt: on - - -_ENV_SETUP_DONE = False - - -def setup_environment(): - """Perform environment setup work. The default setup is a no-op, but this - function allows the user to specify a Python source file or a module in - the $DETECTRON2_ENV_MODULE environment variable, that performs - custom setup work that may be necessary to their computing environment. 
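A sketch of the $DETECTRON2_ENV_MODULE hook described above. The file name my_env_setup.py and its body are hypothetical; the only requirement is that the module expose a callable named setup_environment.

# my_env_setup.py -- hypothetical custom setup module.
# Point detectron2 at it with:  export DETECTRON2_ENV_MODULE=/path/to/my_env_setup.py

import os

def setup_environment():
    # Whatever site-specific tweaks the cluster needs, e.g. cache locations.
    os.environ.setdefault("TORCH_HOME", "/scratch/torch_cache")
    print("custom environment setup ran")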
- """ - global _ENV_SETUP_DONE - if _ENV_SETUP_DONE: - return - _ENV_SETUP_DONE = True - - _configure_libraries() - - custom_module_path = os.environ.get("DETECTRON2_ENV_MODULE") - - if custom_module_path: - setup_custom_environment(custom_module_path) - else: - # The default setup is a no-op - pass - - -def setup_custom_environment(custom_module): - """ - Load custom environment setup by importing a Python source file or a - module, and run the setup function. - """ - if custom_module.endswith(".py"): - module = _import_file("detectron2.utils.env.custom_module", custom_module) - else: - module = importlib.import_module(custom_module) - assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( - "Custom environment module defined in {} does not have the " - "required callable attribute 'setup_environment'." - ).format(custom_module) - module.setup_environment() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/events.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/events.py deleted file mode 100644 index a3c57ed..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/events.py +++ /dev/null @@ -1,432 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import datetime -import json -import logging -import os -import time -from collections import defaultdict -from contextlib import contextmanager -import torch -from fvcore.common.file_io import PathManager -from fvcore.common.history_buffer import HistoryBuffer - -_CURRENT_STORAGE_STACK = [] - - -def get_event_storage(): - """ - Returns: - The :class:`EventStorage` object that's currently being used. - Throws an error if no :class:`EventStorage` is currently enabled. - """ - assert len( - _CURRENT_STORAGE_STACK - ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" - return _CURRENT_STORAGE_STACK[-1] - - -class EventWriter: - """ - Base class for writers that obtain events from :class:`EventStorage` and process them. - """ - - def write(self): - raise NotImplementedError - - def close(self): - pass - - -class JSONWriter(EventWriter): - """ - Write scalars to a json file. - - It saves scalars as one json per line (instead of a big json) for easy parsing. - - Examples parsing such a json file: - - .. code-block:: none - - $ cat metrics.json | jq -s '.[0:2]' - [ - { - "data_time": 0.008433341979980469, - "iteration": 20, - "loss": 1.9228371381759644, - "loss_box_reg": 0.050025828182697296, - "loss_classifier": 0.5316952466964722, - "loss_mask": 0.7236229181289673, - "loss_rpn_box": 0.0856662318110466, - "loss_rpn_cls": 0.48198649287223816, - "lr": 0.007173333333333333, - "time": 0.25401854515075684 - }, - { - "data_time": 0.007216215133666992, - "iteration": 40, - "loss": 1.282649278640747, - "loss_box_reg": 0.06222952902317047, - "loss_classifier": 0.30682939291000366, - "loss_mask": 0.6970193982124329, - "loss_rpn_box": 0.038663312792778015, - "loss_rpn_cls": 0.1471673548221588, - "lr": 0.007706666666666667, - "time": 0.2490077018737793 - } - ] - - $ cat metrics.json | jq '.loss_mask' - 0.7126231789588928 - 0.689423680305481 - 0.6776131987571716 - ... - - """ - - def __init__(self, json_file, window_size=20): - """ - Args: - json_file (str): path to the json file. New data will be appended if the file exists. - window_size (int): the window size of median smoothing for the scalars whose - `smoothing_hint` are True. 
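`setup_environment` only does real work when `$DETECTRON2_ENV_MODULE` points at a module or `.py` file exposing a `setup_environment()` callable. A hypothetical custom module (file name and the specific environment tweaks are illustrative, not part of the library):

```python
# my_env_setup.py -- referenced via:
#   export DETECTRON2_ENV_MODULE=/path/to/my_env_setup.py
import os


def setup_environment():
    # Example site-specific tweaks; adjust for your own cluster.
    os.environ.setdefault("OMP_NUM_THREADS", "1")
    os.environ.setdefault("NCCL_DEBUG", "WARN")
```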
- """ - self._file_handle = PathManager.open(json_file, "a") - self._window_size = window_size - - def write(self): - storage = get_event_storage() - to_save = {"iteration": storage.iter} - to_save.update(storage.latest_with_smoothing_hint(self._window_size)) - self._file_handle.write(json.dumps(to_save, sort_keys=True) + "\n") - self._file_handle.flush() - try: - os.fsync(self._file_handle.fileno()) - except AttributeError: - pass - - def close(self): - self._file_handle.close() - - -class TensorboardXWriter(EventWriter): - """ - Write all scalars to a tensorboard file. - """ - - def __init__(self, log_dir: str, window_size: int = 20, **kwargs): - """ - Args: - log_dir (str): the directory to save the output events - window_size (int): the scalars will be median-smoothed by this window size - - kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` - """ - self._window_size = window_size - from torch.utils.tensorboard import SummaryWriter - - self._writer = SummaryWriter(log_dir, **kwargs) - - def write(self): - storage = get_event_storage() - for k, v in storage.latest_with_smoothing_hint(self._window_size).items(): - self._writer.add_scalar(k, v, storage.iter) - - # storage.put_{image,histogram} is only meant to be used by - # tensorboard writer. So we access its internal fields directly from here. - if len(storage._vis_data) >= 1: - for img_name, img, step_num in storage._vis_data: - self._writer.add_image(img_name, img, step_num) - # Storage stores all image data and rely on this writer to clear them. - # As a result it assumes only one writer will use its image data. - # An alternative design is to let storage store limited recent - # data (e.g. only the most recent image) that all writers can access. - # In that case a writer may not see all image data if its period is long. - storage.clear_images() - - if len(storage._histograms) >= 1: - for params in storage._histograms: - self._writer.add_histogram_raw(**params) - storage.clear_histograms() - - def close(self): - if hasattr(self, "_writer"): # doesn't exist when the code fails at import - self._writer.close() - - -class CommonMetricPrinter(EventWriter): - """ - Print **common** metrics to the terminal, including - iteration time, ETA, memory, all losses, and the learning rate. - - To print something different, please implement a similar printer by yourself. - """ - - def __init__(self, max_iter): - """ - Args: - max_iter (int): the maximum number of iterations to train. - Used to compute ETA. 
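Because `JSONWriter` and `TensorboardXWriter` both just pull the latest (optionally smoothed) scalars from the active `EventStorage`, writing a custom writer is a few lines. A rough sketch of a console writer, assuming it is driven periodically the same way the built-in writers are:

```python
from detectron2.utils.events import EventWriter, get_event_storage


class ConsoleWriter(EventWriter):
    """Illustrative writer: dumps the latest smoothed scalars to stdout."""

    def __init__(self, window_size=20):
        self._window_size = window_size

    def write(self):
        storage = get_event_storage()
        scalars = storage.latest_with_smoothing_hint(self._window_size)
        line = "  ".join("{}: {:.4g}".format(k, v) for k, v in sorted(scalars.items()))
        print("iter {}: {}".format(storage.iter, line))

    def close(self):
        pass
```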
- """ - self.logger = logging.getLogger(__name__) - self._max_iter = max_iter - self._last_write = None - - def write(self): - storage = get_event_storage() - iteration = storage.iter - - try: - data_time = storage.history("data_time").avg(20) - except KeyError: - # they may not exist in the first few iterations (due to warmup) - # or when SimpleTrainer is not used - data_time = None - - eta_string = None - try: - iter_time = storage.history("time").global_avg() - eta_seconds = storage.history("time").median(1000) * (self._max_iter - iteration) - storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - except KeyError: - iter_time = None - # estimate eta on our own - more noisy - if self._last_write is not None: - estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( - iteration - self._last_write[0] - ) - eta_seconds = estimate_iter_time * (self._max_iter - iteration) - eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - self._last_write = (iteration, time.perf_counter()) - - try: - lr = "{:.6f}".format(storage.history("lr").latest()) - except KeyError: - lr = "N/A" - - if torch.cuda.is_available(): - max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 - else: - max_mem_mb = None - - # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" - self.logger.info( - " {eta}iter: {iter} {losses} {time}{data_time}lr: {lr} {memory}".format( - eta=f"eta: {eta_string} " if eta_string else "", - iter=iteration, - losses=" ".join( - [ - "{}: {:.3f}".format(k, v.median(20)) - for k, v in storage.histories().items() - if "loss" in k - ] - ), - time="time: {:.4f} ".format(iter_time) if iter_time is not None else "", - data_time="data_time: {:.4f} ".format(data_time) if data_time is not None else "", - lr=lr, - memory="max_mem: {:.0f}M".format(max_mem_mb) if max_mem_mb is not None else "", - ) - ) - - -class EventStorage: - """ - The user-facing class that provides metric storage functionalities. - - In the future we may add support for storing / logging other types of data if needed. - """ - - def __init__(self, start_iter=0): - """ - Args: - start_iter (int): the iteration number to start with - """ - self._history = defaultdict(HistoryBuffer) - self._smoothing_hints = {} - self._latest_scalars = {} - self._iter = start_iter - self._current_prefix = "" - self._vis_data = [] - self._histograms = [] - - def put_image(self, img_name, img_tensor): - """ - Add an `img_tensor` associated with `img_name`, to be shown on - tensorboard. - - Args: - img_name (str): The name of the image to put into tensorboard. - img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` - Tensor of shape `[channel, height, width]` where `channel` is - 3. The image format should be RGB. The elements in img_tensor - can either have values in [0, 1] (float32) or [0, 255] (uint8). - The `img_tensor` will be visualized in tensorboard. - """ - self._vis_data.append((img_name, img_tensor, self._iter)) - - def put_scalar(self, name, value, smoothing_hint=True): - """ - Add a scalar `value` to the `HistoryBuffer` associated with `name`. - - Args: - smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be - smoothed when logged. The hint will be accessible through - :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint - and apply custom smoothing rule. - - It defaults to True because most scalars we save need to be smoothed to - provide any useful signal. 
- """ - name = self._current_prefix + name - history = self._history[name] - value = float(value) - history.update(value, self._iter) - self._latest_scalars[name] = value - - existing_hint = self._smoothing_hints.get(name) - if existing_hint is not None: - assert ( - existing_hint == smoothing_hint - ), "Scalar {} was put with a different smoothing_hint!".format(name) - else: - self._smoothing_hints[name] = smoothing_hint - - def put_scalars(self, *, smoothing_hint=True, **kwargs): - """ - Put multiple scalars from keyword arguments. - - Examples: - - storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) - """ - for k, v in kwargs.items(): - self.put_scalar(k, v, smoothing_hint=smoothing_hint) - - def put_histogram(self, hist_name, hist_tensor, bins=1000): - """ - Create a histogram from a tensor. - - Args: - hist_name (str): The name of the histogram to put into tensorboard. - hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted - into a histogram. - bins (int): Number of histogram bins. - """ - ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() - - # Create a histogram with PyTorch - hist_counts = torch.histc(hist_tensor, bins=bins) - hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) - - # Parameter for the add_histogram_raw function of SummaryWriter - hist_params = dict( - tag=hist_name, - min=ht_min, - max=ht_max, - num=len(hist_tensor), - sum=float(hist_tensor.sum()), - sum_squares=float(torch.sum(hist_tensor ** 2)), - bucket_limits=hist_edges[1:].tolist(), - bucket_counts=hist_counts.tolist(), - global_step=self._iter, - ) - self._histograms.append(hist_params) - - def history(self, name): - """ - Returns: - HistoryBuffer: the scalar history for name - """ - ret = self._history.get(name, None) - if ret is None: - raise KeyError("No history metric available for {}!".format(name)) - return ret - - def histories(self): - """ - Returns: - dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars - """ - return self._history - - def latest(self): - """ - Returns: - dict[name -> number]: the scalars that's added in the current iteration. - """ - return self._latest_scalars - - def latest_with_smoothing_hint(self, window_size=20): - """ - Similar to :meth:`latest`, but the returned values - are either the un-smoothed original latest value, - or a median of the given window_size, - depend on whether the smoothing_hint is True. - - This provides a default behavior that other writers can use. - """ - result = {} - for k, v in self._latest_scalars.items(): - result[k] = self._history[k].median(window_size) if self._smoothing_hints[k] else v - return result - - def smoothing_hints(self): - """ - Returns: - dict[name -> bool]: the user-provided hint on whether the scalar - is noisy and needs smoothing. - """ - return self._smoothing_hints - - def step(self): - """ - User should call this function at the beginning of each iteration, to - notify the storage of the start of a new iteration. - The storage will then be able to associate the new data with the - correct iteration number. 
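In a hand-written loop the storage is used as a context manager: scalars go in through `put_scalar`/`put_scalars`, writers read them back out, and `step()` (whose body follows just below) advances the iteration counter. A compressed sketch with made-up losses and an illustrative output path:

```python
from detectron2.utils.events import EventStorage, JSONWriter

writer = JSONWriter("metrics.json")
with EventStorage(start_iter=0) as storage:
    for iteration in range(100):
        # ... forward/backward pass would go here ...
        storage.put_scalars(total_loss=1.0 / (iteration + 1), lr=0.01)
        if (iteration + 1) % 20 == 0:
            # Writers must run while the storage context is active.
            writer.write()
        storage.step()
writer.close()
```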
- """ - self._iter += 1 - self._latest_scalars = {} - - @property - def iter(self): - return self._iter - - @property - def iteration(self): - # for backward compatibility - return self._iter - - def __enter__(self): - _CURRENT_STORAGE_STACK.append(self) - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - assert _CURRENT_STORAGE_STACK[-1] == self - _CURRENT_STORAGE_STACK.pop() - - @contextmanager - def name_scope(self, name): - """ - Yields: - A context within which all the events added to this storage - will be prefixed by the name scope. - """ - old_prefix = self._current_prefix - self._current_prefix = name.rstrip("/") + "/" - yield - self._current_prefix = old_prefix - - def clear_images(self): - """ - Delete all the stored images for visualization. This should be called - after images are written to tensorboard. - """ - self._vis_data = [] - - def clear_histograms(self): - """ - Delete all the stored histograms for visualization. - This should be called after histograms are written to tensorboard. - """ - self._histograms = [] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/logger.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/logger.py deleted file mode 100644 index b6496d9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/logger.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import functools -import logging -import os -import sys -import time -from collections import Counter -from fvcore.common.file_io import PathManager -from tabulate import tabulate -from termcolor import colored - - -class _ColorfulFormatter(logging.Formatter): - def __init__(self, *args, **kwargs): - self._root_name = kwargs.pop("root_name") + "." - self._abbrev_name = kwargs.pop("abbrev_name", "") - if len(self._abbrev_name): - self._abbrev_name = self._abbrev_name + "." - super(_ColorfulFormatter, self).__init__(*args, **kwargs) - - def formatMessage(self, record): - record.name = record.name.replace(self._root_name, self._abbrev_name) - log = super(_ColorfulFormatter, self).formatMessage(record) - if record.levelno == logging.WARNING: - prefix = colored("WARNING", "red", attrs=["blink"]) - elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: - prefix = colored("ERROR", "red", attrs=["blink", "underline"]) - else: - return log - return prefix + " " + log - - -@functools.lru_cache() # so that calling setup_logger multiple times won't add many handlers -def setup_logger( - output=None, distributed_rank=0, *, color=True, name="detectron2", abbrev_name=None -): - """ - Initialize the detectron2 logger and set its verbosity level to "DEBUG". - - Args: - output (str): a file name or a directory to save log. If None, will not save log file. - If ends with ".txt" or ".log", assumed to be a file name. - Otherwise, logs will be saved to `output/log.txt`. - name (str): the root module name of this logger - abbrev_name (str): an abbreviation of the module, to avoid long names in logs. - Set to "" to not log the root module in logs. - By default, will abbreviate "detectron2" to "d2" and leave other - modules unchanged. 
- - Returns: - logging.Logger: a logger - """ - logger = logging.getLogger(name) - logger.setLevel(logging.DEBUG) - logger.propagate = False - - if abbrev_name is None: - abbrev_name = "d2" if name == "detectron2" else name - - plain_formatter = logging.Formatter( - "[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S" - ) - # stdout logging: master only - if distributed_rank == 0: - ch = logging.StreamHandler(stream=sys.stdout) - ch.setLevel(logging.DEBUG) - if color: - formatter = _ColorfulFormatter( - colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", - datefmt="%m/%d %H:%M:%S", - root_name=name, - abbrev_name=str(abbrev_name), - ) - else: - formatter = plain_formatter - ch.setFormatter(formatter) - logger.addHandler(ch) - - # file logging: all workers - if output is not None: - if output.endswith(".txt") or output.endswith(".log"): - filename = output - else: - filename = os.path.join(output, "log.txt") - if distributed_rank > 0: - filename = filename + ".rank{}".format(distributed_rank) - PathManager.mkdirs(os.path.dirname(filename)) - - fh = logging.StreamHandler(_cached_log_stream(filename)) - fh.setLevel(logging.DEBUG) - fh.setFormatter(plain_formatter) - logger.addHandler(fh) - - return logger - - -# cache the opened file object, so that different calls to `setup_logger` -# with the same file name can safely write to the same file. -@functools.lru_cache(maxsize=None) -def _cached_log_stream(filename): - return PathManager.open(filename, "a") - - -""" -Below are some other convenient logging methods. -They are mainly adopted from -https://github.com/abseil/abseil-py/blob/master/absl/logging/__init__.py -""" - - -def _find_caller(): - """ - Returns: - str: module name of the caller - tuple: a hashable key to be used to identify different callers - """ - frame = sys._getframe(2) - while frame: - code = frame.f_code - if os.path.join("utils", "logger.") not in code.co_filename: - mod_name = frame.f_globals["__name__"] - if mod_name == "__main__": - mod_name = "detectron2" - return mod_name, (code.co_filename, frame.f_lineno, code.co_name) - frame = frame.f_back - - -_LOG_COUNTER = Counter() -_LOG_TIMER = {} - - -def log_first_n(lvl, msg, n=1, *, name=None, key="caller"): - """ - Log only for the first n times. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. Will use the caller's module by default. - key (str or tuple[str]): the string(s) can be one of "caller" or - "message", which defines how to identify duplicated logs. - For example, if called with `n=1, key="caller"`, this function - will only log the first call from the same caller, regardless of - the message content. - If called with `n=1, key="message"`, this function will log the - same content only once, even if they are called from different places. - If called with `n=1, key=("caller", "message")`, this function - will not log only if the same caller has logged the same message before. - """ - if isinstance(key, str): - key = (key,) - assert len(key) > 0 - - caller_module, caller_key = _find_caller() - hash_key = () - if "caller" in key: - hash_key = hash_key + caller_key - if "message" in key: - hash_key = hash_key + (msg,) - - _LOG_COUNTER[hash_key] += 1 - if _LOG_COUNTER[hash_key] <= n: - logging.getLogger(name or caller_module).log(lvl, msg) - - -def log_every_n(lvl, msg, n=1, *, name=None): - """ - Log once per n times. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. 
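The logger returned by `setup_logger` is an ordinary `logging.Logger`, and the `log_first_n`-style helpers deduplicate messages by caller and/or content. A small sketch of typical usage; the output directory is illustrative:

```python
import logging
from detectron2.utils.logger import setup_logger, log_first_n

logger = setup_logger(output="./output", name="detectron2")
logger.info("training started")

for batch in range(1000):
    # Emitted at most once, no matter how many batches hit this path,
    # because duplicates are keyed by message content here.
    log_first_n(logging.WARNING, "empty annotations in batch", n=1, key="message")
```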
Will use the caller's module by default. - """ - caller_module, key = _find_caller() - _LOG_COUNTER[key] += 1 - if n == 1 or _LOG_COUNTER[key] % n == 1: - logging.getLogger(name or caller_module).log(lvl, msg) - - -def log_every_n_seconds(lvl, msg, n=1, *, name=None): - """ - Log no more than once per n seconds. - - Args: - lvl (int): the logging level - msg (str): - n (int): - name (str): name of the logger to use. Will use the caller's module by default. - """ - caller_module, key = _find_caller() - last_logged = _LOG_TIMER.get(key, None) - current_time = time.time() - if last_logged is None or current_time - last_logged >= n: - logging.getLogger(name or caller_module).log(lvl, msg) - _LOG_TIMER[key] = current_time - - -def create_small_table(small_dict): - """ - Create a small table using the keys of small_dict as headers. This is only - suitable for small dictionaries. - - Args: - small_dict (dict): a result dictionary of only a few items. - - Returns: - str: the table as a string. - """ - keys, values = tuple(zip(*small_dict.items())) - table = tabulate( - [values], - headers=keys, - tablefmt="pipe", - floatfmt=".3f", - stralign="center", - numalign="center", - ) - return table diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/memory.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/memory.py deleted file mode 100644 index d495a16..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/memory.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -import logging -from contextlib import contextmanager -from functools import wraps -import torch - -__all__ = ["retry_if_cuda_oom"] - - -@contextmanager -def _ignore_torch_cuda_oom(): - """ - A context which ignores CUDA OOM exception from pytorch. - """ - try: - yield - except RuntimeError as e: - # NOTE: the string may change? - if "CUDA out of memory. " in str(e): - pass - else: - raise - - -def retry_if_cuda_oom(func): - """ - Makes a function retry itself after encountering - pytorch's CUDA OOM error. - It will first retry after calling `torch.cuda.empty_cache()`. - - If that still fails, it will then retry by trying to convert inputs to CPUs. - In this case, it expects the function to dispatch to CPU implementation. - The return values may become CPU tensors as well and it's user's - responsibility to convert it back to CUDA tensor if needed. - - Args: - func: a stateless callable that takes tensor-like objects as arguments - - Returns: - a callable which retries `func` if OOM is encountered. - - Examples: - - .. code-block:: python - - output = retry_if_cuda_oom(some_torch_function)(input1, input2) - # output may be on CPU even if inputs are on GPU - - Note: - 1. When converting inputs to CPU, it will only look at each argument and check - if it has `.device` and `.to` for conversion. Nested structures of tensors - are not supported. - - 2. Since the function might be called more than once, it has to be - stateless. - """ - - def maybe_to_cpu(x): - try: - like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") - except AttributeError: - like_gpu_tensor = False - if like_gpu_tensor: - return x.to(device="cpu") - else: - return x - - @wraps(func) - def wrapped(*args, **kwargs): - with _ignore_torch_cuda_oom(): - return func(*args, **kwargs) - - # Clear cache and retry - torch.cuda.empty_cache() - with _ignore_torch_cuda_oom(): - return func(*args, **kwargs) - - # Try on CPU. 
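The rate-limited variant defined a little earlier in `logger.py`, `log_every_n_seconds`, is keyed by call site rather than by count. A minimal sketch of throttled progress logging (the loop body is a stand-in for real work):

```python
import logging
import time

from detectron2.utils.logger import log_every_n_seconds

logging.basicConfig(level=logging.INFO)

for i in range(10000):
    # Keyed by call site: at most one record every 5 seconds from this line.
    log_every_n_seconds(logging.INFO, "processed {} items".format(i), n=5)
    time.sleep(0.001)
```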
This slows down the code significantly, therefore print a notice. - logger = logging.getLogger(__name__) - logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) - new_args = (maybe_to_cpu(x) for x in args) - new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} - return func(*new_args, **new_kwargs) - - return wrapped diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/registry.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/registry.py deleted file mode 100644 index fea1de9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/registry.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# Keep this module for backward compatibility. -from fvcore.common.registry import Registry # noqa - -__all__ = ["Registry"] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/serialize.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/serialize.py deleted file mode 100644 index 734a62c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/serialize.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import cloudpickle - - -class PicklableWrapper(object): - """ - Wrap an object to make it more picklable, note that it uses - heavy weight serialization libraries that are slower than pickle. - It's best to use it only on closures (which are usually not picklable). - - This is a simplified version of - https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py - """ - - def __init__(self, obj): - self._obj = obj - - def __reduce__(self): - s = cloudpickle.dumps(self._obj) - return cloudpickle.loads, (s,) - - def __call__(self, *args, **kwargs): - return self._obj(*args, **kwargs) - - def __getattr__(self, attr): - # Ensure that the wrapped object can be used seamlessly as the previous object. - if attr not in ["_obj"]: - return getattr(self._obj, attr) - return getattr(self, attr) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/video_visualizer.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/video_visualizer.py deleted file mode 100644 index 0144b67..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/video_visualizer.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import pycocotools.mask as mask_util - -from detectron2.utils.visualizer import ( - ColorMode, - Visualizer, - _create_text_labels, - _PanopticPrediction, -) - -from .colormap import random_color - - -class _DetectedInstance: - """ - Used to store data about detected objects in video frame, - in order to transfer color to objects in the future frames. - - Attributes: - label (int): - bbox (tuple[float]): - mask_rle (dict): - color (tuple[float]): RGB colors in range (0, 1) - ttl (int): time-to-live for the instance. For example, if ttl=2, - the instance color can be transferred to objects in the next two frames. 
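`PicklableWrapper` exists mainly so closures and lambdas can cross process boundaries (for example into `DataLoader` workers) by delegating serialization to `cloudpickle`. A minimal round-trip sketch; the filter function is purely illustrative:

```python
import pickle

from detectron2.utils.serialize import PicklableWrapper

threshold = 0.5
score_filter = lambda s: s > threshold  # plain lambdas are not picklable

wrapped = PicklableWrapper(score_filter)
# __reduce__ routes through cloudpickle, so standard pickle now works;
# unpickling returns the original callable.
restored = pickle.loads(pickle.dumps(wrapped))
print(restored(0.7))  # True
```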
- """ - - __slots__ = ["label", "bbox", "mask_rle", "color", "ttl"] - - def __init__(self, label, bbox, mask_rle, color, ttl): - self.label = label - self.bbox = bbox - self.mask_rle = mask_rle - self.color = color - self.ttl = ttl - - -class VideoVisualizer: - def __init__(self, metadata, instance_mode=ColorMode.IMAGE): - """ - Args: - metadata (MetadataCatalog): image metadata. - """ - self.metadata = metadata - self._old_instances = [] - assert instance_mode in [ - ColorMode.IMAGE, - ColorMode.IMAGE_BW, - ], "Other mode not supported yet." - self._instance_mode = instance_mode - - def draw_instance_predictions(self, frame, predictions): - """ - Draw instance-level prediction results on an image. - - Args: - frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. - predictions (Instances): the output of an instance detection/segmentation - model. Following fields will be used to draw: - "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). - - Returns: - output (VisImage): image object with visualizations. - """ - frame_visualizer = Visualizer(frame, self.metadata) - num_instances = len(predictions) - if num_instances == 0: - return frame_visualizer.output - - boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None - scores = predictions.scores if predictions.has("scores") else None - classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None - keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None - - if predictions.has("pred_masks"): - masks = predictions.pred_masks - # mask IOU is not yet enabled - # masks_rles = mask_util.encode(np.asarray(masks.permute(1, 2, 0), order="F")) - # assert len(masks_rles) == num_instances - else: - masks = None - - detected = [ - _DetectedInstance(classes[i], boxes[i], mask_rle=None, color=None, ttl=8) - for i in range(num_instances) - ] - colors = self._assign_colors(detected) - - labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) - - if self._instance_mode == ColorMode.IMAGE_BW: - # any() returns uint8 tensor - frame_visualizer.output.img = frame_visualizer._create_grayscale_image( - (masks.any(dim=0) > 0).numpy() if masks is not None else None - ) - alpha = 0.3 - else: - alpha = 0.5 - - frame_visualizer.overlay_instances( - boxes=None if masks is not None else boxes, # boxes are a bit distracting - masks=masks, - labels=labels, - keypoints=keypoints, - assigned_colors=colors, - alpha=alpha, - ) - - return frame_visualizer.output - - def draw_sem_seg(self, frame, sem_seg, area_threshold=None): - """ - Args: - sem_seg (ndarray or Tensor): semantic segmentation of shape (H, W), - each value is the integer label. - area_threshold (Optional[int]): only draw segmentations larger than the threshold - """ - # don't need to do anything special - frame_visualizer = Visualizer(frame, self.metadata) - frame_visualizer.draw_sem_seg(sem_seg, area_threshold=None) - return frame_visualizer.output - - def draw_panoptic_seg_predictions( - self, frame, panoptic_seg, segments_info, area_threshold=None, alpha=0.5 - ): - frame_visualizer = Visualizer(frame, self.metadata) - pred = _PanopticPrediction(panoptic_seg, segments_info) - - if self._instance_mode == ColorMode.IMAGE_BW: - frame_visualizer.output.img = frame_visualizer._create_grayscale_image( - pred.non_empty_mask() - ) - - # draw mask for all semantic segments first i.e. 
"stuff" - for mask, sinfo in pred.semantic_masks(): - category_idx = sinfo["category_id"] - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] - except AttributeError: - mask_color = None - - frame_visualizer.draw_binary_mask( - mask, - color=mask_color, - text=self.metadata.stuff_classes[category_idx], - alpha=alpha, - area_threshold=area_threshold, - ) - - all_instances = list(pred.instance_masks()) - if len(all_instances) == 0: - return frame_visualizer.output - # draw mask for all instances second - masks, sinfo = list(zip(*all_instances)) - num_instances = len(masks) - masks_rles = mask_util.encode( - np.asarray(np.asarray(masks).transpose(1, 2, 0), dtype=np.uint8, order="F") - ) - assert len(masks_rles) == num_instances - - category_ids = [x["category_id"] for x in sinfo] - detected = [ - _DetectedInstance(category_ids[i], bbox=None, mask_rle=masks_rles[i], color=None, ttl=8) - for i in range(num_instances) - ] - colors = self._assign_colors(detected) - labels = [self.metadata.thing_classes[k] for k in category_ids] - - frame_visualizer.overlay_instances( - boxes=None, - masks=masks, - labels=labels, - keypoints=None, - assigned_colors=colors, - alpha=alpha, - ) - return frame_visualizer.output - - def _assign_colors(self, instances): - """ - Naive tracking heuristics to assign same color to the same instance, - will update the internal state of tracked instances. - - Returns: - list[tuple[float]]: list of colors. - """ - - # Compute iou with either boxes or masks: - is_crowd = np.zeros((len(instances),), dtype=np.bool) - if instances[0].bbox is None: - assert instances[0].mask_rle is not None - # use mask iou only when box iou is None - # because box seems good enough - rles_old = [x.mask_rle for x in self._old_instances] - rles_new = [x.mask_rle for x in instances] - ious = mask_util.iou(rles_old, rles_new, is_crowd) - threshold = 0.5 - else: - boxes_old = [x.bbox for x in self._old_instances] - boxes_new = [x.bbox for x in instances] - ious = mask_util.iou(boxes_old, boxes_new, is_crowd) - threshold = 0.6 - if len(ious) == 0: - ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") - - # Only allow matching instances of the same label: - for old_idx, old in enumerate(self._old_instances): - for new_idx, new in enumerate(instances): - if old.label != new.label: - ious[old_idx, new_idx] = 0 - - matched_new_per_old = np.asarray(ious).argmax(axis=1) - max_iou_per_old = np.asarray(ious).max(axis=1) - - # Try to find match for each old instance: - extra_instances = [] - for idx, inst in enumerate(self._old_instances): - if max_iou_per_old[idx] > threshold: - newidx = matched_new_per_old[idx] - if instances[newidx].color is None: - instances[newidx].color = inst.color - continue - # If an old instance does not match any new instances, - # keep it for the next frame in case it is just missed by the detector - inst.ttl -= 1 - if inst.ttl > 0: - extra_instances.append(inst) - - # Assign random color to newly-detected instances: - for inst in instances: - if inst.color is None: - inst.color = random_color(rgb=True, maximum=1) - self._old_instances = instances[:] + extra_instances - return [d.color for d in instances] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/visualizer.py b/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/visualizer.py deleted file mode 100644 index 3ffcbdb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/detectron2/utils/visualizer.py +++ /dev/null @@ 
-1,1143 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import colorsys -import logging -import math -import numpy as np -from enum import Enum, unique -import cv2 -import matplotlib as mpl -import matplotlib.colors as mplc -import matplotlib.figure as mplfigure -import pycocotools.mask as mask_util -import torch -from fvcore.common.file_io import PathManager -from matplotlib.backends.backend_agg import FigureCanvasAgg -from PIL import Image - -from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes - -from .colormap import random_color - -logger = logging.getLogger(__name__) - -__all__ = ["ColorMode", "VisImage", "Visualizer"] - - -_SMALL_OBJECT_AREA_THRESH = 1000 -_LARGE_MASK_AREA_THRESH = 120000 -_OFF_WHITE = (1.0, 1.0, 240.0 / 255) -_BLACK = (0, 0, 0) -_RED = (1.0, 0, 0) - -_KEYPOINT_THRESHOLD = 0.05 - - -@unique -class ColorMode(Enum): - """ - Enum of different color modes to use for instance visualizations. - """ - - IMAGE = 0 - """ - Picks a random color for every instance and overlay segmentations with low opacity. - """ - SEGMENTATION = 1 - """ - Let instances of the same category have similar colors - (from metadata.thing_colors), and overlay them with - high opacity. This provides more attention on the quality of segmentation. - """ - IMAGE_BW = 2 - """ - Same as IMAGE, but convert all areas without masks to gray-scale. - Only available for drawing per-instance mask predictions. - """ - - -class GenericMask: - """ - Attribute: - polygons (list[ndarray]): list[ndarray]: polygons for this mask. - Each ndarray has format [x, y, x, y, ...] - mask (ndarray): a binary mask - """ - - def __init__(self, mask_or_polygons, height, width): - self._mask = self._polygons = self._has_holes = None - self.height = height - self.width = width - - m = mask_or_polygons - if isinstance(m, dict): - # RLEs - assert "counts" in m and "size" in m - if isinstance(m["counts"], list): # uncompressed RLEs - h, w = m["size"] - assert h == height and w == width - m = mask_util.frPyObjects(m, h, w) - self._mask = mask_util.decode(m)[:, :] - return - - if isinstance(m, list): # list[ndarray] - self._polygons = [np.asarray(x).reshape(-1) for x in m] - return - - if isinstance(m, np.ndarray): # assumed to be a binary mask - assert m.shape[1] != 2, m.shape - assert m.shape == (height, width), m.shape - self._mask = m.astype("uint8") - return - - raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) - - @property - def mask(self): - if self._mask is None: - self._mask = self.polygons_to_mask(self._polygons) - return self._mask - - @property - def polygons(self): - if self._polygons is None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - return self._polygons - - @property - def has_holes(self): - if self._has_holes is None: - if self._mask is not None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - else: - self._has_holes = False # if original format is polygon, does not have holes - return self._has_holes - - def mask_to_polygons(self, mask): - # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level - # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. - # Internal contours (holes) are placed in hierarchy-2. - # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. 
- mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr - res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) - hierarchy = res[-1] - if hierarchy is None: # empty mask - return [], False - has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 - res = res[-2] - res = [x.flatten() for x in res] - res = [x for x in res if len(x) >= 6] - return res, has_holes - - def polygons_to_mask(self, polygons): - rle = mask_util.frPyObjects(polygons, self.height, self.width) - rle = mask_util.merge(rle) - return mask_util.decode(rle)[:, :] - - def area(self): - return self.mask.sum() - - def bbox(self): - p = mask_util.frPyObjects(self.polygons, self.height, self.width) - p = mask_util.merge(p) - bbox = mask_util.toBbox(p) - bbox[2] += bbox[0] - bbox[3] += bbox[1] - return bbox - - -class _PanopticPrediction: - def __init__(self, panoptic_seg, segments_info): - self._seg = panoptic_seg - - self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info - segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) - areas = areas.numpy() - sorted_idxs = np.argsort(-areas) - self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] - self._seg_ids = self._seg_ids.tolist() - for sid, area in zip(self._seg_ids, self._seg_areas): - if sid in self._sinfo: - self._sinfo[sid]["area"] = float(area) - - def non_empty_mask(self): - """ - Returns: - (H, W) array, a mask for all pixels that have a prediction - """ - empty_ids = [] - for id in self._seg_ids: - if id not in self._sinfo: - empty_ids.append(id) - if len(empty_ids) == 0: - return np.zeros(self._seg.shape, dtype=np.uint8) - assert ( - len(empty_ids) == 1 - ), ">1 ids corresponds to no labels. This is currently not supported" - return (self._seg != empty_ids[0]).numpy().astype(np.bool) - - def semantic_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or sinfo["isthing"]: - # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. - continue - yield (self._seg == sid).numpy().astype(np.bool), sinfo - - def instance_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or not sinfo["isthing"]: - continue - mask = (self._seg == sid).numpy().astype(np.bool) - if mask.sum() > 0: - yield mask, sinfo - - -def _create_text_labels(classes, scores, class_names): - """ - Args: - classes (list[int] or None): - scores (list[float] or None): - class_names (list[str] or None): - - Returns: - list[str] or None - """ - labels = None - if classes is not None and class_names is not None and len(class_names) > 1: - labels = [class_names[i] for i in classes] - if scores is not None: - if labels is None: - labels = ["{:.0f}%".format(s * 100) for s in scores] - else: - labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] - return labels - - -class VisImage: - def __init__(self, img, scale=1.0): - """ - Args: - img (ndarray): an RGB image of shape (H, W, 3). - scale (float): scale the input image - """ - self.img = img - self.scale = scale - self.width, self.height = img.shape[1], img.shape[0] - self._setup_figure(img) - - def _setup_figure(self, img): - """ - Args: - Same as in :meth:`__init__()`. - - Returns: - fig (matplotlib.pyplot.figure): top level container for all the image plot elements. - ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. 
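`_create_text_labels`, defined just above, is the small formatter behind the "person 97%"-style captions; its behavior is easy to see in isolation (it is a private helper, but importable from `detectron2.utils.visualizer`):

```python
from detectron2.utils.visualizer import _create_text_labels

labels = _create_text_labels(
    classes=[0, 1], scores=[0.97, 0.62], class_names=["person", "bicycle"]
)
print(labels)  # ['person 97%', 'bicycle 62%']
```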
- """ - fig = mplfigure.Figure(frameon=False) - self.dpi = fig.get_dpi() - # add a small 1e-2 to avoid precision lost due to matplotlib's truncation - # (https://github.com/matplotlib/matplotlib/issues/15363) - fig.set_size_inches( - (self.width * self.scale + 1e-2) / self.dpi, - (self.height * self.scale + 1e-2) / self.dpi, - ) - self.canvas = FigureCanvasAgg(fig) - # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) - ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) - ax.axis("off") - ax.set_xlim(0.0, self.width) - ax.set_ylim(self.height) - - self.fig = fig - self.ax = ax - - def save(self, filepath): - """ - Args: - filepath (str): a string that contains the absolute path, including the file name, where - the visualized image will be saved. - """ - if filepath.lower().endswith(".jpg") or filepath.lower().endswith(".png"): - # faster than matplotlib's imshow - cv2.imwrite(filepath, self.get_image()[:, :, ::-1]) - else: - # support general formats (e.g. pdf) - self.ax.imshow(self.img, interpolation="nearest") - self.fig.savefig(filepath) - - def get_image(self): - """ - Returns: - ndarray: - the visualized image of shape (H, W, 3) (RGB) in uint8 type. - The shape is scaled w.r.t the input image using the given `scale` argument. - """ - canvas = self.canvas - s, (width, height) = canvas.print_to_buffer() - if (self.width, self.height) != (width, height): - img = cv2.resize(self.img, (width, height)) - else: - img = self.img - - # buf = io.BytesIO() # works for cairo backend - # canvas.print_rgba(buf) - # width, height = self.width, self.height - # s = buf.getvalue() - - buffer = np.frombuffer(s, dtype="uint8") - - # imshow is slow. blend manually (still quite slow) - img_rgba = buffer.reshape(height, width, 4) - rgb, alpha = np.split(img_rgba, [3], axis=2) - - try: - import numexpr as ne # fuse them with numexpr - - visualized_image = ne.evaluate("demo * (1 - alpha / 255.0) + rgb * (alpha / 255.0)") - except ImportError: - alpha = alpha.astype("float32") / 255.0 - visualized_image = img * (1 - alpha) + rgb * alpha - - visualized_image = visualized_image.astype("uint8") - - return visualized_image - - -class Visualizer: - def __init__(self, img_rgb, metadata, scale=1.0, instance_mode=ColorMode.IMAGE): - """ - Args: - img_rgb: a numpy array of shape (H, W, C), where H and W correspond to - the height and width of the image respectively. C is the number of - color channels. The image is required to be in RGB format since that - is a requirement of the Matplotlib library. The image is also expected - to be in the range [0, 255]. - metadata (MetadataCatalog): image metadata. - """ - self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) - self.metadata = metadata - self.output = VisImage(self.img, scale=scale) - self.cpu_device = torch.device("cpu") - - # too small texts are useless, therefore clamp to 9 - self._default_font_size = max( - np.sqrt(self.output.height * self.output.width) // 90, 10 // scale - ) - self._instance_mode = instance_mode - - def draw_instance_predictions(self, predictions): - """ - Draw instance-level prediction results on an image. - - Args: - predictions (Instances): the output of an instance detection/segmentation - model. Following fields will be used to draw: - "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). - - Returns: - output (VisImage): image object with visualizations. 
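`draw_instance_predictions` is the usual entry point for visualizing model outputs. A standard sketch, assuming `outputs` is a `DefaultPredictor`-style dict with an `instances` field and the input image is BGR as loaded by `cv2` (the dataset name and file paths are illustrative):

```python
import cv2
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer


def save_visualization(image_bgr, outputs, path="vis.jpg"):
    """outputs: dict with an 'instances' field, e.g. from a DefaultPredictor."""
    metadata = MetadataCatalog.get("coco_2017_val")
    # Visualizer expects RGB, so reverse the channel order on the way in and out.
    v = Visualizer(image_bgr[:, :, ::-1], metadata, scale=1.0)
    vis = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imwrite(path, vis.get_image()[:, :, ::-1])
```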
- """ - boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None - scores = predictions.scores if predictions.has("scores") else None - classes = predictions.pred_classes if predictions.has("pred_classes") else None - labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) - keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None - - if predictions.has("pred_masks"): - masks = np.asarray(predictions.pred_masks) - masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] - else: - masks = None - - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes - ] - alpha = 0.8 - else: - colors = None - alpha = 0.5 - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.img = self._create_grayscale_image( - (predictions.pred_masks.any(dim=0) > 0).numpy() - ) - alpha = 0.3 - - self.overlay_instances( - masks=masks, - boxes=boxes, - labels=labels, - keypoints=keypoints, - assigned_colors=colors, - alpha=alpha, - ) - return self.output - - def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): - """ - Draw semantic segmentation predictions/labels. - - Args: - sem_seg (Tensor or ndarray): the segmentation of shape (H, W). - Each value is the integer label of the pixel. - area_threshold (int): segments with less than `area_threshold` are not drawn. - alpha (float): the larger it is, the more opaque the segmentations are. - - Returns: - output (VisImage): image object with visualizations. - """ - if isinstance(sem_seg, torch.Tensor): - sem_seg = sem_seg.numpy() - labels, areas = np.unique(sem_seg, return_counts=True) - sorted_idxs = np.argsort(-areas).tolist() - labels = labels[sorted_idxs] - for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] - except (AttributeError, IndexError): - mask_color = None - - binary_mask = (sem_seg == label).astype(np.uint8) - text = self.metadata.stuff_classes[label] - self.draw_binary_mask( - binary_mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - return self.output - - def draw_panoptic_seg_predictions( - self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7 - ): - """ - Draw panoptic prediction results on an image. - - Args: - panoptic_seg (Tensor): of shape (height, width) where the values are ids for each - segment. - segments_info (list[dict]): Describe each segment in `panoptic_seg`. - Each dict contains keys "id", "category_id", "isthing". - area_threshold (int): stuff segments with less than `area_threshold` are not drawn. - - Returns: - output (VisImage): image object with visualizations. - """ - pred = _PanopticPrediction(panoptic_seg, segments_info) - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.img = self._create_grayscale_image(pred.non_empty_mask()) - - # draw mask for all semantic segments first i.e. 
"stuff" - for mask, sinfo in pred.semantic_masks(): - category_idx = sinfo["category_id"] - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] - except AttributeError: - mask_color = None - - text = self.metadata.stuff_classes[category_idx] - self.draw_binary_mask( - mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - - # draw mask for all instances second - all_instances = list(pred.instance_masks()) - if len(all_instances) == 0: - return self.output - masks, sinfo = list(zip(*all_instances)) - category_ids = [x["category_id"] for x in sinfo] - - try: - scores = [x["score"] for x in sinfo] - except KeyError: - scores = None - labels = _create_text_labels(category_ids, scores, self.metadata.thing_classes) - - try: - colors = [random_color(rgb=True, maximum=1) for k in category_ids] - except AttributeError: - colors = None - self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) - - return self.output - - def draw_dataset_dict(self, dic): - """ - Draw annotations/segmentaions in Detectron2 Dataset format. - - Args: - dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. - - Returns: - output (VisImage): image object with visualizations. - """ - annos = dic.get("annotations", None) - if annos: - if "segmentation" in annos[0]: - masks = [x["segmentation"] for x in annos] - else: - masks = None - if "keypoints" in annos[0]: - keypts = [x["keypoints"] for x in annos] - keypts = np.array(keypts).reshape(len(annos), -1, 3) - else: - keypts = None - - boxes = [BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) for x in annos] - - labels = [x["category_id"] for x in annos] - colors = None - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in labels - ] - names = self.metadata.get("thing_classes", None) - if names: - labels = [names[i] for i in labels] - labels = [ - "{}".format(i) + ("|crowd" if a.get("iscrowd", 0) else "") - for i, a in zip(labels, annos) - ] - self.overlay_instances( - labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors - ) - - sem_seg = dic.get("sem_seg", None) - if sem_seg is None and "sem_seg_file_name" in dic: - with PathManager.open(dic["sem_seg_file_name"], "rb") as f: - sem_seg = Image.open(f) - sem_seg = np.asarray(sem_seg, dtype="uint8") - if sem_seg is not None: - self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) - return self.output - - def overlay_instances( - self, - *, - boxes=None, - labels=None, - masks=None, - keypoints=None, - assigned_colors=None, - alpha=0.5 - ): - """ - Args: - boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, - or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image, - or a :class:`RotatedBoxes`, - or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image, - labels (list[str]): the text to be displayed for each instance. - masks (masks-like object): Supported types are: - - * :class:`detectron2.structures.PolygonMasks`, - :class:`detectron2.structures.BitMasks`. - * list[list[ndarray]]: contains the segmentation masks for all objects in one image. - The first level of the list corresponds to individual instances. 
The second - level to all the polygon that compose the instance, and the third level - to the polygon coordinates. The third level should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - * list[ndarray]: each ndarray is a binary mask of shape (H, W). - * list[dict]: each dict is a COCO-style RLE. - keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), - where the N is the number of instances and K is the number of keypoints. - The last dimension corresponds to (x, y, visibility or score). - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - - Returns: - output (VisImage): image object with visualizations. - """ - num_instances = None - if boxes is not None: - boxes = self._convert_boxes(boxes) - num_instances = len(boxes) - if masks is not None: - masks = self._convert_masks(masks) - if num_instances: - assert len(masks) == num_instances - else: - num_instances = len(masks) - if keypoints is not None: - if num_instances: - assert len(keypoints) == num_instances - else: - num_instances = len(keypoints) - keypoints = self._convert_keypoints(keypoints) - if labels is not None: - assert len(labels) == num_instances - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - if boxes is not None and boxes.shape[1] == 5: - return self.overlay_rotated_instances( - boxes=boxes, labels=labels, assigned_colors=assigned_colors - ) - - # Display in largest to smallest order to reduce occlusion. - areas = None - if boxes is not None: - areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) - elif masks is not None: - areas = np.asarray([x.area() for x in masks]) - - if areas is not None: - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] if boxes is not None else None - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None - assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] - keypoints = keypoints[sorted_idxs] if keypoints is not None else None - - for i in range(num_instances): - color = assigned_colors[i] - if boxes is not None: - self.draw_box(boxes[i], edge_color=color) - - if masks is not None: - for segment in masks[i].polygons: - self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) - - if labels is not None: - # first get a box - if boxes is not None: - x0, y0, x1, y1 = boxes[i] - text_pos = (x0, y0) # if drawing boxes, put text on the box corner. - horiz_align = "left" - elif masks is not None: - x0, y0, x1, y1 = masks[i].bbox() - - # draw text in the center (defined by median) when box is not drawn - # median is less sensitive to outliers. - text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] - horiz_align = "center" - else: - continue # drawing the box confidence for keypoints isn't very useful. 
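`overlay_instances` can also be called directly with plain numpy inputs, which is handy for visualizing ground truth rather than predictions. A small sketch with synthetic boxes (metadata is not consulted on this code path, so `None` is passed for brevity; labels are made up):

```python
import numpy as np
from detectron2.utils.visualizer import Visualizer

img = np.zeros((480, 640, 3), dtype=np.uint8)
boxes = np.array([[50, 60, 200, 220], [300, 100, 420, 300]], dtype=np.float32)  # XYXY_ABS
labels = ["cat 88%", "dog 72%"]

v = Visualizer(img, metadata=None)
out = v.overlay_instances(boxes=boxes, labels=labels)
out.save("gt_boxes.png")
```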
- # for small objects, draw text at the side to avoid occlusion - instance_area = (y1 - y0) * (x1 - x0) - if ( - instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale - or y1 - y0 < 40 * self.output.scale - ): - if y1 >= self.output.height - 5: - text_pos = (x1, y0) - else: - text_pos = (x0, y1) - - height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) - * 0.5 - * self._default_font_size - ) - self.draw_text( - labels[i], - text_pos, - color=lighter_color, - horizontal_alignment=horiz_align, - font_size=font_size, - ) - - # draw keypoints - if keypoints is not None: - for keypoints_per_instance in keypoints: - self.draw_and_connect_keypoints(keypoints_per_instance) - - return self.output - - def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): - """ - Args: - boxes (ndarray): an Nx5 numpy array of - (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image. - labels (list[str]): the text to be displayed for each instance. - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - - Returns: - output (VisImage): image object with visualizations. - """ - - num_instances = len(boxes) - - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - - # Display in largest to smallest order to reduce occlusion. - if boxes is not None: - areas = boxes[:, 2] * boxes[:, 3] - - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - colors = [assigned_colors[idx] for idx in sorted_idxs] - - for i in range(num_instances): - self.draw_rotated_box_with_label( - boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None - ) - - return self.output - - def draw_and_connect_keypoints(self, keypoints): - """ - Draws keypoints of an instance and follows the rules for keypoint connections - to draw lines between appropriate keypoints. This follows color heuristics for - line color. - - Args: - keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints - and the last dimension corresponds to (x, y, probability). - - Returns: - output (VisImage): image object with visualizations. - """ - visible = {} - keypoint_names = self.metadata.get("keypoint_names") - for idx, keypoint in enumerate(keypoints): - # draw keypoint - x, y, prob = keypoint - if prob > _KEYPOINT_THRESHOLD: - self.draw_circle((x, y), color=_RED) - if keypoint_names: - keypoint_name = keypoint_names[idx] - visible[keypoint_name] = (x, y) - - if self.metadata.get("keypoint_connection_rules"): - for kp0, kp1, color in self.metadata.keypoint_connection_rules: - if kp0 in visible and kp1 in visible: - x0, y0 = visible[kp0] - x1, y1 = visible[kp1] - color = tuple(x / 255.0 for x in color) - self.draw_line([x0, x1], [y0, y1], color=color) - - # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip - # Note that this strategy is specific to person keypoints. 
- # For other keypoints, it should just do nothing - try: - ls_x, ls_y = visible["left_shoulder"] - rs_x, rs_y = visible["right_shoulder"] - mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 - except KeyError: - pass - else: - # draw line from nose to mid-shoulder - nose_x, nose_y = visible.get("nose", (None, None)) - if nose_x is not None: - self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) - - try: - # draw line from mid-shoulder to mid-hip - lh_x, lh_y = visible["left_hip"] - rh_x, rh_y = visible["right_hip"] - except KeyError: - pass - else: - mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 - self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) - return self.output - - """ - Primitive drawing functions: - """ - - def draw_text( - self, - text, - position, - *, - font_size=None, - color="g", - horizontal_alignment="center", - rotation=0 - ): - """ - Args: - text (str): class label - position (tuple): a tuple of the x and y coordinates to place text on image. - font_size (int, optional): font of the text. If not provided, a font size - proportional to the image width is calculated and used. - color: color of the text. Refer to `matplotlib.colors` for full list - of formats that are accepted. - horizontal_alignment (str): see `matplotlib.text.Text` - rotation: rotation angle in degrees CCW - - Returns: - output (VisImage): image object with text drawn. - """ - if not font_size: - font_size = self._default_font_size - - # since the text background is dark, we don't want the text to be dark - color = np.maximum(list(mplc.to_rgb(color)), 0.2) - color[np.argmax(color)] = max(0.8, np.max(color)) - - x, y = position - self.output.ax.text( - x, - y, - text, - size=font_size * self.output.scale, - family="sans-serif", - bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, - verticalalignment="top", - horizontalalignment=horizontal_alignment, - color=color, - zorder=10, - rotation=rotation, - ) - return self.output - - def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): - """ - Args: - box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 - are the coordinates of the image's top left corner. x1 and y1 are the - coordinates of the image's bottom right corner. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - - Returns: - output (VisImage): image object with box drawn. - """ - x0, y0, x1, y1 = box_coord - width = x1 - x0 - height = y1 - y0 - - linewidth = max(self._default_font_size / 4, 1) - - self.output.ax.add_patch( - mpl.patches.Rectangle( - (x0, y0), - width, - height, - fill=False, - edgecolor=edge_color, - linewidth=linewidth * self.output.scale, - alpha=alpha, - linestyle=line_style, - ) - ) - return self.output - - def draw_rotated_box_with_label( - self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None - ): - """ - Args: - rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), - where cnt_x and cnt_y are the center coordinates of the box. - w and h are the width and height of the box. angle represents how - many degrees the box is rotated CCW with regard to the 0-degree box. - alpha (float): blending efficient. Smaller values lead to more transparent masks. 
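The primitive drawing functions compose the same way the higher-level methods use them internally. A tiny sketch drawing one labeled box on a blank canvas (again passing `None` for metadata, which these primitives do not consult):

```python
import numpy as np
from detectron2.utils.visualizer import Visualizer

canvas = np.full((240, 320, 3), 255, dtype=np.uint8)  # white RGB image
v = Visualizer(canvas, metadata=None)
v.draw_box((40, 40, 200, 180), edge_color="r")
v.draw_text("example", (40, 40), color="r", horizontal_alignment="left")
v.output.save("primitives.png")
```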
- edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - label (string): label for rotated box. It will not be rendered when set to None. - - Returns: - output (VisImage): image object with box drawn. - """ - cnt_x, cnt_y, w, h, angle = rotated_box - area = w * h - # use thinner lines when the box is small - linewidth = self._default_font_size / ( - 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 - ) - - theta = angle * math.pi / 180.0 - c = math.cos(theta) - s = math.sin(theta) - rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] - # x: left->right ; y: top->down - rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] - for k in range(4): - j = (k + 1) % 4 - self.draw_line( - [rotated_rect[k][0], rotated_rect[j][0]], - [rotated_rect[k][1], rotated_rect[j][1]], - color=edge_color, - linestyle="--" if k == 1 else line_style, - linewidth=linewidth, - ) - - if label is not None: - text_pos = rotated_rect[1] # topleft corner - - height_ratio = h / np.sqrt(self.output.height * self.output.width) - label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size - ) - self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) - - return self.output - - def draw_circle(self, circle_coord, color, radius=3): - """ - Args: - circle_coord (list(int) or tuple(int)): contains the x and y coordinates - of the center of the circle. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - radius (int): radius of the circle. - - Returns: - output (VisImage): image object with box drawn. - """ - x, y = circle_coord - self.output.ax.add_patch( - mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) - ) - return self.output - - def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): - """ - Args: - x_data (list[int]): a list containing x values of all the points being drawn. - Length of list should match the length of y_data. - y_data (list[int]): a list containing y values of all the points being drawn. - Length of list should match the length of x_data. - color: color of the line. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - linestyle: style of the line. Refer to `matplotlib.lines.Line2D` - for a full list of formats that are accepted. - linewidth (float or None): width of the line. When it's None, - a default value will be computed and used. - - Returns: - output (VisImage): image object with line drawn. - """ - if linewidth is None: - linewidth = self._default_font_size / 3 - linewidth = max(linewidth, 1) - self.output.ax.add_line( - mpl.lines.Line2D( - x_data, - y_data, - linewidth=linewidth * self.output.scale, - color=color, - linestyle=linestyle, - ) - ) - return self.output - - def draw_binary_mask( - self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=4096 - ): - """ - Args: - binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and - W is the image width. Each value in the array is either a 0 or 1 value of uint8 - type. - color: color of the mask. Refer to `matplotlib.colors` for a full list of - formats that are accepted. If None, will pick a random color. 
- edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. - text (str): if None, will be drawn in the object's center of mass. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - area_threshold (float): a connected component small than this will not be shown. - - Returns: - output (VisImage): image object with mask drawn. - """ - if color is None: - color = random_color(rgb=True, maximum=1) - if area_threshold is None: - area_threshold = 4096 - - has_valid_segment = False - binary_mask = binary_mask.astype("uint8") # opencv needs uint8 - mask = GenericMask(binary_mask, self.output.height, self.output.width) - shape2d = (binary_mask.shape[0], binary_mask.shape[1]) - - if not mask.has_holes: - # draw polygons for regular masks - for segment in mask.polygons: - area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) - if area < area_threshold: - continue - has_valid_segment = True - segment = segment.reshape(-1, 2) - self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) - else: - rgba = np.zeros(shape2d + (4,), dtype="float32") - rgba[:, :, :3] = color - rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha - has_valid_segment = True - self.output.ax.imshow(rgba) - - if text is not None and has_valid_segment: - # TODO sometimes drawn on wrong objects. the heuristics here can improve. - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) - largest_component_id = np.argmax(stats[1:, -1]) + 1 - - # draw text on the largest component, as well as other very large components. - for cid in range(1, _num_cc): - if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: - # median is more stable than centroid - # center = centroids[largest_component_id] - center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] - self.draw_text(text, center, color=lighter_color) - return self.output - - def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): - """ - Args: - segment: numpy array of shape Nx2, containing all the points in the polygon. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. If not provided, a darker shade - of the polygon color will be used instead. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - - Returns: - output (VisImage): image object with polygon drawn. - """ - if edge_color is None: - # make edge color darker than the polygon color - if alpha > 0.8: - edge_color = self._change_color_brightness(color, brightness_factor=-0.7) - else: - edge_color = color - edge_color = mplc.to_rgb(edge_color) + (1,) - - polygon = mpl.patches.Polygon( - segment, - fill=True, - facecolor=mplc.to_rgb(color) + (alpha,), - edgecolor=edge_color, - linewidth=max(self._default_font_size // 15 * self.output.scale, 1), - ) - self.output.ax.add_patch(polygon) - return self.output - - """ - Internal methods: - """ - - def _jitter(self, color): - """ - Randomly modifies given color to produce a slightly different color than the color given. - - Args: - color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color - picked. The values in the list are in the [0.0, 1.0] range. 
- - Returns: - jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the - color after being jittered. The values in the list are in the [0.0, 1.0] range. - """ - color = mplc.to_rgb(color) - vec = np.random.rand(3) - # better to do it in another color space - vec = vec / np.linalg.norm(vec) * 0.5 - res = np.clip(vec + color, 0, 1) - return tuple(res) - - def _create_grayscale_image(self, mask=None): - """ - Create a grayscale version of the original image. - The colors in masked area, if given, will be kept. - """ - img_bw = self.img.astype("f4").mean(axis=2) - img_bw = np.stack([img_bw] * 3, axis=2) - if mask is not None: - img_bw[mask] = self.img[mask] - return img_bw - - def _change_color_brightness(self, color, brightness_factor): - """ - Depending on the brightness_factor, gives a lighter or darker color i.e. a color with - less or more saturation than the original color. - - Args: - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of - 0 will correspond to no change, a factor in [-1.0, 0) range will result in - a darker color and a factor in (0, 1.0] range will result in a lighter color. - - Returns: - modified_color (tuple[double]): a tuple containing the RGB values of the - modified color. Each value in the tuple is in the [0.0, 1.0] range. - """ - assert brightness_factor >= -1.0 and brightness_factor <= 1.0 - color = mplc.to_rgb(color) - polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) - modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) - modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness - modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness - modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) - return modified_color - - def _convert_boxes(self, boxes): - """ - Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. - """ - if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): - return boxes.tensor.numpy() - else: - return np.asarray(boxes) - - def _convert_masks(self, masks_or_polygons): - """ - Convert different format of masks or polygons to a tuple of masks and polygons. - - Returns: - list[GenericMask]: - """ - - m = masks_or_polygons - if isinstance(m, PolygonMasks): - m = m.polygons - if isinstance(m, BitMasks): - m = m.tensor.numpy() - if isinstance(m, torch.Tensor): - m = m.numpy() - ret = [] - for x in m: - if isinstance(x, GenericMask): - ret.append(x) - else: - ret.append(GenericMask(x, self.output.height, self.output.width)) - return ret - - def _convert_keypoints(self, keypoints): - if isinstance(keypoints, Keypoints): - keypoints = keypoints.tensor - keypoints = np.asarray(keypoints) - return keypoints - - def get_output(self): - """ - Returns: - output (VisImage): the image output containing the visualizations added - to the image. - """ - return self.output diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/README.md b/preprocess/humanparsing/mhp_extension/detectron2/dev/README.md deleted file mode 100644 index cc0d329..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/README.md +++ /dev/null @@ -1,7 +0,0 @@ - -## Some scripts for developers to use, include: - -- `linter.sh`: lint the codebase before commit -- `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. 
- Note that these tests require 2 GPUs. -- `parse_results.sh`: parse results from a log file. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/linter.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/linter.sh deleted file mode 100644 index fd7081d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/linter.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# Run this script at project root by "./dev/linter.sh" before you commit - -vergte() { - [ "$2" = "$(echo -e "$1\\n$2" | sort -V | head -n1)" ] -} - -{ - black --version | grep -E "(19.3b0.*6733274)|(19.3b0\\+8)" > /dev/null -} || { - echo "Linter requires 'black @ git+https://github.com/psf/black@673327449f86fce558adde153bb6cbe54bfebad2' !" - exit 1 -} - -ISORT_TARGET_VERSION="4.3.21" -ISORT_VERSION=$(isort -v | grep VERSION | awk '{print $2}') -vergte "$ISORT_VERSION" "$ISORT_TARGET_VERSION" || { - echo "Linter requires isort>=${ISORT_TARGET_VERSION} !" - exit 1 -} - -set -v - -echo "Running isort ..." -isort -y -sp . --atomic - -echo "Running black ..." -black -l 100 . - -echo "Running flake8 ..." -if [ -x "$(command -v flake8-3)" ]; then - flake8-3 . -else - python3 -m flake8 . -fi - -# echo "Running mypy ..." -# Pytorch does not have enough type annotations -# mypy detectron2/solver detectron2/structures detectron2/config - -echo "Running clang-format ..." -find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i - -command -v arc > /dev/null && arc lint diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/README.md b/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/README.md deleted file mode 100644 index 095684f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/README.md +++ /dev/null @@ -1,17 +0,0 @@ - -## To build a cu101 wheel for release: - -``` -$ nvidia-docker run -it --storage-opt "size=20GB" --name pt pytorch/manylinux-cuda101 -# inside the container: -# git clone https://github.com/facebookresearch/detectron2/ -# cd detectron2 -# export CU_VERSION=cu101 D2_VERSION_SUFFIX= PYTHON_VERSION=3.7 PYTORCH_VERSION=1.4 -# ./dev/packaging/build_wheel.sh -``` - -## To build all wheels for `CUDA {9.2,10.0,10.1}` x `Python {3.6,3.7,3.8}`: -``` -./dev/packaging/build_all_wheels.sh -./dev/packaging/gen_wheel_index.sh /path/to/wheels -``` diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/build_all_wheels.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/build_all_wheels.sh deleted file mode 100644 index eb64dea..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/build_all_wheels.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -PYTORCH_VERSION=1.5 - -build_for_one_cuda() { - cu=$1 - - case "$cu" in - cu*) - container_name=manylinux-cuda${cu/cu/} - ;; - cpu) - container_name=manylinux-cuda101 - ;; - *) - echo "Unrecognized cu=$cu" - exit 1 - ;; - esac - - echo "Launching container $container_name ..." - - for py in 3.6 3.7 3.8; do - docker run -itd \ - --name $container_name \ - --mount type=bind,source="$(pwd)",target=/detectron2 \ - pytorch/$container_name - - cat </dev/null 2>&1 && pwd )" -. "$script_dir/pkg_helpers.bash" - -echo "Build Settings:" -echo "CU_VERSION: $CU_VERSION" # e.g. cu101 -echo "D2_VERSION_SUFFIX: $D2_VERSION_SUFFIX" # e.g. 
+cu101 or "" -echo "PYTHON_VERSION: $PYTHON_VERSION" # e.g. 3.6 -echo "PYTORCH_VERSION: $PYTORCH_VERSION" # e.g. 1.4 - -setup_cuda -setup_wheel_python -yum install ninja-build -y && ln -sv /usr/bin/ninja-build /usr/bin/ninja - -export TORCH_VERSION_SUFFIX="+$CU_VERSION" -if [[ "$CU_VERSION" == "cu102" ]]; then - export TORCH_VERSION_SUFFIX="" -fi -pip_install pip numpy -U -pip_install "torch==$PYTORCH_VERSION$TORCH_VERSION_SUFFIX" \ - -f https://download.pytorch.org/whl/$CU_VERSION/torch_stable.html - -# use separate directories to allow parallel build -BASE_BUILD_DIR=build/$CU_VERSION/$PYTHON_VERSION -python setup.py \ - build -b $BASE_BUILD_DIR \ - bdist_wheel -b $BASE_BUILD_DIR/build_dist -d wheels/$CU_VERSION diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/gen_wheel_index.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/gen_wheel_index.sh deleted file mode 100644 index 44d6041..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/gen_wheel_index.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - - -root=$1 -if [[ -z "$root" ]]; then - echo "Usage: ./gen_wheel_index.sh /path/to/wheels" - exit -fi - -index=$root/index.html - -cd "$root" -for cu in cpu cu92 cu100 cu101 cu102; do - cd $cu - echo "Creating $PWD/index.html ..." - for whl in *.whl; do - echo "$whl
" - done > index.html - cd "$root" -done - -echo "Creating $index ..." -for whl in $(find . -type f -name '*.whl' -printf '%P\n' | sort); do - echo "$whl
" -done > "$index" - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/pkg_helpers.bash b/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/pkg_helpers.bash deleted file mode 100644 index 51e6185..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/packaging/pkg_helpers.bash +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} -# Install with pip a bit more robustly than the default -pip_install() { - retry pip install --progress-bar off "$@" -} - - -setup_cuda() { - # Now work out the CUDA settings - # Like other torch domain libraries, we choose common GPU architectures only. - export FORCE_CUDA=1 - case "$CU_VERSION" in - cu102) - export CUDA_HOME=/usr/local/cuda-10.2/ - export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" - ;; - cu101) - export CUDA_HOME=/usr/local/cuda-10.1/ - export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" - ;; - cu100) - export CUDA_HOME=/usr/local/cuda-10.0/ - export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX;7.5+PTX" - ;; - cu92) - export CUDA_HOME=/usr/local/cuda-9.2/ - export TORCH_CUDA_ARCH_LIST="3.5;3.7;5.0;5.2;6.0+PTX;6.1+PTX;7.0+PTX" - ;; - cpu) - unset FORCE_CUDA - export CUDA_VISIBLE_DEVICES= - ;; - *) - echo "Unrecognized CU_VERSION=$CU_VERSION" - exit 1 - ;; - esac -} - -setup_wheel_python() { - case "$PYTHON_VERSION" in - 3.6) python_abi=cp36-cp36m ;; - 3.7) python_abi=cp37-cp37m ;; - 3.8) python_abi=cp38-cp38 ;; - *) - echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" - exit 1 - ;; - esac - export PATH="/opt/python/$python_abi/bin:$PATH" -} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/parse_results.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/parse_results.sh deleted file mode 100644 index 874b688..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/parse_results.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# A shell script that parses metrics from the log file. -# Make it easier for developers to track performance of models. - -LOG="$1" - -if [[ -z "$LOG" ]]; then - echo "Usage: $0 /path/to/log/file" - exit 1 -fi - -# [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) -# [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / demo per device, on 8 devices) -# [12/15 11:49:03] inference INFO: Total inference pure compute time: ..... 
- -# training time -trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') -echo "Training speed: $trainspeed s/it" - -# inference time: there could be multiple inference during training -inferencespeed=$(grep -o 'Total inference pure.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) -echo "Inference speed: $inferencespeed s/it" - -# [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 -memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') -echo "Training memory: $memory MB" - -echo "Easy to copypaste:" -echo "$trainspeed","$inferencespeed","$memory" - -echo "------------------------------" - -# [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox -# [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl -# [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 -# [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm -# [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl -# [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 - -echo "COCO Results:" -num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) -# each task has 3 lines -grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/run_inference_tests.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/run_inference_tests.sh deleted file mode 100644 index 17e422d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/run_inference_tests.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -BIN="python tools/train_net.py" -OUTPUT="inference_test_output" -NUM_GPUS=2 - -CFG_LIST=( "${@:1}" ) - -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN \ - --eval-only \ - --num-gpus $NUM_GPUS \ - --config-file "$cfg" \ - OUTPUT_DIR $OUTPUT - rm -rf $OUTPUT -done - - -echo "========================================================================" -echo "Running demo.py ..." 
-echo "========================================================================" -DEMO_BIN="python demo/demo.py" -COCO_DIR=datasets/coco/val2014 -mkdir -pv $OUTPUT - -set -v - -$DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ - --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT -rm -rf $OUTPUT diff --git a/preprocess/humanparsing/mhp_extension/detectron2/dev/run_instant_tests.sh b/preprocess/humanparsing/mhp_extension/detectron2/dev/run_instant_tests.sh deleted file mode 100644 index 2c51de6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/dev/run_instant_tests.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -BIN="python tools/train_net.py" -OUTPUT="instant_test_output" -NUM_GPUS=2 - -CFG_LIST=( "${@:1}" ) -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ - SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ - OUTPUT_DIR "$OUTPUT" - rm -rf "$OUTPUT" -done - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile b/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile deleted file mode 100644 index 2a86039..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile +++ /dev/null @@ -1,49 +0,0 @@ -FROM nvidia/cuda:10.1-cudnn7-devel - -ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && apt-get install -y \ - python3-opencv ca-certificates python3-dev git wget sudo \ - cmake ninja-build protobuf-compiler libprotobuf-dev && \ - rm -rf /var/lib/apt/lists/* -RUN ln -sv /usr/bin/python3 /usr/bin/python - -# create a non-root user -ARG USER_ID=1000 -RUN useradd -m --no-log-init --system --uid ${USER_ID} appuser -g sudo -RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers -USER appuser -WORKDIR /home/appuser - -ENV PATH="/home/appuser/.local/bin:${PATH}" -RUN wget https://bootstrap.pypa.io/get-pip.py && \ - python3 get-pip.py --user && \ - rm get-pip.py - -# install dependencies -# See https://pytorch.org/ for other options if you use a different version of CUDA -RUN pip install --user tensorboard cython -RUN pip install --user torch==1.5+cu101 torchvision==0.6+cu101 -f https://download.pytorch.org/whl/torch_stable.html -RUN pip install --user 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' - -RUN pip install --user 'git+https://github.com/facebookresearch/fvcore' -# install detectron2 -RUN git clone https://github.com/facebookresearch/detectron2 detectron2_repo -# set FORCE_CUDA because during `docker build` cuda is not accessible -ENV FORCE_CUDA="1" -# This will by default build detectron2 for all common cuda architectures and take a lot more time, -# because inside `docker build`, there is no way to tell which architecture will be used. 
-ARG TORCH_CUDA_ARCH_LIST="Kepler;Kepler+Tesla;Maxwell;Maxwell+Tegra;Pascal;Volta;Turing" -ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" - -RUN pip install --user -e detectron2_repo - -# Set a fixed model cache directory. -ENV FVCORE_CACHE="/tmp" -WORKDIR /home/appuser/detectron2_repo - -# run detectron2 under user "appuser": -# wget http://images.cocodataset.org/val2017/000000439715.jpg -O input.jpg -# python3 demo/demo.py \ - #--config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - #--input input.jpg --output outputs/ \ - #--opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci b/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci deleted file mode 100644 index bc0be84..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docker/Dockerfile-circleci +++ /dev/null @@ -1,17 +0,0 @@ -FROM nvidia/cuda:10.1-cudnn7-devel -# This dockerfile only aims to provide an environment for unittest on CircleCI - -ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && apt-get install -y \ - python3-opencv ca-certificates python3-dev git wget sudo ninja-build && \ - rm -rf /var/lib/apt/lists/* - -RUN wget -q https://bootstrap.pypa.io/get-pip.py && \ - python3 get-pip.py && \ - rm get-pip.py - -# install dependencies -# See https://pytorch.org/ for other options if you use a different version of CUDA -RUN pip install tensorboard cython -RUN pip install torch==1.5+cu101 torchvision==0.6+cu101 -f https://download.pytorch.org/whl/torch_stable.html -RUN pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docker/README.md b/preprocess/humanparsing/mhp_extension/detectron2/docker/README.md deleted file mode 100644 index 760c405..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docker/README.md +++ /dev/null @@ -1,36 +0,0 @@ - -## Use the container (with docker ≥ 19.03) - -``` -cd docker/ -# Build: -docker build --build-arg USER_ID=$UID -t detectron2:v0 . -# Run: -docker run --gpus all -it \ - --shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \ - --name=detectron2 detectron2:v0 - -# Grant docker access to host X server to show images -xhost +local:`docker inspect --format='{{ .Config.Hostname }}' detectron2` -``` - -## Use the container (with docker < 19.03) - -Install docker-compose and nvidia-docker2, then run: -``` -cd docker && USER_ID=$UID docker-compose run detectron2 -``` - -#### Using a persistent cache directory - -You can prevent models from being re-downloaded on every run, -by storing them in a cache directory. - -To do this, add `--volume=$HOME/.torch/fvcore_cache:/tmp:rw` in the run command. - -## Install new dependencies -Add the following to `Dockerfile` to make persistent changes. -``` -RUN sudo apt-get update && sudo apt-get install -y vim -``` -Or run them in the container to make temporary changes. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docker/docker-compose.yml b/preprocess/humanparsing/mhp_extension/detectron2/docker/docker-compose.yml deleted file mode 100644 index e660f44..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docker/docker-compose.yml +++ /dev/null @@ -1,18 +0,0 @@ -version: "2.3" -services: - detectron2: - build: - context: . 
- dockerfile: Dockerfile - args: - USER_ID: ${USER_ID:-1000} - runtime: nvidia # TODO: Exchange with "gpu: all" in the future (see https://github.com/facebookresearch/detectron2/pull/197/commits/00545e1f376918db4a8ce264d427a07c1e896c5a). - shm_size: "8gb" - ulimits: - memlock: -1 - stack: 67108864 - volumes: - - /tmp/.X11-unix:/tmp/.X11-unix:ro - environment: - - DISPLAY=$DISPLAY - - NVIDIA_VISIBLE_DEVICES=all diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/.gitignore b/preprocess/humanparsing/mhp_extension/detectron2/docs/.gitignore deleted file mode 100644 index e35d885..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -_build diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/Makefile b/preprocess/humanparsing/mhp_extension/detectron2/docs/Makefile deleted file mode 100644 index d537643..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -# Minimal makefile for Sphinx documentation -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/README.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/README.md deleted file mode 100644 index 2c65c36..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Read the docs: - -The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/). -Documents in this directory are not meant to be read on github. - -# Build the docs: - -1. Install detectron2 according to [INSTALL.md](INSTALL.md). -2. Install additional libraries required to build docs: - - docutils==0.16 - - Sphinx==3.0.0 - - recommonmark==0.6.0 - - sphinx_rtd_theme - - mock - -3. Run `make html` from this directory. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/conf.py b/preprocess/humanparsing/mhp_extension/detectron2/docs/conf.py deleted file mode 100644 index 44e9f2b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/conf.py +++ /dev/null @@ -1,335 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -# flake8: noqa - -# Configuration file for the Sphinx documentation builder. -# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# -import os -import sys -import mock -from sphinx.domains import Domain -from typing import Dict, List, Tuple - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -import sphinx_rtd_theme - - -class GithubURLDomain(Domain): - """ - Resolve certain links in markdown files to github source. - """ - - name = "githuburl" - ROOT = "https://github.com/facebookresearch/detectron2/blob/master/" - LINKED_DOC = ["tutorials/install", "tutorials/getting_started"] - - def resolve_any_xref(self, env, fromdocname, builder, target, node, contnode): - github_url = None - if not target.endswith("html") and target.startswith("../../"): - url = target.replace("../", "") - github_url = url - if fromdocname in self.LINKED_DOC: - # unresolved links in these docs are all github links - github_url = target - - if github_url is not None: - if github_url.endswith("MODEL_ZOO") or github_url.endswith("README"): - # bug of recommonmark. - # https://github.com/readthedocs/recommonmark/blob/ddd56e7717e9745f11300059e4268e204138a6b1/recommonmark/parser.py#L152-L155 - github_url += ".md" - print("Ref {} resolved to github:{}".format(target, github_url)) - contnode["refuri"] = self.ROOT + github_url - return [("githuburl:any", contnode)] - else: - return [] - - -# to support markdown -from recommonmark.parser import CommonMarkParser - -sys.path.insert(0, os.path.abspath("../")) -os.environ["DOC_BUILDING"] = "True" -DEPLOY = os.environ.get("READTHEDOCS") == "True" - - -# -- Project information ----------------------------------------------------- - -# fmt: off -try: - import torch # noqa -except ImportError: - for m in [ - "torch", "torchvision", "torch.nn", "torch.nn.parallel", "torch.distributed", "torch.multiprocessing", "torch.autograd", - "torch.autograd.function", "torch.nn.modules", "torch.nn.modules.utils", "torch.utils", "torch.utils.data", "torch.onnx", - "torchvision", "torchvision.ops", - ]: - sys.modules[m] = mock.Mock(name=m) - sys.modules['torch'].__version__ = "1.5" # fake version - -for m in [ - "cv2", "scipy", "portalocker", "detectron2._C", - "pycocotools", "pycocotools.mask", "pycocotools.coco", "pycocotools.cocoeval", - "google", "google.protobuf", "google.protobuf.internal", "onnx", - "caffe2", "caffe2.proto", "caffe2.python", "caffe2.python.utils", "caffe2.python.onnx", "caffe2.python.onnx.backend", -]: - sys.modules[m] = mock.Mock(name=m) -# fmt: on -sys.modules["cv2"].__version__ = "3.4" - -import detectron2 # isort: skip - - -project = "detectron2" -copyright = "2019-2020, detectron2 contributors" -author = "detectron2 contributors" - -# The short X.Y version -version = detectron2.__version__ -# The full version, including alpha/beta/rc tags -release = version - - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# -needs_sphinx = "3.0" - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. 
-extensions = [ - "recommonmark", - "sphinx.ext.autodoc", - "sphinx.ext.napoleon", - "sphinx.ext.intersphinx", - "sphinx.ext.todo", - "sphinx.ext.coverage", - "sphinx.ext.mathjax", - "sphinx.ext.viewcode", - "sphinx.ext.githubpages", -] - -# -- Configurations for plugins ------------ -napoleon_google_docstring = True -napoleon_include_init_with_doc = True -napoleon_include_special_with_doc = True -napoleon_numpy_docstring = False -napoleon_use_rtype = False -autodoc_inherit_docstrings = False -autodoc_member_order = "bysource" - -if DEPLOY: - intersphinx_timeout = 10 -else: - # skip this when building locally - intersphinx_timeout = 0.1 -intersphinx_mapping = { - "python": ("https://docs.python.org/3.6", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None), - "torch": ("https://pytorch.org/docs/master/", None), -} -# ------------------------- - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -source_suffix = [".rst", ".md"] - -# The master toctree document. -master_doc = "index" - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "build", "README.md", "tutorials/README.md"] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" - - -# -- Options for HTML output ------------------------------------------------- - -html_theme = "sphinx_rtd_theme" -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} - - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = "detectron2doc" - - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). 
-latex_documents = [ - (master_doc, "detectron2.tex", "detectron2 Documentation", "detectron2 contributors", "manual") -] - - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [(master_doc, "detectron2", "detectron2 Documentation", [author], 1)] - - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ( - master_doc, - "detectron2", - "detectron2 Documentation", - author, - "detectron2", - "One line description of project.", - "Miscellaneous", - ) -] - - -# -- Options for todo extension ---------------------------------------------- - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = True - - -_DEPRECATED_NAMES = set() - - -def autodoc_skip_member(app, what, name, obj, skip, options): - # we hide something deliberately - if getattr(obj, "__HIDE_SPHINX_DOC__", False): - return True - # Hide some names that are deprecated or not intended to be used - if name in _DEPRECATED_NAMES: - return True - return None - - -_PAPER_DATA = { - "resnet": ("1512.03385", "Deep Residual Learning for Image Recognition"), - "fpn": ("1612.03144", "Feature Pyramid Networks for Object Detection"), - "mask r-cnn": ("1703.06870", "Mask R-CNN"), - "faster r-cnn": ( - "1506.01497", - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", - ), - "deformconv": ("1703.06211", "Deformable Convolutional Networks"), - "deformconv2": ("1811.11168", "Deformable ConvNets v2: More Deformable, Better Results"), - "panopticfpn": ("1901.02446", "Panoptic Feature Pyramid Networks"), - "retinanet": ("1708.02002", "Focal Loss for Dense Object Detection"), - "cascade r-cnn": ("1712.00726", "Cascade R-CNN: Delving into High Quality Object Detection"), - "lvis": ("1908.03195", "LVIS: A Dataset for Large Vocabulary Instance Segmentation"), - "rrpn": ("1703.01086", "Arbitrary-Oriented Scene Text Detection via Rotation Proposals"), - "in1k1h": ("1706.02677", "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour"), -} - - -def paper_ref_role( - typ: str, - rawtext: str, - text: str, - lineno: int, - inliner, - options: Dict = {}, - content: List[str] = [], -): - """ - Parse :paper:`xxx`. Similar to the "extlinks" sphinx extension. 
- """ - from docutils import nodes, utils - from sphinx.util.nodes import split_explicit_title - - text = utils.unescape(text) - has_explicit_title, title, link = split_explicit_title(text) - link = link.lower() - if link not in _PAPER_DATA: - inliner.reporter.warning("Cannot find paper " + link) - paper_url, paper_title = "#", link - else: - paper_url, paper_title = _PAPER_DATA[link] - if "/" not in paper_url: - paper_url = "https://arxiv.org/abs/" + paper_url - if not has_explicit_title: - title = paper_title - pnode = nodes.reference(title, title, internal=False, refuri=paper_url) - return [pnode], [] - - -def setup(app): - from recommonmark.transform import AutoStructify - - app.add_domain(GithubURLDomain) - app.connect("autodoc-skip-member", autodoc_skip_member) - app.add_role("paper", paper_ref_role) - app.add_config_value( - "recommonmark_config", - {"enable_math": True, "enable_inline_math": True, "enable_eval_rst": True}, - True, - ) - app.add_transform(AutoStructify) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/index.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/index.rst deleted file mode 100644 index 8634b7b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/index.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. detectron2 documentation master file, created by - sphinx-quickstart on Sat Sep 21 13:46:45 2019. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to detectron2's documentation! -====================================== - -.. toctree:: - :maxdepth: 2 - - tutorials/index - notes/index - modules/index diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/checkpoint.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/checkpoint.rst deleted file mode 100644 index 616cb18..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/checkpoint.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.checkpoint package -============================= - -.. automodule:: detectron2.checkpoint - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/config.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/config.rst deleted file mode 100644 index 034bd5f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/config.rst +++ /dev/null @@ -1,17 +0,0 @@ -detectron2.config package -========================= - -.. automodule:: detectron2.config - :members: - :undoc-members: - :show-inheritance: - :inherited-members: - - -Config References ------------------ - -.. literalinclude:: ../../detectron2/config/defaults.py - :language: python - :linenos: - :lines: 4- diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/data.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/data.rst deleted file mode 100644 index 3697f0e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/data.rst +++ /dev/null @@ -1,40 +0,0 @@ -detectron2.data package -======================= - -.. automodule:: detectron2.data - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.detection\_utils module ---------------------------------------- - -.. automodule:: detectron2.data.detection_utils - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.datasets module ---------------------------------------- - -.. 
automodule:: detectron2.data.datasets - :members: - :undoc-members: - :show-inheritance: - -detectron2.data.samplers module ---------------------------------------- - -.. automodule:: detectron2.data.samplers - :members: - :undoc-members: - :show-inheritance: - - -detectron2.data.transforms module ---------------------------------------- - -.. automodule:: detectron2.data.transforms - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/engine.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/engine.rst deleted file mode 100644 index bb8b533..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/engine.rst +++ /dev/null @@ -1,25 +0,0 @@ -detectron2.engine package -========================= - - -.. automodule:: detectron2.engine - :members: - :undoc-members: - :show-inheritance: - - -detectron2.engine.defaults module ---------------------------------- - -.. automodule:: detectron2.engine.defaults - :members: - :undoc-members: - :show-inheritance: - -detectron2.engine.hooks module ---------------------------------- - -.. automodule:: detectron2.engine.hooks - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/evaluation.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/evaluation.rst deleted file mode 100644 index d9d34ff..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/evaluation.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.evaluation package -============================= - -.. automodule:: detectron2.evaluation - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/export.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/export.rst deleted file mode 100644 index bb7c3c9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/export.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.export package -========================= - -.. automodule:: detectron2.export - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/index.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/index.rst deleted file mode 100644 index 1b246f5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/index.rst +++ /dev/null @@ -1,17 +0,0 @@ -API Documentation -================== - -.. toctree:: - - checkpoint - config - data - engine - evaluation - layers - model_zoo - modeling - solver - structures - utils - export diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/layers.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/layers.rst deleted file mode 100644 index 6aeb521..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/layers.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.layers package -========================= - -.. automodule:: detectron2.layers - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/model_zoo.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/model_zoo.rst deleted file mode 100644 index 8b1c7d5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/model_zoo.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.model_zoo package -============================ - -.. 
automodule:: detectron2.model_zoo - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/modeling.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/modeling.rst deleted file mode 100644 index 58ccd2c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/modeling.rst +++ /dev/null @@ -1,58 +0,0 @@ -detectron2.modeling package -=========================== - -.. automodule:: detectron2.modeling - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.poolers module ---------------------------------------- - -.. automodule:: detectron2.modeling.poolers - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.sampling module ------------------------------------- - -.. automodule:: detectron2.modeling.sampling - :members: - :undoc-members: - :show-inheritance: - - -detectron2.modeling.box_regression module ------------------------------------------- - -.. automodule:: detectron2.modeling.box_regression - :members: - :undoc-members: - :show-inheritance: - - -Model Registries ------------------ - -These are different registries provided in modeling. -Each registry provide you the ability to replace it with your customized component, -without having to modify detectron2's code. - -Note that it is impossible to allow users to customize any line of code directly. -Even just to add one line at some place, -you'll likely need to find out the smallest registry which contains that line, -and register your component to that registry. - - -.. autodata:: detectron2.modeling.META_ARCH_REGISTRY -.. autodata:: detectron2.modeling.BACKBONE_REGISTRY -.. autodata:: detectron2.modeling.PROPOSAL_GENERATOR_REGISTRY -.. autodata:: detectron2.modeling.RPN_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ANCHOR_GENERATOR_REGISTRY -.. autodata:: detectron2.modeling.ROI_HEADS_REGISTRY -.. autodata:: detectron2.modeling.ROI_BOX_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ROI_MASK_HEAD_REGISTRY -.. autodata:: detectron2.modeling.ROI_KEYPOINT_HEAD_REGISTRY diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/solver.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/solver.rst deleted file mode 100644 index 7f4a49f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/solver.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.solver package -========================= - -.. automodule:: detectron2.solver - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/structures.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/structures.rst deleted file mode 100644 index 5701c61..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/structures.rst +++ /dev/null @@ -1,7 +0,0 @@ -detectron2.structures package -============================= - -.. automodule:: detectron2.structures - :members: - :undoc-members: - :show-inheritance: diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/utils.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/utils.rst deleted file mode 100644 index 8b57292..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/modules/utils.rst +++ /dev/null @@ -1,80 +0,0 @@ -detectron2.utils package -======================== - -detectron2.utils.colormap module --------------------------------- - -.. 
automodule:: detectron2.utils.colormap - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.comm module ----------------------------- - -.. automodule:: detectron2.utils.comm - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.events module ------------------------------- - -.. automodule:: detectron2.utils.events - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.logger module ------------------------------- - -.. automodule:: detectron2.utils.logger - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.registry module --------------------------------- - -.. automodule:: detectron2.utils.registry - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.memory module ----------------------------------- - -.. automodule:: detectron2.utils.memory - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.analysis module ----------------------------------- - -.. automodule:: detectron2.utils.analysis - :members: - :undoc-members: - :show-inheritance: - - -detectron2.utils.visualizer module ----------------------------------- - -.. automodule:: detectron2.utils.visualizer - :members: - :undoc-members: - :show-inheritance: - -detectron2.utils.video\_visualizer module ------------------------------------------ - -.. automodule:: detectron2.utils.video_visualizer - :members: - :undoc-members: - :show-inheritance: - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/benchmarks.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/benchmarks.md deleted file mode 100644 index 963f921..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/benchmarks.md +++ /dev/null @@ -1,196 +0,0 @@ - -# Benchmarks - -Here we benchmark the training speed of a Mask R-CNN in detectron2, -with some other popular open source Mask R-CNN implementations. - - -### Settings - -* Hardware: 8 NVIDIA V100s with NVLink. -* Software: Python 3.7, CUDA 10.1, cuDNN 7.6.5, PyTorch 1.5, - TensorFlow 1.15.0rc2, Keras 2.2.5, MxNet 1.6.0b20190820. -* Model: an end-to-end R-50-FPN Mask-RCNN model, using the same hyperparameter as the - [Detectron baseline config](https://github.com/facebookresearch/Detectron/blob/master/configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml) - (it does no have scale augmentation). -* Metrics: We use the average throughput in iterations 100-500 to skip GPU warmup time. - Note that for R-CNN-style models, the throughput of a model typically changes during training, because - it depends on the predictions of the model. Therefore this metric is not directly comparable with - "train speed" in model zoo, which is the average speed of the entire training run. - - -### Main Results - -```eval_rst -+-------------------------------+--------------------+ -| Implementation | Throughput (img/s) | -+===============================+====================+ -| |D2| |PT| | 62 | -+-------------------------------+--------------------+ -| mmdetection_ |PT| | 53 | -+-------------------------------+--------------------+ -| maskrcnn-benchmark_ |PT| | 53 | -+-------------------------------+--------------------+ -| tensorpack_ |TF| | 50 | -+-------------------------------+--------------------+ -| simpledet_ |mxnet| | 39 | -+-------------------------------+--------------------+ -| Detectron_ |C2| | 19 | -+-------------------------------+--------------------+ -| `matterport/Mask_RCNN`__ |TF| | 14 | -+-------------------------------+--------------------+ - -.. 
_maskrcnn-benchmark: https://github.com/facebookresearch/maskrcnn-benchmark/ -.. _tensorpack: https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN -.. _mmdetection: https://github.com/open-mmlab/mmdetection/ -.. _simpledet: https://github.com/TuSimple/simpledet/ -.. _Detectron: https://github.com/facebookresearch/Detectron -__ https://github.com/matterport/Mask_RCNN/ - -.. |D2| image:: https://github.com/facebookresearch/detectron2/raw/master/.github/Detectron2-Logo-Horz.svg?sanitize=true - :height: 15pt - :target: https://github.com/facebookresearch/detectron2/ -.. |PT| image:: https://pytorch.org/assets/images/logo-icon.svg - :width: 15pt - :height: 15pt - :target: https://pytorch.org -.. |TF| image:: https://static.nvidiagrid.net/ngc/containers/tensorflow.png - :width: 15pt - :height: 15pt - :target: https://tensorflow.org -.. |mxnet| image:: https://github.com/dmlc/web-data/raw/master/mxnet/image/mxnet_favicon.png - :width: 15pt - :height: 15pt - :target: https://mxnet.apache.org/ -.. |C2| image:: https://caffe2.ai/static/logo.svg - :width: 15pt - :height: 15pt - :target: https://caffe2.ai -``` - - -Details for each implementation: - -* __Detectron2__: with release v0.1.2, run: - ``` - python tools/train_net.py --config-file configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml --num-gpus 8 - ``` - -* __mmdetection__: at commit `b0d845f`, run - ``` - ./tools/dist_train.sh configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py 8 - ``` - -* __maskrcnn-benchmark__: use commit `0ce8f6f` with `sed -i ‘s/torch.uint8/torch.bool/g’ **/*.py; sed -i 's/AT_CHECK/TORCH_CHECK/g' **/*.cu` - to make it compatible with PyTorch 1.5. Then, run training with - ``` - python -m torch.distributed.launch --nproc_per_node=8 tools/train_net.py --config-file configs/e2e_mask_rcnn_R_50_FPN_1x.yaml - ``` - The speed we observed is faster than its model zoo, likely due to different software versions. - -* __tensorpack__: at commit `caafda`, `export TF_CUDNN_USE_AUTOTUNE=0`, then run - ``` - mpirun -np 8 ./train.py --config DATA.BASEDIR=/data/coco TRAINER=horovod BACKBONE.STRIDE_1X1=True TRAIN.STEPS_PER_EPOCH=50 --load ImageNet-R50-AlignPadding.npz - ``` - -* __SimpleDet__: at commit `9187a1`, run - ``` - python detection_train.py --config config/mask_r50v1_fpn_1x.py - ``` - -* __Detectron__: run - ``` - python tools/train_net.py --cfg configs/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml - ``` - Note that many of its ops run on CPUs, therefore the performance is limited. - -* __matterport/Mask_RCNN__: at commit `3deaec`, apply the following diff, `export TF_CUDNN_USE_AUTOTUNE=0`, then run - ``` - python coco.py train --dataset=/data/coco/ --model=imagenet - ``` - Note that many small details in this implementation might be different - from Detectron's standards. - -
- - (diff to make it use the same hyperparameters - click to expand) - - - ```diff - diff --git i/mrcnn/model.py w/mrcnn/model.py - index 62cb2b0..61d7779 100644 - --- i/mrcnn/model.py - +++ w/mrcnn/model.py - @@ -2367,8 +2367,8 @@ class MaskRCNN(): - epochs=epochs, - steps_per_epoch=self.config.STEPS_PER_EPOCH, - callbacks=callbacks, - - validation_data=val_generator, - - validation_steps=self.config.VALIDATION_STEPS, - + #validation_data=val_generator, - + #validation_steps=self.config.VALIDATION_STEPS, - max_queue_size=100, - workers=workers, - use_multiprocessing=True, - diff --git i/mrcnn/parallel_model.py w/mrcnn/parallel_model.py - index d2bf53b..060172a 100644 - --- i/mrcnn/parallel_model.py - +++ w/mrcnn/parallel_model.py - @@ -32,6 +32,7 @@ class ParallelModel(KM.Model): - keras_model: The Keras model to parallelize - gpu_count: Number of GPUs. Must be > 1 - """ - + super().__init__() - self.inner_model = keras_model - self.gpu_count = gpu_count - merged_outputs = self.make_parallel() - diff --git i/samples/coco/coco.py w/samples/coco/coco.py - index 5d172b5..239ed75 100644 - --- i/samples/coco/coco.py - +++ w/samples/coco/coco.py - @@ -81,7 +81,10 @@ class CocoConfig(Config): - IMAGES_PER_GPU = 2 - - # Uncomment to train on 8 GPUs (default is 1) - - # GPU_COUNT = 8 - + GPU_COUNT = 8 - + BACKBONE = "resnet50" - + STEPS_PER_EPOCH = 50 - + TRAIN_ROIS_PER_IMAGE = 512 - - # Number of classes (including background) - NUM_CLASSES = 1 + 80 # COCO has 80 classes - @@ -496,29 +499,10 @@ if __name__ == '__main__': - # *** This training schedule is an example. Update to your needs *** - - # Training - Stage 1 - - print("Training network heads") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE, - epochs=40, - - layers='heads', - - augmentation=augmentation) - - - - # Training - Stage 2 - - # Finetune layers from ResNet stage 4 and up - - print("Fine tune Resnet stage 4 and up") - - model.train(dataset_train, dataset_val, - - learning_rate=config.LEARNING_RATE, - - epochs=120, - - layers='4+', - - augmentation=augmentation) - - - - # Training - Stage 3 - - # Fine tune all layers - - print("Fine tune all layers") - - model.train(dataset_train, dataset_val, - - learning_rate=config.LEARNING_RATE / 10, - - epochs=160, - - layers='all', - + layers='3+', - augmentation=augmentation) - - elif args.command == "evaluate": - ``` - -
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/changelog.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/changelog.md deleted file mode 100644 index c0d4f59..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/changelog.md +++ /dev/null @@ -1,26 +0,0 @@ -# Change Log - -### Releases -See release log at -[https://github.com/facebookresearch/detectron2/releases](https://github.com/facebookresearch/detectron2/releases). - -### Notable Backward Incompatible Changes: - -* 03/30/2020: Custom box head's `output_size` changed to `output_shape`. -* 02/14/2020,02/18/2020: Mask head and keypoint head now include logic for losses & inference. Custom heads - should overwrite the feature computation by `layers()` method. -* 11/11/2019: `detectron2.data.detection_utils.read_image` transposes images with exif information. - -### Config Version Change Log - -* v1: Rename `RPN_HEAD.NAME` to `RPN.HEAD_NAME`. -* v2: A batch of rename of many configurations before release. - -### Silent Regression in Historical Versions: - -We list a few silent regressions since they may silently produce incorrect results and will be hard to debug. - -* 04/01/2020 - 05/11/2020: Bad accuracy if `TRAIN_ON_PRED_BOXES` is set to True. -* 03/30/2020 - 04/01/2020: ResNets are not correctly built. -* 12/19/2019 - 12/26/2019: Using aspect ratio grouping causes a drop in accuracy. -* release - 11/9/2019: Test time augmentation does not predict the last category. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/compatibility.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/compatibility.md deleted file mode 100644 index f7b66c2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/compatibility.md +++ /dev/null @@ -1,83 +0,0 @@ -# Compatibility with Other Libraries - -## Compatibility with Detectron (and maskrcnn-benchmark) - -Detectron2 addresses some legacy issues left in Detectron. As a result, their models -are not compatible: -running inference with the same model weights will produce different results in the two code bases. - -The major differences regarding inference are: - -- The height and width of a box with corners (x1, y1) and (x2, y2) is now computed more naturally as - width = x2 - x1 and height = y2 - y1; - In Detectron, a "+ 1" was added both height and width. - - Note that the relevant ops in Caffe2 have [adopted this change of convention](https://github.com/pytorch/pytorch/pull/20550) - with an extra option. - So it is still possible to run inference with a Detectron2-trained model in Caffe2. - - The change in height/width calculations most notably changes: - - encoding/decoding in bounding box regression. - - non-maximum suppression. The effect here is very negligible, though. - -- RPN now uses simpler anchors with fewer quantization artifacts. - - In Detectron, the anchors were quantized and - [do not have accurate areas](https://github.com/facebookresearch/Detectron/issues/227). - In Detectron2, the anchors are center-aligned to feature grid points and not quantized. - -- Classification layers have a different ordering of class labels. - - This involves any trainable parameter with shape (..., num_categories + 1, ...). - In Detectron2, integer labels [0, K-1] correspond to the K = num_categories object categories - and the label "K" corresponds to the special "background" category. - In Detectron, label "0" means background, and labels [1, K] correspond to the K categories. 
- -- ROIAlign is implemented differently. The new implementation is [available in Caffe2](https://github.com/pytorch/pytorch/pull/23706). - - 1. All the ROIs are shifted by half a pixel compared to Detectron in order to create better image-feature-map alignment. - See `layers/roi_align.py` for details. - To enable the old behavior, use `ROIAlign(aligned=False)`, or `POOLER_TYPE=ROIAlign` instead of - `ROIAlignV2` (the default). - - 1. The ROIs are not required to have a minimum size of 1. - This will lead to tiny differences in the output, but should be negligible. - -- Mask inference function is different. - - In Detectron2, the "paste_mask" function is different and should be more accurate than in Detectron. This change - can improve mask AP on COCO by ~0.5% absolute. - -There are some other differences in training as well, but they won't affect -model-level compatibility. The major ones are: - -- We fixed a [bug](https://github.com/facebookresearch/Detectron/issues/459) in - Detectron, by making `RPN.POST_NMS_TOPK_TRAIN` per-image, rather than per-batch. - The fix may lead to a small accuracy drop for a few models (e.g. keypoint - detection) and will require some parameter tuning to match the Detectron results. -- For simplicity, we change the default loss in bounding box regression to L1 loss, instead of smooth L1 loss. - We have observed that this tends to slightly decrease box AP50 while improving box AP for higher - overlap thresholds (and leading to a slight overall improvement in box AP). -- We interpret the coordinates in COCO bounding box and segmentation annotations - as coordinates in range `[0, width]` or `[0, height]`. The coordinates in - COCO keypoint annotations are interpreted as pixel indices in range `[0, width - 1]` or `[0, height - 1]`. - Note that this affects how flip augmentation is implemented. - - -We will later share more details and rationale behind the above mentioned issues -about pixels, coordinates, and "+1"s. - - -## Compatibility with Caffe2 - -As mentioned above, despite the incompatibilities with Detectron, the relevant -ops have been implemented in Caffe2. -Therefore, models trained with detectron2 can be converted in Caffe2. -See [Deployment](../tutorials/deployment.md) for the tutorial. - -## Compatibility with TensorFlow - -Most ops are available in TensorFlow, although some tiny differences in -the implementation of resize / ROIAlign / padding need to be addressed. -A working conversion script is provided by [tensorpack FasterRCNN](https://github.com/tensorpack/tensorpack/tree/master/examples/FasterRCNN/convert_d2) -to run a standard detectron2 model in TensorFlow. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/contributing.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/contributing.md deleted file mode 100644 index 81936df..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/contributing.md +++ /dev/null @@ -1,49 +0,0 @@ -# Contributing to detectron2 - -## Issues -We use GitHub issues to track public bugs and questions. -Please make sure to follow one of the -[issue templates](https://github.com/facebookresearch/detectron2/issues/new/choose) -when reporting any issues. - -Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe -disclosure of security bugs. In those cases, please go through the process -outlined on that page and do not file a public issue. - -## Pull Requests -We actively welcome your pull requests. 
- -However, if you're adding any significant features (e.g. > 50 lines), please -make sure to have a corresponding issue to discuss your motivation and proposals, -before sending a PR. We do not always accept new features, and we take the following -factors into consideration: - -1. Whether the same feature can be achieved without modifying detectron2. -Detectron2 is designed so that you can implement many extensions from the outside, e.g. -those in [projects](https://github.com/facebookresearch/detectron2/tree/master/projects). -If some part is not as extensible, you can also bring up the issue to make it more extensible. -2. Whether the feature is potentially useful to a large audience, or only to a small portion of users. -3. Whether the proposed solution has a good design / interface. -4. Whether the proposed solution adds extra mental/practical overhead to users who don't - need such feature. -5. Whether the proposed solution breaks existing APIs. - -When sending a PR, please do: - -1. If a PR contains multiple orthogonal changes, split it to several PRs. -2. If you've added code that should be tested, add tests. -3. For PRs that need experiments (e.g. adding a new model or new methods), - you don't need to update model zoo, but do provide experiment results in the description of the PR. -4. If APIs are changed, update the documentation. -5. Make sure your code lints with `./dev/linter.sh`. - - -## Contributor License Agreement ("CLA") -In order to accept your pull request, we need you to submit a CLA. You only need -to do this once to work on any of Facebook's open source projects. - -Complete your CLA here: - -## License -By contributing to detectron2, you agree that your contributions will be licensed -under the LICENSE file in the root directory of this source tree. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/index.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/index.rst deleted file mode 100644 index 63cf907..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/notes/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -Notes -====================================== - -.. toctree:: - :maxdepth: 2 - - benchmarks - compatibility - contributing - changelog diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/README.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/README.md deleted file mode 100644 index 1ca9c94..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Read the docs: - -The latest documentation built from this directory is available at [detectron2.readthedocs.io](https://detectron2.readthedocs.io/). -Documents in this directory are not meant to be read on github. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/builtin_datasets.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/builtin_datasets.md deleted file mode 100644 index 1a2633f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/builtin_datasets.md +++ /dev/null @@ -1,99 +0,0 @@ -# Setup Builtin Datasets - -Detectron2 has builtin support for a few datasets. -The datasets are assumed to exist in a directory specified by the environment variable -`DETECTRON2_DATASETS`. -Under this directory, detectron2 expects to find datasets in the structure described below. - -You can set the location for builtin datasets by `export DETECTRON2_DATASETS=/path/to/datasets`. 
-If left unset, the default is `./datasets` relative to your current working directory. - -The [model zoo](https://github.com/facebookresearch/detectron2/blob/master/MODEL_ZOO.md) -contains configs and models that use these builtin datasets. - -## Expected dataset structure for COCO instance/keypoint detection: - -``` -coco/ - annotations/ - instances_{train,val}2017.json - person_keypoints_{train,val}2017.json - {train,val}2017/ - # image files that are mentioned in the corresponding json -``` - -You can use the 2014 version of the dataset as well. - -Some of the builtin tests (`dev/run_*_tests.sh`) uses a tiny version of the COCO dataset, -which you can download with `./prepare_for_tests.sh`. - -## Expected dataset structure for PanopticFPN: - -``` -coco/ - annotations/ - panoptic_{train,val}2017.json - panoptic_{train,val}2017/ # png annotations - panoptic_stuff_{train,val}2017/ # generated by the script mentioned below -``` - -Install panopticapi by: -``` -pip install git+https://github.com/cocodataset/panopticapi.git -``` -Then, run `python prepare_panoptic_fpn.py`, to extract semantic annotations from panoptic annotations. - -## Expected dataset structure for LVIS instance segmentation: -``` -coco/ - {train,val,test}2017/ -lvis/ - lvis_v0.5_{train,val}.json - lvis_v0.5_image_info_test.json -``` - -Install lvis-api by: -``` -pip install git+https://github.com/lvis-dataset/lvis-api.git -``` - -Run `python prepare_cocofied_lvis.py` to prepare "cocofied" LVIS annotations for evaluation of models trained on the COCO dataset. - -## Expected dataset structure for cityscapes: -``` -cityscapes/ - gtFine/ - train/ - aachen/ - color.png, instanceIds.png, labelIds.png, polygons.json, - labelTrainIds.png - ... - val/ - test/ - leftImg8bit/ - train/ - val/ - test/ -``` -Install cityscapes scripts by: -``` -pip install git+https://github.com/mcordts/cityscapesScripts.git -``` - -Note: labelTrainIds.png are created using cityscapesescript with: -``` -CITYSCAPES_DATASET=$DETECTRON2_DATASETS/cityscapes python cityscapesscripts/preparation/createTrainIdLabelImgs.py -``` -They are not needed for instance segmentation. - -## Expected dataset structure for Pascal VOC: -``` -VOC20{07,12}/ - Annotations/ - ImageSets/ - Main/ - trainval.txt - test.txt - # train.txt or val.txt, if you use these splits - JPEGImages/ -``` diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/configs.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/configs.md deleted file mode 100644 index ea82583..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/configs.md +++ /dev/null @@ -1,58 +0,0 @@ -# Configs - -Detectron2 provides a key-value based config system that can be -used to obtain standard, common behaviors. - -Detectron2's config system uses YAML and [yacs](https://github.com/rbgirshick/yacs). -In addition to the [basic operations](../modules/config.html#detectron2.config.CfgNode) -that access and update a config, we provide the following extra functionalities: - -1. The config can have `_BASE_: base.yaml` field, which will load a base config first. - Values in the base config will be overwritten in sub-configs, if there are any conflicts. - We provided several base configs for standard model architectures. -2. We provide config versioning, for backward compatibility. - If your config file is versioned with a config line like `VERSION: 2`, - detectron2 will still recognize it even if we change some keys in the future. 
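As a quick, hypothetical illustration of the `_BASE_` mechanism above: the sketch below writes two tiny config files and loads the child through `CfgNode.merge_from_file`, which resolves `_BASE_` relative to the child file. The file names and values are made up for the example; only keys that already exist in detectron2's default config can be overridden this way.

```python
# Illustration only: file names and values are hypothetical.
from pathlib import Path
from detectron2.config import get_cfg

Path("base.yaml").write_text("MODEL:\n  MASK_ON: True\nSOLVER:\n  BASE_LR: 0.02\n")
Path("child.yaml").write_text("_BASE_: base.yaml\nSOLVER:\n  BASE_LR: 0.0025\n")

cfg = get_cfg()                    # start from detectron2's default config
cfg.merge_from_file("child.yaml")  # loads base.yaml first, then applies the child's overrides
print(cfg.MODEL.MASK_ON, cfg.SOLVER.BASE_LR)  # True 0.0025 -- inherited value kept, conflict overridden
```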
- -"Config" is a very limited abstraction. -We do not expect all features in detectron2 to be available through configs. -If you need something that's not available in the config space, -please write code using detectron2's API. - -### Basic Usage - -Some basic usage of the `CfgNode` object is shown here. See more in [documentation](../modules/config.html#detectron2.config.CfgNode). -```python -from detectron2.config import get_cfg -cfg = get_cfg() # obtain detectron2's default config -cfg.xxx = yyy # add new configs for your own custom components -cfg.merge_from_file("my_cfg.yaml") # load values from a file - -cfg.merge_from_list(["MODEL.WEIGHTS", "weights.pth"]) # can also load values from a list of str -print(cfg.dump()) # print formatted configs -``` - -Many builtin tools in detectron2 accepts command line config overwrite: -Key-value pairs provided in the command line will overwrite the existing values in the config file. -For example, [demo.py](../../demo/demo.py) can be used with -``` -./demo.py --config-file config.yaml [--other-options] \ - --opts MODEL.WEIGHTS /path/to/weights INPUT.MIN_SIZE_TEST 1000 -``` - -To see a list of available configs in detectron2 and what they mean, -check [Config References](../modules/config.html#config-references) - - -### Best Practice with Configs - -1. Treat the configs you write as "code": avoid copying them or duplicating them; use `_BASE_` - to share common parts between configs. - -2. Keep the configs you write simple: don't include keys that do not affect the experimental setting. - -3. Keep a version number in your configs (or the base config), e.g., `VERSION: 2`, - for backward compatibility. - We print a warning when reading a config without version number. - The official configs do not include version number because they are meant to - be always up-to-date. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/data_loading.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/data_loading.md deleted file mode 100644 index bb037ca..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/data_loading.md +++ /dev/null @@ -1,77 +0,0 @@ - -# Use Custom Dataloaders - -## How the Existing Dataloader Works - -Detectron2 contains a builtin data loading pipeline. -It's good to understand how it works, in case you need to write a custom one. - -Detectron2 provides two functions -[build_detection_{train,test}_loader](../modules/data.html#detectron2.data.build_detection_train_loader) -that create a default data loader from a given config. -Here is how `build_detection_{train,test}_loader` work: - -1. It takes the name of a registered dataset (e.g., "coco_2017_train") and loads a `list[dict]` representing the dataset items - in a lightweight, canonical format. These dataset items are not yet ready to be used by the model (e.g., images are - not loaded into memory, random augmentations have not been applied, etc.). - Details about the dataset format and dataset registration can be found in - [datasets](./datasets.md). -2. Each dict in this list is mapped by a function ("mapper"): - * Users can customize this mapping function by specifying the "mapper" argument in - `build_detection_{train,test}_loader`. The default mapper is [DatasetMapper](../modules/data.html#detectron2.data.DatasetMapper). - * The output format of such function can be arbitrary, as long as it is accepted by the consumer of this data loader (usually the model). 
- The outputs of the default mapper, after batching, follow the default model input format documented in - [Use Models](./models.html#model-input-format). - * The role of the mapper is to transform the lightweight, canonical representation of a dataset item into a format - that is ready for the model to consume (including, e.g., read images, perform random data augmentation and convert to torch Tensors). - If you would like to perform custom transformations to data, you often want a custom mapper. -3. The outputs of the mapper are batched (simply into a list). -4. This batched data is the output of the data loader. Typically, it's also the input of - `model.forward()`. - - -## Write a Custom Dataloader - -Using a different "mapper" with `build_detection_{train,test}_loader(mapper=)` works for most use cases -of custom data loading. -For example, if you want to resize all images to a fixed size for Mask R-CNN training, write this: - -```python -from detectron2.data import build_detection_train_loader -from detectron2.data import transforms as T -from detectron2.data import detection_utils as utils - -def mapper(dataset_dict): - # Implement a mapper, similar to the default DatasetMapper, but with your own customizations - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - image = utils.read_image(dataset_dict["file_name"], format="BGR") - image, transforms = T.apply_transform_gens([T.Resize((800, 800))], image) - dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) - - annos = [ - utils.transform_instance_annotations(obj, transforms, image.shape[:2]) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - instances = utils.annotations_to_instances(annos, image.shape[:2]) - dataset_dict["instances"] = utils.filter_empty_instances(instances) - return dataset_dict - -data_loader = build_detection_train_loader(cfg, mapper=mapper) -# use this dataloader instead of the default -``` -Refer to [API documentation of detectron2.data](../modules/data) for details. - -If you want to change not only the mapper (e.g., to write different sampling or batching logic), -you can write your own data loader. The data loader is simply a -python iterator that produces [the format](./models.md) your model accepts. -You can implement it using any tools you like. - -## Use a Custom Dataloader - -If you use [DefaultTrainer](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer), -you can overwrite its `build_{train,test}_loader` method to use your own dataloader. -See the [densepose dataloader](../../projects/DensePose/train_net.py) -for an example. - -If you write your own training loop, you can plug in your data loader easily. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/datasets.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/datasets.md deleted file mode 100644 index 8dc1c0c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/datasets.md +++ /dev/null @@ -1,221 +0,0 @@ -# Use Custom Datasets - -Datasets that have builtin support in detectron2 are listed in [datasets](../../datasets). -If you want to use a custom dataset while also reusing detectron2's data loaders, -you will need to - -1. __Register__ your dataset (i.e., tell detectron2 how to obtain your dataset). -2. Optionally, __register metadata__ for your dataset. - -Next, we explain the above two concepts in detail. 
- -The [Colab tutorial](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -has a live example of how to register and train on a dataset of custom formats. - -### Register a Dataset - -To let detectron2 know how to obtain a dataset named "my_dataset", you will implement -a function that returns the items in your dataset and then tell detectron2 about this -function: -```python -def my_dataset_function(): - ... - return list[dict] in the following format - -from detectron2.data import DatasetCatalog -DatasetCatalog.register("my_dataset", my_dataset_function) -``` - -Here, the snippet associates a dataset "my_dataset" with a function that returns the data. -The registration stays effective until the process exists. - -The function can processes data from its original format into either one of the following: -1. Detectron2's standard dataset dict, described below. This will work with many other builtin - features in detectron2, so it's recommended to use it when it's sufficient for your task. -2. Your custom dataset dict. You can also return arbitrary dicts in your own format, - such as adding extra keys for new tasks. - Then you will need to handle them properly downstream as well. - See below for more details. - -#### Standard Dataset Dicts - -For standard tasks -(instance detection, instance/semantic/panoptic segmentation, keypoint detection), -we load the original dataset into `list[dict]` with a specification similar to COCO's json annotations. -This is our standard representation for a dataset. - -Each dict contains information about one image. -The dict may have the following fields, -and the required fields vary based on what the dataloader or the task needs (see more below). - -+ `file_name`: the full path to the image file. Will apply rotation and flipping if the image has such exif information. -+ `height`, `width`: integer. The shape of image. -+ `image_id` (str or int): a unique id that identifies this image. Used - during evaluation to identify the images, but a dataset may use it for different purposes. -+ `annotations` (list[dict]): each dict corresponds to annotations of one instance - in this image. Required by instance detection/segmentation or keypoint detection tasks. - - Images with empty `annotations` will by default be removed from training, - but can be included using `DATALOADER.FILTER_EMPTY_ANNOTATIONS`. - - Each dict contains the following keys, of which `bbox`,`bbox_mode` and `category_id` are required: - + `bbox` (list[float]): list of 4 numbers representing the bounding box of the instance. - + `bbox_mode` (int): the format of bbox. - It must be a member of - [structures.BoxMode](../modules/structures.html#detectron2.structures.BoxMode). - Currently supports: `BoxMode.XYXY_ABS`, `BoxMode.XYWH_ABS`. - + `category_id` (int): an integer in the range [0, num_categories) representing the category label. - The value num_categories is reserved to represent the "background" category, if applicable. - + `segmentation` (list[list[float]] or dict): the segmentation mask of the instance. - + If `list[list[float]]`, it represents a list of polygons, one for each connected component - of the object. Each `list[float]` is one simple polygon in the format of `[x1, y1, ..., xn, yn]`. - The Xs and Ys are either relative coordinates in [0, 1], or absolute coordinates, - depend on whether "bbox_mode" is relative. - + If `dict`, it represents the per-pixel segmentation mask in COCO's RLE format. The dict should have - keys "size" and "counts". 
You can convert a uint8 segmentation mask of 0s and 1s into - RLE format by `pycocotools.mask.encode(np.asarray(mask, order="F"))`. - + `keypoints` (list[float]): in the format of [x1, y1, v1,..., xn, yn, vn]. - v[i] means the [visibility](http://cocodataset.org/#format-data) of this keypoint. - `n` must be equal to the number of keypoint categories. - The Xs and Ys are either relative coordinates in [0, 1], or absolute coordinates, - depend on whether "bbox_mode" is relative. - - Note that the coordinate annotations in COCO format are integers in range [0, H-1 or W-1]. - By default, detectron2 adds 0.5 to absolute keypoint coordinates to convert them from discrete - pixel indices to floating point coordinates. - + `iscrowd`: 0 (default) or 1. Whether this instance is labeled as COCO's "crowd - region". Don't include this field if you don't know what it means. -+ `sem_seg_file_name`: the full path to the ground truth semantic segmentation file. - Required by semantic segmentation task. - It should be an image whose pixel values are integer labels. - - -Fast R-CNN (with precomputed proposals) is rarely used today. -To train a Fast R-CNN, the following extra keys are needed: - -+ `proposal_boxes` (array): 2D numpy array with shape (K, 4) representing K precomputed proposal boxes for this image. -+ `proposal_objectness_logits` (array): numpy array with shape (K, ), which corresponds to the objectness - logits of proposals in 'proposal_boxes'. -+ `proposal_bbox_mode` (int): the format of the precomputed proposal bbox. - It must be a member of - [structures.BoxMode](../modules/structures.html#detectron2.structures.BoxMode). - Default is `BoxMode.XYXY_ABS`. - -#### Custom Dataset Dicts for New Tasks - -In the `list[dict]` that your dataset function returns, the dictionary can also have arbitrary custom data. -This will be useful for a new task that needs extra information not supported -by the standard dataset dicts. In this case, you need to make sure the downstream code can handle your data -correctly. Usually this requires writing a new `mapper` for the dataloader (see [Use Custom Dataloaders](./data_loading.md)). - -When designing a custom format, note that all dicts are stored in memory -(sometimes serialized and with multiple copies). -To save memory, each dict is meant to contain small but sufficient information -about each sample, such as file names and annotations. -Loading full samples typically happens in the data loader. - -For attributes shared among the entire dataset, use `Metadata` (see below). -To avoid extra memory, do not save such information repeatly for each sample. - -### "Metadata" for Datasets - -Each dataset is associated with some metadata, accessible through -`MetadataCatalog.get(dataset_name).some_metadata`. -Metadata is a key-value mapping that contains information that's shared among -the entire dataset, and usually is used to interpret what's in the dataset, e.g., -names of classes, colors of classes, root of files, etc. -This information will be useful for augmentation, evaluation, visualization, logging, etc. -The structure of metadata depends on the what is needed from the corresponding downstream code. - -If you register a new dataset through `DatasetCatalog.register`, -you may also want to add its corresponding metadata through -`MetadataCatalog.get(dataset_name).some_key = some_value`, to enable any features that need the metadata. 
-You can do it like this (using the metadata key "thing_classes" as an example): - -```python -from detectron2.data import MetadataCatalog -MetadataCatalog.get("my_dataset").thing_classes = ["person", "dog"] -``` - -Here is a list of metadata keys that are used by builtin features in detectron2. -If you add your own dataset without these metadata, some features may be -unavailable to you: - -* `thing_classes` (list[str]): Used by all instance detection/segmentation tasks. - A list of names for each instance/thing category. - If you load a COCO format dataset, it will be automatically set by the function `load_coco_json`. - -* `thing_colors` (list[tuple(r, g, b)]): Pre-defined color (in [0, 255]) for each thing category. - Used for visualization. If not given, random colors are used. - -* `stuff_classes` (list[str]): Used by semantic and panoptic segmentation tasks. - A list of names for each stuff category. - -* `stuff_colors` (list[tuple(r, g, b)]): Pre-defined color (in [0, 255]) for each stuff category. - Used for visualization. If not given, random colors are used. - -* `keypoint_names` (list[str]): Used by keypoint localization. A list of names for each keypoint. - -* `keypoint_flip_map` (list[tuple[str]]): Used by the keypoint localization task. A list of pairs of names, - where each pair are the two keypoints that should be flipped if the image is - flipped horizontally during augmentation. -* `keypoint_connection_rules`: list[tuple(str, str, (r, g, b))]. Each tuple specifies a pair of keypoints - that are connected and the color to use for the line between them when visualized. - -Some additional metadata that are specific to the evaluation of certain datasets (e.g. COCO): - -* `thing_dataset_id_to_contiguous_id` (dict[int->int]): Used by all instance detection/segmentation tasks in the COCO format. - A mapping from instance class ids in the dataset to contiguous ids in range [0, #class). - Will be automatically set by the function `load_coco_json`. - -* `stuff_dataset_id_to_contiguous_id` (dict[int->int]): Used when generating prediction json files for - semantic/panoptic segmentation. - A mapping from semantic segmentation class ids in the dataset - to contiguous ids in [0, num_categories). It is useful for evaluation only. - -* `json_file`: The COCO annotation json file. Used by COCO evaluation for COCO-format datasets. -* `panoptic_root`, `panoptic_json`: Used by panoptic evaluation. -* `evaluator_type`: Used by the builtin main training script to select - evaluator. Don't use it in a new training script. - You can just provide the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator) - for your dataset directly in your main script. - -NOTE: For background on the concept of "thing" and "stuff", see -[On Seeing Stuff: The Perception of Materials by Humans and Machines](http://persci.mit.edu/pub_pdfs/adelson_spie_01.pdf). -In detectron2, the term "thing" is used for instance-level tasks, -and "stuff" is used for semantic segmentation tasks. -Both are used in panoptic segmentation. 
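Putting the registration and metadata pieces above together, a minimal sketch could look like the following. The dataset name, file path, and class names are hypothetical, and a real dataset function would parse your annotation files rather than return a hard-coded record.

```python
# Hypothetical end-to-end registration; the name, path and classes are made up.
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

def get_my_dataset_dicts():
    # In practice, parse your annotation files here.
    return [{
        "file_name": "images/0001.jpg",
        "image_id": 0,
        "height": 480,
        "width": 640,
        "annotations": [{
            "bbox": [100.0, 120.0, 200.0, 250.0],
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 0,  # index into thing_classes below
        }],
    }]

DatasetCatalog.register("my_dataset_train", get_my_dataset_dicts)
meta = MetadataCatalog.get("my_dataset_train")
meta.thing_classes = ["balloon"]
meta.thing_colors = [(0, 200, 0)]  # optional; used for visualization
```

Once registered, "my_dataset_train" can be referred to by name in `cfg.DATASETS.TRAIN`, as discussed in "Update the Config for New Datasets" below.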
- -### Register a COCO Format Dataset - -If your dataset is already a json file in the COCO format, -the dataset and its associated metadata can be registered easily with: -```python -from detectron2.data.datasets import register_coco_instances -register_coco_instances("my_dataset", {}, "json_annotation.json", "path/to/image/dir") -``` - -If your dataset is in COCO format but with extra custom per-instance annotations, -the [load_coco_json](../modules/data.html#detectron2.data.datasets.load_coco_json) -function might be useful. - -### Update the Config for New Datasets - -Once you've registered the dataset, you can use the name of the dataset (e.g., "my_dataset" in -example above) in `cfg.DATASETS.{TRAIN,TEST}`. -There are other configs you might want to change to train or evaluate on new datasets: - -* `MODEL.ROI_HEADS.NUM_CLASSES` and `MODEL.RETINANET.NUM_CLASSES` are the number of thing classes - for R-CNN and RetinaNet models, respectively. -* `MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS` sets the number of keypoints for Keypoint R-CNN. - You'll also need to set [Keypoint OKS](http://cocodataset.org/#keypoints-eval) - with `TEST.KEYPOINT_OKS_SIGMAS` for evaluation. -* `MODEL.SEM_SEG_HEAD.NUM_CLASSES` sets the number of stuff classes for Semantic FPN & Panoptic FPN. -* If you're training Fast R-CNN (with precomputed proposals), `DATASETS.PROPOSAL_FILES_{TRAIN,TEST}` - need to match the datasets. The format of proposal files are documented - [here](../modules/data.html#detectron2.data.load_proposals_into_dataset). - -New models -(e.g. [TensorMask](../../projects/TensorMask), -[PointRend](../../projects/PointRend)) -often have similar configs of their own that need to be changed as well. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/deployment.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/deployment.md deleted file mode 100644 index a473247..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/deployment.md +++ /dev/null @@ -1,92 +0,0 @@ -# Deployment - -## Caffe2 Deployment -We currently support converting a detectron2 model to Caffe2 format through ONNX. -The converted Caffe2 model is able to run without detectron2 dependency in either Python or C++. -It has a runtime optimized for CPU & mobile inference, but not for GPU inference. - -Caffe2 conversion requires PyTorch ≥ 1.4 and ONNX ≥ 1.6. - -### Coverage - -It supports 3 most common meta architectures: `GeneralizedRCNN`, `RetinaNet`, `PanopticFPN`, -and most official models under these 3 meta architectures. - -Users' custom extensions under these architectures (added through registration) are supported -as long as they do not contain control flow or operators not available in Caffe2 (e.g. deformable convolution). -For example, custom backbones and heads are often supported out of the box. - -### Usage - -The conversion APIs are documented at [the API documentation](../modules/export). -We provide a tool, `caffe2_converter.py` as an example that uses -these APIs to convert a standard model. 
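If you prefer to drive the conversion from Python rather than through `caffe2_converter.py`, a rough outline is sketched below. It assumes the `add_export_config` and `export_caffe2_model` helpers documented in the export API linked above, plus a prepared COCO dataset to supply a sample input for tracing; treat it as a sketch, not a drop-in script. The command-line usage of the tool, which wraps essentially the same steps, follows.

```python
# Rough sketch only; assumes add_export_config/export_caffe2_model from
# detectron2.export and a prepared COCO dataset for the sample input.
from detectron2.config import get_cfg
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.data import build_detection_test_loader
from detectron2.export import add_export_config, export_caffe2_model

cfg = get_cfg()
cfg.merge_from_file("configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
cfg.MODEL.DEVICE = "cpu"
cfg = add_export_config(cfg)

model = build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()

# Tracing needs one real sample input, which is why a dataset is required.
data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
first_batch = next(iter(data_loader))

caffe2_model = export_caffe2_model(cfg, model, first_batch)
caffe2_model.save_protobuf("./caffe2_model")  # writes model.pb and model_init.pb
```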
- -To convert an official Mask R-CNN trained on COCO, first -[prepare the COCO dataset](../../datasets/), then pick the model from [Model Zoo](../../MODEL_ZOO.md), and run: -``` -cd tools/deploy/ && ./caffe2_converter.py --config-file ../../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --output ./caffe2_model --run-eval \ - MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl \ - MODEL.DEVICE cpu -``` - -Note that: -1. The conversion needs valid sample inputs & weights to trace the model. That's why the script requires the dataset. - You can modify the script to obtain sample inputs in other ways. -2. With the `--run-eval` flag, it will evaluate the converted models to verify its accuracy. - The accuracy is typically slightly different (within 0.1 AP) from PyTorch due to - numerical precisions between different implementations. - It's recommended to always verify the accuracy in case your custom model is not supported by the - conversion. - -The converted model is available at the specified `caffe2_model/` directory. Two files `model.pb` -and `model_init.pb` that contain network structure and network parameters are necessary for deployment. -These files can then be loaded in C++ or Python using Caffe2's APIs. - -The script generates `model.svg` file which contains a visualization of the network. -You can also load `model.pb` to tools such as [netron](https://github.com/lutzroeder/netron) to visualize it. - -### Use the model in C++/Python - -The model can be loaded in C++. An example [caffe2_mask_rcnn.cpp](../../tools/deploy/) is given, -which performs CPU/GPU inference using `COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x`. - -The C++ example needs to be built with: -* PyTorch with caffe2 inside -* gflags, glog, opencv -* protobuf headers that match the version of your caffe2 -* MKL headers if caffe2 is built with MKL - -The following can compile the example inside [official detectron2 docker](../../docker/): -``` -sudo apt update && sudo apt install libgflags-dev libgoogle-glog-dev libopencv-dev -pip install mkl-include -wget https://github.com/protocolbuffers/protobuf/releases/download/v3.6.1/protobuf-cpp-3.6.1.tar.gz -tar xf protobuf-cpp-3.6.1.tar.gz -export CPATH=$(readlink -f ./protobuf-3.6.1/src/):$HOME/.local/include -export CMAKE_PREFIX_PATH=$HOME/.local/lib/python3.6/site-packages/torch/ -mkdir build && cd build -cmake -DTORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST .. && make - -# To run: -./caffe2_mask_rcnn --predict_net=./model.pb --init_net=./model_init.pb --input=input.jpg -``` - -Note that: - -* All converted models (the .pb files) take two input tensors: - "data" is an NCHW image, and "im_info" is an Nx3 tensor consisting of (height, width, 1.0) for - each image (the shape of "data" might be larger than that in "im_info" due to padding). - -* The converted models do not contain post-processing operations that - transform raw layer outputs into formatted predictions. - The example only produces raw outputs (28x28 masks) from the final - layers that are not post-processed, because in actual deployment, an application often needs - its custom lightweight post-processing (e.g. full-image masks for every detected object is often not necessary). - -We also provide a python wrapper around the converted model, in the -[Caffe2Model.\_\_call\_\_](../modules/export.html#detectron2.export.Caffe2Model.__call__) method. 
-This method has an interface that's identical to the [pytorch versions of models](./models.md), -and it internally applies pre/post-processing code to match the formats. -They can serve as a reference for pre/post-processing in actual deployment. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/evaluation.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/evaluation.md deleted file mode 100644 index c71adb7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/evaluation.md +++ /dev/null @@ -1,43 +0,0 @@ - -# Evaluation - -Evaluation is a process that takes a number of inputs/outputs pairs and aggregate them. -You can always [use the model](./models.md) directly and just parse its inputs/outputs manually to perform -evaluation. -Alternatively, evaluation is implemented in detectron2 using the [DatasetEvaluator](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluator) -interface. - -Detectron2 includes a few `DatasetEvaluator` that computes metrics using standard dataset-specific -APIs (e.g., COCO, LVIS). -You can also implement your own `DatasetEvaluator` that performs some other jobs -using the inputs/outputs pairs. -For example, to count how many instances are detected on the validation set: - -``` -class Counter(DatasetEvaluator): - def reset(self): - self.count = 0 - def process(self, inputs, outputs): - for output in outputs: - self.count += len(output["instances"]) - def evaluate(self): - # save self.count somewhere, or print it, or return it. - return {"count": self.count} -``` - -Once you have some `DatasetEvaluator`, you can run it with -[inference_on_dataset](../modules/evaluation.html#detectron2.evaluation.inference_on_dataset). -For example, - -```python -val_results = inference_on_dataset( - model, - val_data_loader, - DatasetEvaluators([COCOEvaluator(...), Counter()])) -``` -Compared to running the evaluation manually using the model, the benefit of this function is that -you can merge evaluators together using [DatasetEvaluators](../modules/evaluation.html#detectron2.evaluation.DatasetEvaluators). -In this way you can run all evaluations without having to go through the dataset multiple times. - -The `inference_on_dataset` function also provides accurate speed benchmarks for the -given model and dataset. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/extend.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/extend.md deleted file mode 100644 index 4232185..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/extend.md +++ /dev/null @@ -1,53 +0,0 @@ -# Extend Detectron2's Defaults - -__Research is about doing things in new ways__. -This brings a tension in how to create abstractions in code, -which is a challenge for any research engineering project of a significant size: - -1. On one hand, it needs to have very thin abstractions to allow for the possibility of doing - everything in new ways. It should be reasonably easy to break existing - abstractions and replace them with new ones. - -2. On the other hand, such a project also needs reasonably high-level - abstractions, so that users can easily do things in standard ways, - without worrying too much about the details that only certain researchers care about. - -In detectron2, there are two types of interfaces that address this tension together: - -1. Functions and classes that take a config (`cfg`) argument - (sometimes with only a few extra arguments). 
- - Such functions and classes implement - the "standard default" behavior: it will read what it needs from the - config and do the "standard" thing. - Users only need to load a given config and pass it around, without having to worry about - which arguments are used and what they all mean. - -2. Functions and classes that have well-defined explicit arguments. - - Each of these is a small building block of the entire system. - They require users' expertise to understand what each argument should be, - and require more effort to stitch together to a larger system. - But they can be stitched together in more flexible ways. - - When you need to implement something not supported by the "standard defaults" - included in detectron2, these well-defined components can be reused. - -3. (experimental) A few classes are implemented with the - [@configurable](../../modules/config.html#detectron2.config.configurable) - decorator - they can be called with either a config, or with explicit arguments. - Their explicit argument interfaces are currently __experimental__ and subject to change. - - -If you only need the standard behavior, the [Beginner's Tutorial](./getting_started.md) -should suffice. If you need to extend detectron2 to your own needs, -see the following tutorials for more details: - -* Detectron2 includes a few standard datasets. To use custom ones, see - [Use Custom Datasets](./datasets.md). -* Detectron2 contains the standard logic that creates a data loader for training/testing from a - dataset, but you can write your own as well. See [Use Custom Data Loaders](./data_loading.md). -* Detectron2 implements many standard detection models, and provide ways for you - to overwrite their behaviors. See [Use Models](./models.md) and [Write Models](./write-models.md). -* Detectron2 provides a default training loop that is good for common training tasks. - You can customize it with hooks, or write your own loop instead. See [training](./training.md). diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/getting_started.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/getting_started.md deleted file mode 100644 index acaf13f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/getting_started.md +++ /dev/null @@ -1,79 +0,0 @@ -## Getting Started with Detectron2 - -This document provides a brief intro of the usage of builtin command-line tools in detectron2. - -For a tutorial that involves actual coding with the API, -see our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -which covers how to run inference with an -existing model, and how to train a builtin model on a custom dataset. - -For more advanced tutorials, refer to our [documentation](https://detectron2.readthedocs.io/tutorials/extend.html). - - -### Inference Demo with Pre-trained Models - -1. Pick a model and its config file from - [model zoo](MODEL_ZOO.md), - for example, `mask_rcnn_R_50_FPN_3x.yaml`. -2. We provide `demo.py` that is able to run builtin standard models. Run it with: -``` -cd demo/ -python demo.py --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml \ - --input input1.jpg input2.jpg \ - [--other-options] - --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl -``` -The configs are made for training, therefore we need to specify `MODEL.WEIGHTS` to a model from model zoo for evaluation. 
-This command will run the inference and show visualizations in an OpenCV window. - -For details of the command line arguments, see `demo.py -h` or look at its source code -to understand its behavior. Some common arguments are: -* To run __on your webcam__, replace `--input files` with `--webcam`. -* To run __on a video__, replace `--input files` with `--video-input video.mp4`. -* To run __on cpu__, add `MODEL.DEVICE cpu` after `--opts`. -* To save outputs to a directory (for images) or a file (for webcam or video), use `--output`. - - -### Training & Evaluation in Command Line - -We provide a script in "tools/{,plain_}train_net.py", that is made to train -all the configs provided in detectron2. -You may want to use it as a reference to write your own training script. - -To train a model with "train_net.py", first -setup the corresponding datasets following -[datasets/README.md](./datasets/README.md), -then run: -``` -cd tools/ -./train_net.py --num-gpus 8 \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml -``` - -The configs are made for 8-GPU training. -To train on 1 GPU, you may need to [change some parameters](https://arxiv.org/abs/1706.02677), e.g.: -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --num-gpus 1 SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 -``` - -For most models, CPU training is not supported. - -To evaluate a model's performance, use -``` -./train_net.py \ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \ - --eval-only MODEL.WEIGHTS /path/to/checkpoint_file -``` -For more options, see `./train_net.py -h`. - -### Use Detectron2 APIs in Your Code - -See our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -to learn how to use detectron2 APIs to: -1. run inference with an existing model -2. train a builtin model on a custom dataset - -See [detectron2/projects](https://github.com/facebookresearch/detectron2/tree/master/projects) -for more ways to build your project on detectron2. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/index.rst b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/index.rst deleted file mode 100644 index 896e71e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/index.rst +++ /dev/null @@ -1,18 +0,0 @@ -Tutorials -====================================== - -.. toctree:: - :maxdepth: 2 - - install - getting_started - builtin_datasets - extend - datasets - data_loading - models - write-models - training - evaluation - configs - deployment diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/install.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/install.md deleted file mode 100644 index 3985f8a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/install.md +++ /dev/null @@ -1,184 +0,0 @@ -## Installation - -Our [Colab Notebook](https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5) -has step-by-step instructions that install detectron2. -The [Dockerfile](docker) -also installs detectron2 with a few simple commands. - -### Requirements -- Linux or macOS with Python ≥ 3.6 -- PyTorch ≥ 1.4 -- [torchvision](https://github.com/pytorch/vision/) that matches the PyTorch installation. - You can install them together at [pytorch.org](https://pytorch.org) to make sure of this. 
-- OpenCV, optional, needed by demo and visualization -- pycocotools: `pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'` - - -### Build Detectron2 from Source - -gcc & g++ ≥ 5 are required. [ninja](https://ninja-build.org/) is recommended for faster build. -After having them, run: -``` -python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' -# (add --user if you don't have permission) - -# Or, to install it from a local clone: -git clone https://github.com/facebookresearch/detectron2.git -python -m pip install -e detectron2 - -# Or if you are on macOS -# CC=clang CXX=clang++ python -m pip install -e . -``` - -To __rebuild__ detectron2 that's built from a local clone, use `rm -rf build/ **/*.so` to clean the -old build first. You often need to rebuild detectron2 after reinstalling PyTorch. - -### Install Pre-Built Detectron2 (Linux only) -``` -# for CUDA 10.1: -python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html -``` -You can replace cu101 with "cu{100,92}" or "cpu". - -Note that: -1. Such installation has to be used with certain version of official PyTorch release. - See [releases](https://github.com/facebookresearch/detectron2/releases) for requirements. - It will not work with a different version of PyTorch or a non-official build of PyTorch. -2. Such installation is out-of-date w.r.t. master branch of detectron2. It may not be - compatible with the master branch of a research project that uses detectron2 (e.g. those in - [projects](projects) or [meshrcnn](https://github.com/facebookresearch/meshrcnn/)). - -### Common Installation Issues - -If you met issues using the pre-built detectron2, please uninstall it and try building it from source. - -Click each issue for its solutions: - -
- -Undefined torch/aten/caffe2 symbols, or segmentation fault immediately when running the library. - -
- -This usually happens when detectron2 or torchvision is not -compiled with the version of PyTorch you're running. - -Pre-built torchvision or detectron2 has to work with the corresponding official release of pytorch. -If the error comes from a pre-built torchvision, uninstall torchvision and pytorch and reinstall them -following [pytorch.org](http://pytorch.org) so that the versions match. - -If the error comes from a pre-built detectron2, check [release notes](https://github.com/facebookresearch/detectron2/releases) -to see the corresponding pytorch version required for each pre-built detectron2. - -If the error comes from detectron2 or torchvision that you built manually from source, -remove the files you built (`build/`, `**/*.so`) and rebuild, so that the build picks up the version of pytorch currently in your environment. - -If you cannot resolve this problem, please include the output of `gdb -ex "r" -ex "bt" -ex "quit" --args python -m detectron2.utils.collect_env` -in your issue. -
- -
- -Undefined C++ symbols (e.g. `GLIBCXX`) or C++ symbols not found. - -
-Usually it's because the library is compiled with a newer C++ compiler but run with an old C++ runtime. - -This often happens with old anaconda. -Try `conda update libgcc`. Then rebuild detectron2. - -The fundamental solution is to run the code with a proper C++ runtime. -One way is to use `LD_PRELOAD=/path/to/libstdc++.so`. - -
- -
- -"Not compiled with GPU support" or "Detectron2 CUDA Compiler: not available". - -
-CUDA is not found when building detectron2. -You should make sure that - -``` -python -c 'import torch; from torch.utils.cpp_extension import CUDA_HOME; print(torch.cuda.is_available(), CUDA_HOME)' -``` - -prints valid outputs at the time you build detectron2. - -Most models can run inference (but not training) without GPU support. To use CPUs, set `MODEL.DEVICE='cpu'` in the config. -
- -
- -"invalid device function" or "no kernel image is available for execution". - -
-Two possibilities: - -* You build detectron2 with one version of CUDA but run it with a different version. - - To check whether it is the case, - use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. - In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" - to contain cuda libraries of the same version. - - When they are inconsistent, - you need to either install a different build of PyTorch (or build by yourself) - to match your local CUDA installation, or install a different version of CUDA to match PyTorch. - -* Detectron2 or PyTorch/torchvision is not built for the correct GPU architecture (compute compatibility). - - The GPU architecture for PyTorch/detectron2/torchvision is available in the "architecture flags" in - `python -m detectron2.utils.collect_env`. - - The GPU architecture flags of detectron2/torchvision by default matches the GPU model detected - during compilation. This means the compiled code may not work on a different GPU model. - To overwrite the GPU architecture for detectron2/torchvision, use `TORCH_CUDA_ARCH_LIST` environment variable during compilation. - - For example, `export TORCH_CUDA_ARCH_LIST=6.0,7.0` makes it compile for both P100s and V100s. - Visit [developer.nvidia.com/cuda-gpus](https://developer.nvidia.com/cuda-gpus) to find out - the correct compute compatibility number for your device. - -
- -
- -Undefined CUDA symbols; cannot open libcudart.so; other nvcc failures. - -
-The version of NVCC you use to build detectron2 or torchvision does -not match the version of CUDA you are running with. -This often happens when using anaconda's CUDA runtime. - -Use `python -m detectron2.utils.collect_env` to find out inconsistent CUDA versions. -In the output of this command, you should expect "Detectron2 CUDA Compiler", "CUDA_HOME", "PyTorch built with - CUDA" -to contain cuda libraries of the same version. - -When they are inconsistent, -you need to either install a different build of PyTorch (or build by yourself) -to match your local CUDA installation, or install a different version of CUDA to match PyTorch. -
- - -
- -"ImportError: cannot import name '_C'". - -
-Please build and install detectron2 following the instructions above. - -If you are running code from detectron2's root directory, `cd` to a different one. -Otherwise you may not import the code that you installed. -
- -
- -ONNX conversion segfault after some "TraceWarning". - -
-The ONNX package was compiled with a compiler that is too old.
-
-Please build and install ONNX from its source code using a compiler
-whose version is closer to the one used by PyTorch (shown in `torch.__config__.show()`).
-
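-For reference, the compiler and build flags used for your PyTorch install can be printed directly (a minimal sketch):
-
-```python
-import torch
-
-# The output includes the GCC/Clang version PyTorch was built with,
-# which is the version your ONNX build should be close to.
-print(torch.__config__.show())
-```
-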
diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/models.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/models.md deleted file mode 100644 index 456f36d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/models.md +++ /dev/null @@ -1,151 +0,0 @@ -# Use Models - -Models (and their sub-models) in detectron2 are built by -functions such as `build_model`, `build_backbone`, `build_roi_heads`: -```python -from detectron2.modeling import build_model -model = build_model(cfg) # returns a torch.nn.Module -``` - -`build_model` only builds the model structure, and fill it with random parameters. -See below for how to load an existing checkpoint to the model, -and how to use the `model` object. - -### Load/Save a Checkpoint -```python -from detectron2.checkpoint import DetectionCheckpointer -DetectionCheckpointer(model).load(file_path) # load a file to model - -checkpointer = DetectionCheckpointer(model, save_dir="output") -checkpointer.save("model_999") # save to output/model_999.pth -``` - -Detectron2's checkpointer recognizes models in pytorch's `.pth` format, as well as the `.pkl` files -in our model zoo. -See [API doc](../modules/checkpoint.html#detectron2.checkpoint.DetectionCheckpointer) -for more details about its usage. - -The model files can be arbitrarily manipulated using `torch.{load,save}` for `.pth` files or -`pickle.{dump,load}` for `.pkl` files. - -### Use a Model - -A model can be called by `outputs = model(inputs)`, where `inputs` is a `list[dict]`. -Each dict corresponds to one image and the required keys -depend on the type of model, and whether the model is in training or evaluation mode. -For example, in order to do inference, -all existing models expect the "image" key, and optionally "height" and "width". -The detailed format of inputs and outputs of existing models are explained below. - -When in training mode, all models are required to be used under an `EventStorage`. -The training statistics will be put into the storage: -```python -from detectron2.utils.events import EventStorage -with EventStorage() as storage: - losses = model(inputs) -``` - -If you only want to do simple inference using an existing model, -[DefaultPredictor](../modules/engine.html#detectron2.engine.defaults.DefaultPredictor) -is a wrapper around model that provides such basic functionality. -It includes default behavior including model loading, preprocessing, -and operates on single image rather than batches. - -### Model Input Format - -Users can implement custom models that support any arbitrary input format. -Here we describe the standard input format that all builtin models support in detectron2. -They all take a `list[dict]` as the inputs. Each dict -corresponds to information about one image. - -The dict may contain the following keys: - -* "image": `Tensor` in (C, H, W) format. The meaning of channels are defined by `cfg.INPUT.FORMAT`. - Image normalization, if any, will be performed inside the model using - `cfg.MODEL.PIXEL_{MEAN,STD}`. -* "instances": an [Instances](../modules/structures.html#detectron2.structures.Instances) - object, with the following fields: - + "gt_boxes": a [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing N boxes, one for each instance. - + "gt_classes": `Tensor` of long type, a vector of N labels, in range [0, num_categories). 
- + "gt_masks": a [PolygonMasks](../modules/structures.html#detectron2.structures.PolygonMasks) - or [BitMasks](../modules/structures.html#detectron2.structures.BitMasks) object storing N masks, one for each instance. - + "gt_keypoints": a [Keypoints](../modules/structures.html#detectron2.structures.Keypoints) - object storing N keypoint sets, one for each instance. -* "proposals": an [Instances](../modules/structures.html#detectron2.structures.Instances) - object used only in Fast R-CNN style models, with the following fields: - + "proposal_boxes": a [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing P proposal boxes. - + "objectness_logits": `Tensor`, a vector of P scores, one for each proposal. -* "height", "width": the **desired** output height and width, which is not necessarily the same - as the height or width of the `image` input field. - For example, the `image` input field might be a resized image, - but you may want the outputs to be in **original** resolution. - - If provided, the model will produce output in this resolution, - rather than in the resolution of the `image` as input into the model. This is more efficient and accurate. -* "sem_seg": `Tensor[int]` in (H, W) format. The semantic segmentation ground truth. - Values represent category labels starting from 0. - - -#### How it connects to data loader: - -The output of the default [DatasetMapper]( ../modules/data.html#detectron2.data.DatasetMapper) is a dict -that follows the above format. -After the data loader performs batching, it becomes `list[dict]` which the builtin models support. - - -### Model Output Format - -When in training mode, the builtin models output a `dict[str->ScalarTensor]` with all the losses. - -When in inference mode, the builtin models output a `list[dict]`, one dict for each image. -Based on the tasks the model is doing, each dict may contain the following fields: - -* "instances": [Instances](../modules/structures.html#detectron2.structures.Instances) - object with the following fields: - * "pred_boxes": [Boxes](../modules/structures.html#detectron2.structures.Boxes) object storing N boxes, one for each detected instance. - * "scores": `Tensor`, a vector of N scores. - * "pred_classes": `Tensor`, a vector of N labels in range [0, num_categories). - + "pred_masks": a `Tensor` of shape (N, H, W), masks for each detected instance. - + "pred_keypoints": a `Tensor` of shape (N, num_keypoint, 3). - Each row in the last dimension is (x, y, score). Scores are larger than 0. -* "sem_seg": `Tensor` of (num_categories, H, W), the semantic segmentation prediction. -* "proposals": [Instances](../modules/structures.html#detectron2.structures.Instances) - object with the following fields: - * "proposal_boxes": [Boxes](../modules/structures.html#detectron2.structures.Boxes) - object storing N boxes. - * "objectness_logits": a torch vector of N scores. -* "panoptic_seg": A tuple of `(Tensor, list[dict])`. The tensor has shape (H, W), where each element - represent the segment id of the pixel. Each dict describes one segment id and has the following fields: - * "id": the segment id - * "isthing": whether the segment is a thing or stuff - * "category_id": the category id of this segment. It represents the thing - class id when `isthing==True`, and the stuff class id otherwise. - - -### Partially execute a model: - -Sometimes you may want to obtain an intermediate tensor inside a model. 
-Since there are typically hundreds of intermediate tensors, there isn't an API that provides you -the intermediate result you need. -You have the following options: - -1. Write a (sub)model. Following the [tutorial](./write-models.md), you can - rewrite a model component (e.g. a head of a model), such that it - does the same thing as the existing component, but returns the output - you need. -2. Partially execute a model. You can create the model as usual, - but use custom code to execute it instead of its `forward()`. For example, - the following code obtains mask features before mask head. - -```python -images = ImageList.from_tensors(...) # preprocessed input tensor -model = build_model(cfg) -features = model.backbone(images.tensor) -proposals, _ = model.proposal_generator(images, features) -instances = model.roi_heads._forward_box(features, proposals) -mask_features = [features[f] for f in model.roi_heads.in_features] -mask_features = model.roi_heads.mask_pooler(mask_features, [x.pred_boxes for x in instances]) -``` - -Note that both options require you to read the existing forward code to understand -how to write code to obtain the outputs you need. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/training.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/training.md deleted file mode 100644 index dc7d537..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/training.md +++ /dev/null @@ -1,50 +0,0 @@ -# Training - -From the previous tutorials, you may now have a custom model and data loader. - -You are free to create your own optimizer, and write the training logic: it's -usually easy with PyTorch, and allow researchers to see the entire training -logic more clearly and have full control. -One such example is provided in [tools/plain_train_net.py](../../tools/plain_train_net.py). - -We also provide a standarized "trainer" abstraction with a -[minimal hook system](../modules/engine.html#detectron2.engine.HookBase) -that helps simplify the standard types of training. - -You can use -[SimpleTrainer().train()](../modules/engine.html#detectron2.engine.SimpleTrainer) -which provides minimal abstraction for single-cost single-optimizer single-data-source training. -The builtin `train_net.py` script uses -[DefaultTrainer().train()](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer), -which includes more standard default behavior that one might want to opt in, -including default configurations for learning rate schedule, -logging, evaluation, checkpointing etc. -This also means that it's less likely to support some non-standard behavior -you might want during research. - -To customize the training loops, you can: - -1. If your customization is similar to what `DefaultTrainer` is already doing, -you can change behavior of `DefaultTrainer` by overwriting [its methods](../modules/engine.html#detectron2.engine.defaults.DefaultTrainer) -in a subclass, like what [tools/train_net.py](../../tools/train_net.py) does. -2. If you need something very novel, you can start from [tools/plain_train_net.py](../../tools/plain_train_net.py) to implement them yourself. - -### Logging of Metrics - -During training, metrics are saved to a centralized [EventStorage](../modules/utils.html#detectron2.utils.events.EventStorage). 
-You can use the following code to access it and log metrics to it: -``` -from detectron2.utils.events import get_event_storage - -# inside the model: -if self.training: - value = # compute the value from inputs - storage = get_event_storage() - storage.put_scalar("some_accuracy", value) -``` - -Refer to its documentation for more details. - -Metrics are then saved to various destinations with [EventWriter](../modules/utils.html#module-detectron2.utils.events). -DefaultTrainer enables a few `EventWriter` with default configurations. -See above for how to customize them. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/write-models.md b/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/write-models.md deleted file mode 100644 index bb87d58..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/docs/tutorials/write-models.md +++ /dev/null @@ -1,39 +0,0 @@ -# Write Models - -If you are trying to do something completely new, you may wish to implement -a model entirely from scratch within detectron2. However, in many situations you may -be interested in modifying or extending some components of an existing model. -Therefore, we also provide a registration mechanism that lets you override the -behavior of certain internal components of standard models. - -For example, to add a new backbone, import this code in your code: -```python -from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec - -@BACKBONE_REGISTRY.register() -class ToyBackBone(Backbone): - def __init__(self, cfg, input_shape): - # create your own backbone - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=16, padding=3) - - def forward(self, image): - return {"conv1": self.conv1(image)} - - def output_shape(self): - return {"conv1": ShapeSpec(channels=64, stride=16)} -``` -Then, you can use `cfg.MODEL.BACKBONE.NAME = 'ToyBackBone'` in your config object. -`build_model(cfg)` will then call your `ToyBackBone` instead. - -As another example, to add new abilities to the ROI heads in the Generalized R-CNN meta-architecture, -you can implement a new -[ROIHeads](../modules/modeling.html#detectron2.modeling.ROIHeads) subclass and put it in the `ROI_HEADS_REGISTRY`. -See [densepose in detectron2](../../projects/DensePose) -and [meshrcnn](https://github.com/facebookresearch/meshrcnn) -for examples that implement new ROIHeads to perform new tasks. -And [projects/](../../projects/) -contains more examples that implement different architectures. - -A complete list of registries can be found in [API documentation](../modules/modeling.html#model-registries). -You can register components in these registries to customize different parts of a model, or the -entire model. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/README.md deleted file mode 100644 index fd2f1ee..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# DensePose in Detectron2 -**Dense Human Pose Estimation In The Wild** - -_Rıza Alp Güler, Natalia Neverova, Iasonas Kokkinos_ - -[[`densepose.org`](https://densepose.org)] [[`arXiv`](https://arxiv.org/abs/1802.00434)] [[`BibTeX`](#CitingDensePose)] - -Dense human pose estimation aims at mapping all human pixels of an RGB image to the 3D surface of the human body. - -
- -
- -In this repository, we provide the code to train and evaluate DensePose-RCNN. We also provide tools to visualize -DensePose annotation and results. - -# Quick Start - -See [ Getting Started ](doc/GETTING_STARTED.md) - -# Model Zoo and Baselines - -We provide a number of baseline results and trained models available for download. See [Model Zoo](doc/MODEL_ZOO.md) for details. - -# License - -Detectron2 is released under the [Apache 2.0 license](../../LICENSE) - -## Citing DensePose - -If you use DensePose, please take the references from the following BibTeX entries: - -For DensePose with estimated confidences: - -``` -@InProceedings{Neverova2019DensePoseConfidences, - title = {Correlated Uncertainty for Learning Dense Correspondences from Noisy Labels}, - author = {Neverova, Natalia and Novotny, David and Vedaldi, Andrea}, - journal = {Advances in Neural Information Processing Systems}, - year = {2019}, -} -``` - -For the original DensePose: - -``` -@InProceedings{Guler2018DensePose, - title={DensePose: Dense Human Pose Estimation In The Wild}, - author={R\{i}za Alp G\"uler, Natalia Neverova, Iasonas Kokkinos}, - journal={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, - year={2018} -} -``` - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/apply_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/apply_net.py deleted file mode 100644 index 7262f7c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/apply_net.py +++ /dev/null @@ -1,318 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import argparse -import glob -import logging -import os -import pickle -import sys -from typing import Any, ClassVar, Dict, List -import torch - -from detectron2.config import get_cfg -from detectron2.data.detection_utils import read_image -from detectron2.engine.defaults import DefaultPredictor -from detectron2.structures.boxes import BoxMode -from detectron2.structures.instances import Instances -from detectron2.utils.logger import setup_logger - -from densepose import add_densepose_config -from densepose.utils.logger import verbosity_to_level -from densepose.vis.base import CompoundVisualizer -from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer -from densepose.vis.densepose import ( - DensePoseResultsContourVisualizer, - DensePoseResultsFineSegmentationVisualizer, - DensePoseResultsUVisualizer, - DensePoseResultsVVisualizer, -) -from densepose.vis.extractor import CompoundExtractor, create_extractor - -DOC = """Apply Net - a tool to print / visualize DensePose results -""" - -LOGGER_NAME = "apply_net" -logger = logging.getLogger(LOGGER_NAME) - -_ACTION_REGISTRY: Dict[str, "Action"] = {} - - -class Action(object): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - parser.add_argument( - "-v", - "--verbosity", - action="count", - help="Verbose mode. 
Multiple -v options increase the verbosity.", - ) - - -def register_action(cls: type): - """ - Decorator for action classes to automate action registration - """ - global _ACTION_REGISTRY - _ACTION_REGISTRY[cls.COMMAND] = cls - return cls - - -class InferenceAction(Action): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(InferenceAction, cls).add_arguments(parser) - parser.add_argument("cfg", metavar="", help="Config file") - parser.add_argument("model", metavar="", help="Model file") - parser.add_argument("input", metavar="", help="Input data") - parser.add_argument( - "--opts", - help="Modify config options using the command-line 'KEY VALUE' pairs", - default=[], - nargs=argparse.REMAINDER, - ) - - @classmethod - def execute(cls: type, args: argparse.Namespace): - logger.info(f"Loading config from {args.cfg}") - opts = [] - cfg = cls.setup_config(args.cfg, args.model, args, opts) - logger.info(f"Loading model from {args.model}") - predictor = DefaultPredictor(cfg) - logger.info(f"Loading data from {args.input}") - file_list = cls._get_input_file_list(args.input) - if len(file_list) == 0: - logger.warning(f"No input images for {args.input}") - return - context = cls.create_context(args) - for file_name in file_list: - img = read_image(file_name, format="BGR") # predictor expects BGR image. - with torch.no_grad(): - outputs = predictor(img)["instances"] - cls.execute_on_outputs(context, {"file_name": file_name, "image": img}, outputs) - cls.postexecute(context) - - @classmethod - def setup_config( - cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str] - ): - cfg = get_cfg() - add_densepose_config(cfg) - cfg.merge_from_file(config_fpath) - cfg.merge_from_list(args.opts) - if opts: - cfg.merge_from_list(opts) - cfg.MODEL.WEIGHTS = model_fpath - cfg.freeze() - return cfg - - @classmethod - def _get_input_file_list(cls: type, input_spec: str): - if os.path.isdir(input_spec): - file_list = [ - os.path.join(input_spec, fname) - for fname in os.listdir(input_spec) - if os.path.isfile(os.path.join(input_spec, fname)) - ] - elif os.path.isfile(input_spec): - file_list = [input_spec] - else: - file_list = glob.glob(input_spec) - return file_list - - -@register_action -class DumpAction(InferenceAction): - """ - Dump action that outputs results to a pickle file - """ - - COMMAND: ClassVar[str] = "dump" - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Dump model outputs to a file.") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(DumpAction, cls).add_arguments(parser) - parser.add_argument( - "--output", - metavar="", - default="results.pkl", - help="File name to save dump to", - ) - - @classmethod - def execute_on_outputs( - cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances - ): - image_fpath = entry["file_name"] - logger.info(f"Processing {image_fpath}") - result = {"file_name": image_fpath} - if outputs.has("scores"): - result["scores"] = outputs.get("scores").cpu() - if outputs.has("pred_boxes"): - result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu() - if outputs.has("pred_densepose"): - boxes_XYWH = BoxMode.convert( - result["pred_boxes_XYXY"], BoxMode.XYXY_ABS, BoxMode.XYWH_ABS - ) - result["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_XYWH) - 
context["results"].append(result) - - @classmethod - def create_context(cls: type, args: argparse.Namespace): - context = {"results": [], "out_fname": args.output} - return context - - @classmethod - def postexecute(cls: type, context: Dict[str, Any]): - out_fname = context["out_fname"] - out_dir = os.path.dirname(out_fname) - if len(out_dir) > 0 and not os.path.exists(out_dir): - os.makedirs(out_dir) - with open(out_fname, "wb") as hFile: - pickle.dump(context["results"], hFile) - logger.info(f"Output saved to {out_fname}") - - -@register_action -class ShowAction(InferenceAction): - """ - Show action that visualizes selected entries on an image - """ - - COMMAND: ClassVar[str] = "show" - VISUALIZERS: ClassVar[Dict[str, object]] = { - "dp_contour": DensePoseResultsContourVisualizer, - "dp_segm": DensePoseResultsFineSegmentationVisualizer, - "dp_u": DensePoseResultsUVisualizer, - "dp_v": DensePoseResultsVVisualizer, - "bbox": ScoredBoundingBoxVisualizer, - } - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(ShowAction, cls).add_arguments(parser) - parser.add_argument( - "visualizations", - metavar="", - help="Comma separated list of visualizations, possible values: " - "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))), - ) - parser.add_argument( - "--min_score", - metavar="", - default=0.8, - type=float, - help="Minimum detection score to visualize", - ) - parser.add_argument( - "--nms_thresh", metavar="", default=None, type=float, help="NMS threshold" - ) - parser.add_argument( - "--output", - metavar="", - default="outputres.png", - help="File name to save output to", - ) - - @classmethod - def setup_config( - cls: type, config_fpath: str, model_fpath: str, args: argparse.Namespace, opts: List[str] - ): - opts.append("MODEL.ROI_HEADS.SCORE_THRESH_TEST") - opts.append(str(args.min_score)) - if args.nms_thresh is not None: - opts.append("MODEL.ROI_HEADS.NMS_THRESH_TEST") - opts.append(str(args.nms_thresh)) - cfg = super(ShowAction, cls).setup_config(config_fpath, model_fpath, args, opts) - return cfg - - @classmethod - def execute_on_outputs( - cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances - ): - import cv2 - import numpy as np - - visualizer = context["visualizer"] - extractor = context["extractor"] - image_fpath = entry["file_name"] - logger.info(f"Processing {image_fpath}") - image = cv2.cvtColor(entry["image"], cv2.COLOR_BGR2GRAY) - image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) - data = extractor(outputs) - image_vis = visualizer.visualize(image, data) - entry_idx = context["entry_idx"] + 1 - out_fname = cls._get_out_fname(entry_idx, context["out_fname"]) - out_dir = os.path.dirname(out_fname) - if len(out_dir) > 0 and not os.path.exists(out_dir): - os.makedirs(out_dir) - cv2.imwrite(out_fname, image_vis) - logger.info(f"Output saved to {out_fname}") - context["entry_idx"] += 1 - - @classmethod - def postexecute(cls: type, context: Dict[str, Any]): - pass - - @classmethod - def _get_out_fname(cls: type, entry_idx: int, fname_base: str): - base, ext = os.path.splitext(fname_base) - return base + ".{0:04d}".format(entry_idx) + ext - - @classmethod - def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: - vis_specs = args.visualizations.split(",") - 
visualizers = [] - extractors = [] - for vis_spec in vis_specs: - vis = cls.VISUALIZERS[vis_spec]() - visualizers.append(vis) - extractor = create_extractor(vis) - extractors.append(extractor) - visualizer = CompoundVisualizer(visualizers) - extractor = CompoundExtractor(extractors) - context = { - "extractor": extractor, - "visualizer": visualizer, - "out_fname": args.output, - "entry_idx": 0, - } - return context - - -def create_argument_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=DOC, - formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120), - ) - parser.set_defaults(func=lambda _: parser.print_help(sys.stdout)) - subparsers = parser.add_subparsers(title="Actions") - for _, action in _ACTION_REGISTRY.items(): - action.add_parser(subparsers) - return parser - - -def main(): - parser = create_argument_parser() - args = parser.parse_args() - verbosity = args.verbosity if hasattr(args, "verbosity") else None - global logger - logger = setup_logger(name=LOGGER_NAME) - logger.setLevel(verbosity_to_level(verbosity)) - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml deleted file mode 100644 index 3ed1bcd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/Base-DensePose-RCNN-FPN.yaml +++ /dev/null @@ -1,47 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
- POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - - DENSEPOSE_ON: True - ROI_HEADS: - NAME: "DensePoseROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - POOLER_SAMPLING_RATIO: 2 - POOLER_TYPE: "ROIAlign" - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseV1ConvXHead" - POOLER_TYPE: "ROIAlign" - NUM_COARSE_SEGM_CHANNELS: 2 -DATASETS: - TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival") - TEST: ("densepose_coco_2014_minival",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.01 - STEPS: (60000, 80000) - MAX_ITER: 90000 - WARMUP_FACTOR: 0.1 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml deleted file mode 100644 index 15475b1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml deleted file mode 100644 index 7546b96..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml deleted file mode 100644 index 045f7f0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_DL_s1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml deleted file mode 100644 index ace6209..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml deleted file mode 100644 index 766c098..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml deleted file mode 100644 index af44fb7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml deleted file mode 100644 index 8e79a1b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_101_FPN_s1x_legacy.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - RESNETS: - DEPTH: 101 - ROI_DENSEPOSE_HEAD: - NUM_COARSE_SEGM_CHANNELS: 15 - POOLER_RESOLUTION: 14 - HEATMAP_SIZE: 56 - INDEX_WEIGHTS: 2.0 - PART_WEIGHTS: 0.3 - POINT_REGRESSION_WEIGHTS: 0.1 - DECODER_ON: False -SOLVER: - BASE_LR: 0.002 - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml deleted file mode 100644 index f3720ef..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml deleted file mode 100644 index 5a47cc0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml deleted file mode 100644 index 52a170b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_DL_s1x.yaml +++ /dev/null @@ -1,10 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml deleted file mode 100644 index d36e542..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml deleted file mode 100644 index e880d46..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_WC2_s1x.yaml +++ /dev/null @@ -1,16 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 130000 - STEPS: (100000, 120000) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml deleted file mode 100644 index d2dd14c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml deleted file mode 100644 index 6c5391f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/densepose_rcnn_R_50_FPN_s1x_legacy.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_BASE_: "Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - NUM_COARSE_SEGM_CHANNELS: 15 - POOLER_RESOLUTION: 14 - HEATMAP_SIZE: 56 - INDEX_WEIGHTS: 2.0 - PART_WEIGHTS: 0.3 - POINT_REGRESSION_WEIGHTS: 0.1 - DECODER_ON: False -SOLVER: - BASE_LR: 0.002 - MAX_ITER: 130000 - STEPS: (100000, 120000) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml deleted file mode 100644 index 5a20882..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/Base-RCNN-FPN-MC.yaml +++ /dev/null @@ -1,91 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
- POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - NUM_CLASSES: 1 - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("base_coco_2017_train",) - TEST: ("base_coco_2017_val", "densepose_chimps") - CATEGORY_MAPS: - "base_coco_2017_train": - "16": 1 # bird -> person - "17": 1 # cat -> person - "18": 1 # dog -> person - "19": 1 # horse -> person - "20": 1 # sheep -> person - "21": 1 # cow -> person - "22": 1 # elephant -> person - "23": 1 # bear -> person - "24": 1 # zebra -> person - "25": 1 # girafe -> person - "base_coco_2017_val": - "16": 1 # bird -> person - "17": 1 # cat -> person - "18": 1 # dog -> person - "19": 1 # horse -> person - "20": 1 # sheep -> person - "21": 1 # cow -> person - "22": 1 # elephant -> person - "23": 1 # bear -> person - "24": 1 # zebra -> person - "25": 1 # girafe -> person - WHITELISTED_CATEGORIES: - "base_coco_2017_train": - - 1 # person - - 16 # bird - - 17 # cat - - 18 # dog - - 19 # horse - - 20 # sheep - - 21 # cow - - 22 # elephant - - 23 # bear - - 24 # zebra - - 25 # girafe - "base_coco_2017_val": - - 1 # person - - 16 # bird - - 17 # cat - - 18 # dog - - 19 # horse - - 20 # sheep - - 21 # cow - - 22 # elephant - - 23 # bear - - 24 # zebra - - 25 # girafe -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml deleted file mode 100644 index 80139ad..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/evolution/faster_rcnn_R_50_FPN_1x_MC.yaml +++ /dev/null @@ -1,7 +0,0 @@ -_BASE_: "Base-RCNN-FPN-MC.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - DENSEPOSE_ON: False - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml deleted file mode 100644 index b90989e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_DL_instant_test.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_DENSEPOSE_HEAD: - NAME: "DensePoseDeepLabHead" -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - MAX_ITER: 40 - STEPS: (30,) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml deleted file mode 100644 index 7d41274..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_TTA_inference_acc_test.yaml +++ /dev/null @@ -1,13 +0,0 @@ 
-_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl" -DATASETS: - TRAIN: () - TEST: ("densepose_coco_2014_minival_100",) -TEST: - AUG: - ENABLED: True - MIN_SIZES: (400, 500, 600, 700, 800, 900, 1000, 1100, 1200) - MAX_SIZE: 4000 - FLIP: True - EXPECTED_RESULTS: [["bbox_TTA", "AP", 61.74, 0.03], ["densepose_gps_TTA", "AP", 60.22, 0.03], ["densepose_gpsm_TTA", "AP", 63.85, 0.03]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml deleted file mode 100644 index f0fe611..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC1_instant_test.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "iid_iso" - POINT_REGRESSION_WEIGHTS: 0.0005 -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 40 - STEPS: (30,) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml deleted file mode 100644 index f0d9358..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_WC2_instant_test.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 - ROI_DENSEPOSE_HEAD: - UV_CONFIDENCE: - ENABLED: True - TYPE: "indep_aniso" - POINT_REGRESSION_WEIGHTS: 0.0005 -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - CLIP_GRADIENTS: - ENABLED: True - MAX_ITER: 40 - STEPS: (30,) - WARMUP_FACTOR: 0.025 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml deleted file mode 100644 index 3c5a7d2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_inference_acc_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -_BASE_: "../densepose_rcnn_R_50_FPN_s1x.yaml" -MODEL: - WEIGHTS: "https://dl.fbaipublicfiles.com/densepose/densepose_rcnn_R_50_FPN_s1x/165712039/model_final_162be9.pkl" -DATASETS: - TRAIN: () - TEST: ("densepose_coco_2014_minival_100",) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 59.27, 0.025], ["densepose_gps", "AP", 60.11, 0.02], ["densepose_gpsm", "AP", 64.20, 0.02]] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml 
b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml deleted file mode 100644 index 057c876..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_instant_test.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" -DATASETS: - TRAIN: ("densepose_coco_2014_minival_100",) - TEST: ("densepose_coco_2014_minival_100",) -SOLVER: - MAX_ITER: 40 - STEPS: (30,) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml deleted file mode 100644 index b991160..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/configs/quick_schedules/densepose_rcnn_R_50_FPN_training_acc_test.yaml +++ /dev/null @@ -1,14 +0,0 @@ -_BASE_: "../Base-DensePose-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - ROI_HEADS: - NUM_CLASSES: 1 -DATASETS: - TRAIN: ("densepose_coco_2014_minival",) - TEST: ("densepose_coco_2014_minival",) -SOLVER: - MAX_ITER: 6000 - STEPS: (5500, 5800) -TEST: - EXPECTED_RESULTS: [["bbox", "AP", 58.27, 1.0], ["densepose_gps", "AP", 42.47, 1.5], ["densepose_gpsm", "AP", 49.20, 1.5]] - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/__init__.py deleted file mode 100644 index aea5a1a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .data.datasets import builtin # just to register data -from .config import add_densepose_config, add_dataset_category_config -from .densepose_head import ROI_DENSEPOSE_HEAD_REGISTRY -from .evaluator import DensePoseCOCOEvaluator -from .roi_head import DensePoseROIHeads -from .data.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData -from .modeling.test_time_augmentation import DensePoseGeneralizedRCNNWithTTA -from .utils.transform import load_from_cfg diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/config.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/config.py deleted file mode 100644 index 2d76056..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/config.py +++ /dev/null @@ -1,68 +0,0 @@ -# -*- coding = utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from detectron2.config import CfgNode as CN - - -def add_dataset_category_config(cfg: CN): - """ - Add config for additional category-related dataset options - - category whitelisting - - category mapping - """ - _C = cfg - _C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True) - _C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True) - - -def add_densepose_config(cfg: CN): - """ - Add config for densepose head. 
- """ - _C = cfg - - _C.MODEL.DENSEPOSE_ON = True - - _C.MODEL.ROI_DENSEPOSE_HEAD = CN() - _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = "" - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8 - # Number of parts used for point labels - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4 - _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512 - _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3 - _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2 - _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112 - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2" - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28 - _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2 - _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2 - # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD) - _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7 - # Loss weights for annotation masks.(14 Parts) - _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0 - # Loss weights for surface parts. (24 Parts) - _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0 - # Loss weights for UV regression. - _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01 - # For Decoder - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256 - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = "" - _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4 - # For DeepLab head - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN() - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN" - _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0 - # Confidences - # Enable learning confidences (variances) along with the actual values - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False}) - # UV confidence lower bound - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01 - # Statistical model type for confidence learning, possible values: - # - "iid_iso": statistically independent identically distributed residuals - # with isotropic covariance - # - "indep_aniso": statistically independent residuals with anisotropic - # covariances - _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/__init__.py deleted file mode 100644 index 5484f59..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from .build import build_detection_test_loader, build_detection_train_loader -from .dataset_mapper import DatasetMapper - -# ensure the builtin data are registered -from . import datasets - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/build.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/build.py deleted file mode 100644 index c722ec1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/build.py +++ /dev/null @@ -1,405 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import itertools -import logging -import numpy as np -import operator -from typing import Any, Callable, Collection, Dict, Iterable, List, Optional -import torch - -from detectron2.config import CfgNode -from detectron2.data import samplers -from detectron2.data.build import ( - load_proposals_into_dataset, - print_instances_class_histogram, - trivial_batch_collator, - worker_init_reset_seed, -) -from detectron2.data.catalog import DatasetCatalog, MetadataCatalog -from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset -from detectron2.utils.comm import get_world_size - -from .dataset_mapper import DatasetMapper -from .datasets.coco import DENSEPOSE_KEYS_WITHOUT_MASK as DENSEPOSE_COCO_KEYS_WITHOUT_MASK -from .datasets.coco import DENSEPOSE_MASK_KEY as DENSEPOSE_COCO_MASK_KEY - -__all__ = ["build_detection_train_loader", "build_detection_test_loader"] - - -Instance = Dict[str, Any] -InstancePredicate = Callable[[Instance], bool] - - -def _compute_num_images_per_worker(cfg: CfgNode): - num_workers = get_world_size() - images_per_batch = cfg.SOLVER.IMS_PER_BATCH - assert ( - images_per_batch % num_workers == 0 - ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format( - images_per_batch, num_workers - ) - assert ( - images_per_batch >= num_workers - ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format( - images_per_batch, num_workers - ) - images_per_worker = images_per_batch // num_workers - return images_per_worker - - -def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]): - meta = MetadataCatalog.get(dataset_name) - for dataset_dict in dataset_dicts: - for ann in dataset_dict["annotations"]: - ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]] - - -def _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names: Iterable[str]): - # merge categories for all data - merged_categories = {} - for dataset_name in dataset_names: - meta = MetadataCatalog.get(dataset_name) - for cat_id, cat_name in meta.categories.items(): - if cat_id not in merged_categories: - merged_categories[cat_id] = (cat_name, dataset_name) - continue - cat_name_other, dataset_name_other = merged_categories[cat_id] - if cat_name_other != cat_name: - raise ValueError( - f"Incompatible categories for category ID {cat_id}: " - f'dataset {dataset_name} value "{cat_name}", ' - f'dataset {dataset_name_other} value "{cat_name_other}"' - ) - - merged_cat_id_to_cont_id = {} - for i, cat_id in enumerate(sorted(merged_categories.keys())): - merged_cat_id_to_cont_id[cat_id] = i - - # add category maps to metadata - for dataset_name in dataset_names: - meta = MetadataCatalog.get(dataset_name) - categories = meta.get("categories") - meta.thing_classes = [categories[cat_id] for cat_id in sorted(categories.keys())] - meta.thing_dataset_id_to_contiguous_id = { - cat_id: merged_cat_id_to_cont_id[cat_id] for cat_id in sorted(categories.keys()) - } - meta.thing_contiguous_id_to_dataset_id = { - merged_cat_id_to_cont_id[cat_id]: cat_id for cat_id in sorted(categories.keys()) - } - - -def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - def has_annotations(instance: Instance) -> bool: - return "annotations" in instance - - def has_only_crowd_anotations(instance: Instance) -> bool: - for ann in instance["annotations"]: - if ann.get("is_crowd", 0) == 0: - return False - return True - - def 
general_keep_instance_predicate(instance: Instance) -> bool: - return has_annotations(instance) and not has_only_crowd_anotations(instance) - - if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS: - return None - return general_keep_instance_predicate - - -def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - - min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE - - def has_sufficient_num_keypoints(instance: Instance) -> bool: - num_kpts = sum( - (np.array(ann["keypoints"][2::3]) > 0).sum() - for ann in instance["annotations"] - if "keypoints" in ann - ) - return num_kpts >= min_num_keypoints - - if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0): - return has_sufficient_num_keypoints - return None - - -def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - if not cfg.MODEL.MASK_ON: - return None - - def has_mask_annotations(instance: Instance) -> bool: - return any("segmentation" in ann for ann in instance["annotations"]) - - return has_mask_annotations - - -def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - if not cfg.MODEL.DENSEPOSE_ON: - return None - - def has_densepose_annotations(instance: Instance) -> bool: - for ann in instance["annotations"]: - if all(key in ann for key in DENSEPOSE_COCO_KEYS_WITHOUT_MASK) and ( - (DENSEPOSE_COCO_MASK_KEY in ann) or ("segmentation" in ann) - ): - return True - return False - - return has_densepose_annotations - - -def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]: - specific_predicate_creators = [ - _maybe_create_keypoints_keep_instance_predicate, - _maybe_create_mask_keep_instance_predicate, - _maybe_create_densepose_keep_instance_predicate, - ] - predicates = [creator(cfg) for creator in specific_predicate_creators] - predicates = [p for p in predicates if p is not None] - if not predicates: - return None - - def combined_predicate(instance: Instance) -> bool: - return any(p(instance) for p in predicates) - - return combined_predicate - - -def _get_train_keep_instance_predicate(cfg: CfgNode): - general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg) - combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg) - - def combined_general_specific_keep_predicate(instance: Instance) -> bool: - return general_keep_predicate(instance) and combined_specific_keep_predicate(instance) - - if (general_keep_predicate is None) and (combined_specific_keep_predicate is None): - return None - if general_keep_predicate is None: - return combined_specific_keep_predicate - if combined_specific_keep_predicate is None: - return general_keep_predicate - return combined_general_specific_keep_predicate - - -def _get_test_keep_instance_predicate(cfg: CfgNode): - general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg) - return general_keep_predicate - - -def _maybe_filter_and_map_categories( - dataset_name: str, dataset_dicts: List[Instance] -) -> List[Instance]: - meta = MetadataCatalog.get(dataset_name) - whitelisted_categories = meta.get("whitelisted_categories") - category_map = meta.get("category_map", {}) - if whitelisted_categories is None and not category_map: - return dataset_dicts - filtered_dataset_dicts = [] - for dataset_dict in dataset_dicts: - anns = [] - for ann in dataset_dict["annotations"]: - cat_id = ann["category_id"] - if whitelisted_categories is not None and cat_id not in 
whitelisted_categories: - continue - ann["category_id"] = category_map.get(cat_id, cat_id) - anns.append(ann) - dataset_dict["annotations"] = anns - filtered_dataset_dicts.append(dataset_dict) - return filtered_dataset_dicts - - -def _add_category_whitelists_to_metadata(cfg: CfgNode): - for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items(): - meta = MetadataCatalog.get(dataset_name) - meta.whitelisted_categories = whitelisted_cat_ids - logger = logging.getLogger(__name__) - logger.info( - "Whitelisted categories for dataset {}: {}".format( - dataset_name, meta.whitelisted_categories - ) - ) - - -def _add_category_maps_to_metadata(cfg: CfgNode): - for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items(): - category_map = { - int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items() - } - meta = MetadataCatalog.get(dataset_name) - meta.category_map = category_map - logger = logging.getLogger(__name__) - logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map)) - - -def combine_detection_dataset_dicts( - dataset_names: Collection[str], - keep_instance_predicate: Optional[InstancePredicate] = None, - proposal_files: Optional[Collection[str]] = None, -) -> List[Instance]: - """ - Load and prepare dataset dicts for training / testing - - Args: - dataset_names (Collection[str]): a list of dataset names - keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate - applied to instance dicts which defines whether to keep the instance - proposal_files (Collection[str]): if given, a list of object proposal files - that match each dataset in `dataset_names`. - """ - assert len(dataset_names) - if proposal_files is None: - proposal_files = [None] * len(dataset_names) - assert len(dataset_names) == len(proposal_files) - # load annotations and dataset metadata - dataset_map = {} - for dataset_name in dataset_names: - dataset_dicts = DatasetCatalog.get(dataset_name) - dataset_map[dataset_name] = dataset_dicts - # initialize category maps - _add_category_id_to_contiguous_id_maps_to_metadata(dataset_names) - # apply category maps - all_datasets_dicts = [] - for dataset_name, proposal_file in zip(dataset_names, proposal_files): - dataset_dicts = dataset_map[dataset_name] - assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!" - if proposal_file is not None: - dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file) - dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts) - _map_category_id_to_contiguous_id(dataset_name, dataset_dicts) - print_instances_class_histogram( - dataset_dicts, MetadataCatalog.get(dataset_name).thing_classes - ) - all_datasets_dicts.append(dataset_dicts) - - if keep_instance_predicate is not None: - all_datasets_dicts_plain = [ - d - for d in itertools.chain.from_iterable(all_datasets_dicts) - if keep_instance_predicate(d) - ] - else: - all_datasets_dicts_plain = list(itertools.chain.from_iterable(all_datasets_dicts)) - return all_datasets_dicts_plain - - -def build_detection_train_loader(cfg: CfgNode, mapper=None): - """ - A data loader is created in a way similar to that of Detectron2. - The main differences are: - - it allows to combine data with different but compatible object category sets - - The data loader is created by the following steps: - 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts. - 2. Start workers to work on the dicts. 
Each worker will: - * Map each metadata dict into another format to be consumed by the model. - * Batch them by simply putting dicts into a list. - The batched ``list[mapped_dict]`` is what this dataloader will return. - - Args: - cfg (CfgNode): the config - mapper (callable): a callable which takes a sample (dict) from dataset and - returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, True)`. - - Returns: - an infinite iterator of training data - """ - images_per_worker = _compute_num_images_per_worker(cfg) - - _add_category_whitelists_to_metadata(cfg) - _add_category_maps_to_metadata(cfg) - dataset_dicts = combine_detection_dataset_dicts( - cfg.DATASETS.TRAIN, - keep_instance_predicate=_get_train_keep_instance_predicate(cfg), - proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, - ) - dataset = DatasetFromList(dataset_dicts, copy=False) - - if mapper is None: - mapper = DatasetMapper(cfg, True) - dataset = MapDataset(dataset, mapper) - - sampler_name = cfg.DATALOADER.SAMPLER_TRAIN - logger = logging.getLogger(__name__) - logger.info("Using training sampler {}".format(sampler_name)) - if sampler_name == "TrainingSampler": - sampler = samplers.TrainingSampler(len(dataset)) - elif sampler_name == "RepeatFactorTrainingSampler": - sampler = samplers.RepeatFactorTrainingSampler( - dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD - ) - else: - raise ValueError("Unknown training sampler: {}".format(sampler_name)) - - if cfg.DATALOADER.ASPECT_RATIO_GROUPING: - data_loader = torch.utils.data.DataLoader( - dataset, - sampler=sampler, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=None, - collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements - worker_init_fn=worker_init_reset_seed, - ) # yield individual mapped dict - data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker) - else: - batch_sampler = torch.utils.data.sampler.BatchSampler( - sampler, images_per_worker, drop_last=True - ) - # drop_last so the batch always have the same size - data_loader = torch.utils.data.DataLoader( - dataset, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=batch_sampler, - collate_fn=trivial_batch_collator, - worker_init_fn=worker_init_reset_seed, - ) - - return data_loader - - -def build_detection_test_loader(cfg, dataset_name, mapper=None): - """ - Similar to `build_detection_train_loader`. - But this function uses the given `dataset_name` argument (instead of the names in cfg), - and uses batch size 1. - - Args: - cfg: a detectron2 CfgNode - dataset_name (str): a name of the dataset that's available in the DatasetCatalog - mapper (callable): a callable which takes a sample (dict) from dataset - and returns the format to be consumed by the model. - By default it will be `DatasetMapper(cfg, False)`. - - Returns: - DataLoader: a torch DataLoader, that loads the given detection - dataset, with test-time transformation and batching. 
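The keep-instance predicates above compose in a fixed pattern: a training instance survives filtering only if it passes the general non-crowd check and at least one task-specific check (keypoints, masks or DensePose). A minimal self-contained sketch of that composition, using plain dicts and hypothetical predicates in place of the cfg-driven helpers (the names combine_keep_predicates, has_annotations and has_mask are illustrative only):

from typing import Any, Callable, Dict, List, Optional

Instance = Dict[str, Any]
InstancePredicate = Callable[[Instance], bool]

def combine_keep_predicates(
    general: Optional[InstancePredicate],
    specific: List[InstancePredicate],
) -> Optional[InstancePredicate]:
    # Same composition as _get_train_keep_instance_predicate: general AND any(specific).
    combined_specific: Optional[InstancePredicate] = None
    if specific:
        combined_specific = lambda inst: any(p(inst) for p in specific)
    if general is None:
        return combined_specific
    if combined_specific is None:
        return general
    return lambda inst: general(inst) and combined_specific(inst)

# Hypothetical stand-ins for the cfg-driven predicates above.
def has_annotations(inst: Instance) -> bool:
    return bool(inst.get("annotations"))

def has_mask(inst: Instance) -> bool:
    return any("segmentation" in ann for ann in inst.get("annotations", []))

keep = combine_keep_predicates(has_annotations, [has_mask])
print(keep({"annotations": [{"segmentation": [[0, 0, 5, 0, 5, 5]]}]}))  # True
print(keep({"annotations": [{"bbox": [0, 0, 10, 10]}]}))                # False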
- """ - _add_category_whitelists_to_metadata(cfg) - _add_category_maps_to_metadata(cfg) - dataset_dicts = combine_detection_dataset_dicts( - [dataset_name], - keep_instance_predicate=_get_test_keep_instance_predicate(cfg), - proposal_files=[ - cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] - ] - if cfg.MODEL.LOAD_PROPOSALS - else None, - ) - - dataset = DatasetFromList(dataset_dicts) - if mapper is None: - mapper = DatasetMapper(cfg, False) - dataset = MapDataset(dataset, mapper) - - sampler = samplers.InferenceSampler(len(dataset)) - # Always use 1 image per worker during inference since this is the - # standard when reporting inference time in papers. - batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) - - data_loader = torch.utils.data.DataLoader( - dataset, - num_workers=cfg.DATALOADER.NUM_WORKERS, - batch_sampler=batch_sampler, - collate_fn=trivial_batch_collator, - ) - return data_loader diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py deleted file mode 100644 index f749767..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/dataset_mapper.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import copy -import torch -from fvcore.common.file_io import PathManager - -from detectron2.data import MetadataCatalog -from detectron2.data import detection_utils as utils -from detectron2.data import transforms as T - -from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData - - -class DatasetMapper: - """ - A customized version of `detectron2.data.DatasetMapper` - """ - - def __init__(self, cfg, is_train=True): - self.tfm_gens = utils.build_transform_gen(cfg, is_train) - - # fmt: off - self.img_format = cfg.INPUT.FORMAT - self.mask_on = cfg.MODEL.MASK_ON - self.keypoint_on = cfg.MODEL.KEYPOINT_ON - self.densepose_on = cfg.MODEL.DENSEPOSE_ON - assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet" - # fmt: on - if self.keypoint_on and is_train: - # Flip only makes sense in training - self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) - else: - self.keypoint_hflip_indices = None - - if self.densepose_on: - densepose_transform_srcs = [ - MetadataCatalog.get(ds).densepose_transform_src - for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST - ] - assert len(densepose_transform_srcs) > 0 - # TODO: check that DensePose transformation data is the same for - # all the data. Otherwise one would have to pass DB ID with - # each entry to select proper transformation data. For now, since - # all DensePose annotated data uses the same data semantics, we - # omit this check. - densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0]) - self.densepose_transform_data = DensePoseTransformData.load( - densepose_transform_data_fpath - ) - - self.is_train = is_train - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
- - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - image = utils.read_image(dataset_dict["file_name"], format=self.img_format) - utils.check_image_size(dataset_dict, image) - - image, transforms = T.apply_transform_gens(self.tfm_gens, image) - image_shape = image.shape[:2] # h, w - dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) - - if not self.is_train: - dataset_dict.pop("annotations", None) - return dataset_dict - - for anno in dataset_dict["annotations"]: - if not self.mask_on: - anno.pop("segmentation", None) - if not self.keypoint_on: - anno.pop("keypoints", None) - - # USER: Implement additional transformations if you have other types of data - # USER: Don't call transpose_densepose if you don't need - annos = [ - self._transform_densepose( - utils.transform_instance_annotations( - obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices - ), - transforms, - ) - for obj in dataset_dict.pop("annotations") - if obj.get("iscrowd", 0) == 0 - ] - instances = utils.annotations_to_instances(annos, image_shape) - - if len(annos) and "densepose" in annos[0]: - gt_densepose = [obj["densepose"] for obj in annos] - instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape) - - dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()] - return dataset_dict - - def _transform_densepose(self, annotation, transforms): - if not self.densepose_on: - return annotation - - # Handle densepose annotations - is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation) - if is_valid: - densepose_data = DensePoseDataRelative(annotation, cleanup=True) - densepose_data.apply_transform(transforms, self.densepose_transform_data) - annotation["densepose"] = densepose_data - else: - # logger = logging.getLogger(__name__) - # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid)) - DensePoseDataRelative.cleanup_annotation(annotation) - # NOTE: annotations for certain instances may be unavailable. - # 'None' is accepted by the DensePostList data structure. - annotation["densepose"] = None - return annotation diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py deleted file mode 100644 index 4a59d93..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from . import builtin # ensure the builtin data are registered - -__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py deleted file mode 100644 index e70f3d3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/builtin.py +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .coco import BASE_DATASETS as BASE_COCO_DATASETS -from .coco import DATASETS as COCO_DATASETS -from .coco import register_datasets as register_coco_datasets - -DEFAULT_DATASETS_ROOT = "data" - - -register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT) -register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py deleted file mode 100644 index 3a96474..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/datasets/coco.py +++ /dev/null @@ -1,314 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import contextlib -import io -import logging -import os -from dataclasses import dataclass -from typing import Any, Dict, Iterable, List, Optional -from fvcore.common.file_io import PathManager -from fvcore.common.timer import Timer - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import BoxMode - -DENSEPOSE_MASK_KEY = "dp_masks" -DENSEPOSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"] -DENSEPOSE_KEYS = DENSEPOSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY] -DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/" - - -@dataclass -class CocoDatasetInfo: - name: str - images_root: str - annotations_fpath: str - - -DATASETS = [ - CocoDatasetInfo( - name="densepose_coco_2014_train", - images_root="coco/train2014", - annotations_fpath="coco/annotations/densepose_train2014.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_minival", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_minival2014.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_minival_100", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_minival2014_100.json", - ), - CocoDatasetInfo( - name="densepose_coco_2014_valminusminival", - images_root="coco/val2014", - annotations_fpath="coco/annotations/densepose_valminusminival2014.json", - ), - CocoDatasetInfo( - name="densepose_chimps", - images_root="densepose_evolution/densepose_chimps", - annotations_fpath="densepose_evolution/annotations/densepose_chimps_densepose.json", - ), -] - - -BASE_DATASETS = [ - CocoDatasetInfo( - name="base_coco_2017_train", - images_root="coco/train2017", - annotations_fpath="coco/annotations/instances_train2017.json", - ), - CocoDatasetInfo( - name="base_coco_2017_val", - images_root="coco/val2017", - annotations_fpath="coco/annotations/instances_val2017.json", - ), - CocoDatasetInfo( - name="base_coco_2017_val_100", - images_root="coco/val2017", - annotations_fpath="coco/annotations/instances_val2017_100.json", - ), -] - - -def _is_relative_local_path(path: os.PathLike): - path_str = os.fsdecode(path) - return ("://" not in path_str) and not os.path.isabs(path) - - -def _maybe_prepend_base_path(base_path: Optional[os.PathLike], path: os.PathLike): - """ - Prepends the provided path with a base path prefix if: - 1) base path is not None; - 2) path is a local path - """ - if base_path is None: - return path - if _is_relative_local_path(path): - return os.path.join(base_path, path) - return path - - -def get_metadata(base_path: Optional[os.PathLike]) -> Dict[str, Any]: - """ - Returns metadata associated with COCO DensePose data - - Args: - base_path: Optional[os.PathLike] - Base path used to load 
metadata from - - Returns: - Dict[str, Any] - Metadata in the form of a dictionary - """ - meta = { - "densepose_transform_src": _maybe_prepend_base_path( - base_path, "UV_symmetry_transforms.mat" - ), - "densepose_smpl_subdiv": _maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"), - "densepose_smpl_subdiv_transform": _maybe_prepend_base_path( - base_path, "SMPL_SUBDIV_TRANSFORM.mat" - ), - } - return meta - - -def _load_coco_annotations(json_file: str): - """ - Load COCO annotations from a JSON file - - Args: - json_file: str - Path to the file to load annotations from - Returns: - Instance of `pycocotools.coco.COCO` that provides access to annotations - data - """ - from pycocotools.coco import COCO - - logger = logging.getLogger(__name__) - timer = Timer() - with contextlib.redirect_stdout(io.StringIO()): - coco_api = COCO(json_file) - if timer.seconds() > 1: - logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) - return coco_api - - -def _add_categories_metadata(dataset_name: str, categories: Dict[str, Any]): - meta = MetadataCatalog.get(dataset_name) - meta.categories = {c["id"]: c["name"] for c in categories} - logger = logging.getLogger(__name__) - logger.info("Dataset {} categories: {}".format(dataset_name, categories)) - - -def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]): - if "minival" in json_file: - # Skip validation on COCO2014 valminusminival and minival annotations - # The ratio of buggy annotations there is tiny and does not affect accuracy - # Therefore we explicitly white-list them - return - ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] - assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( - json_file - ) - - -def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "bbox" not in ann_dict: - return - obj["bbox"] = ann_dict["bbox"] - obj["bbox_mode"] = BoxMode.XYWH_ABS - - -def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "segmentation" not in ann_dict: - return - segm = ann_dict["segmentation"] - if not isinstance(segm, dict): - # filter out invalid polygons (< 3 points) - segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] - if len(segm) == 0: - return - obj["segmentation"] = segm - - -def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - if "keypoints" not in ann_dict: - return - keypts = ann_dict["keypoints"] # list[int] - for idx, v in enumerate(keypts): - if idx % 3 != 2: - # COCO's segmentation coordinates are floating points in [0, H or W], - # but keypoint coordinates are integers in [0, H-1 or W-1] - # Therefore we assume the coordinates are "pixel indices" and - # add 0.5 to convert to floating point coordinates. 
- keypts[idx] = v + 0.5 - obj["keypoints"] = keypts - - -def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]): - for key in DENSEPOSE_KEYS: - if key in ann_dict: - obj[key] = ann_dict[key] - - -def _combine_images_with_annotations( - dataset_name: str, - image_root: str, - img_datas: Iterable[Dict[str, Any]], - ann_datas: Iterable[Iterable[Dict[str, Any]]], -): - - ann_keys = ["iscrowd", "category_id"] - dataset_dicts = [] - - for img_dict, ann_dicts in zip(img_datas, ann_datas): - record = {} - record["file_name"] = os.path.join(image_root, img_dict["file_name"]) - record["height"] = img_dict["height"] - record["width"] = img_dict["width"] - record["image_id"] = img_dict["id"] - record["dataset"] = dataset_name - objs = [] - for ann_dict in ann_dicts: - assert ann_dict["image_id"] == record["image_id"] - assert ann_dict.get("ignore", 0) == 0 - obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict} - _maybe_add_bbox(obj, ann_dict) - _maybe_add_segm(obj, ann_dict) - _maybe_add_keypoints(obj, ann_dict) - _maybe_add_densepose(obj, ann_dict) - objs.append(obj) - record["annotations"] = objs - dataset_dicts.append(record) - return dataset_dicts - - -def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str): - """ - Loads a JSON file with annotations in COCO instances format. - Replaces `detectron2.data.data.coco.load_coco_json` to handle metadata - in a more flexible way. Postpones category mapping to a later stage to be - able to combine several data with different (but coherent) sets of - categories. - - Args: - - annotations_json_file: str - Path to the JSON file with annotations in COCO instances format. - image_root: str - directory that contains all the images - dataset_name: str - the name that identifies a dataset, e.g. "densepose_coco_2014_train" - extra_annotation_keys: Optional[List[str]] - If provided, these keys are used to extract additional data from - the annotations. - """ - coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file)) - _add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds())) - # sort indices for reproducible results - img_ids = sorted(coco_api.imgs.keys()) - # imgs is a list of dicts, each looks something like: - # {'license': 4, - # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', - # 'file_name': 'COCO_val2014_000000001268.jpg', - # 'height': 427, - # 'width': 640, - # 'date_captured': '2013-11-17 05:57:24', - # 'id': 1268} - imgs = coco_api.loadImgs(img_ids) - logger = logging.getLogger(__name__) - logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file)) - # anns is a list[list[dict]], where each dict is an annotation - # record for an object. The inner list enumerates the objects in an image - # and the outer list enumerates over images. 
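These per-annotation helpers make small but easy-to-miss adjustments: only the x/y entries of each (x, y, v) keypoint triplet are shifted by 0.5 (the visibility flag is left untouched), and degenerate polygons are silently dropped. A tiny standalone illustration with made-up annotation values:

# Made-up COCO-style annotation fragments, illustrating the two conversions above.
keypoints = [10, 20, 2, 0, 0, 0]                    # (x, y, v) triplets, pixel indices
segmentation = [[0, 0, 5, 0, 5, 5], [1, 1, 2, 2]]   # second polygon has < 3 points

# Keypoints: add 0.5 to x and y, leave every third value (visibility) as-is.
converted = [v + 0.5 if i % 3 != 2 else v for i, v in enumerate(keypoints)]
assert converted == [10.5, 20.5, 2, 0.5, 0.5, 0]

# Segmentation: keep only well-formed polygons (even length, at least 3 points).
valid = [poly for poly in segmentation if len(poly) % 2 == 0 and len(poly) >= 6]
assert valid == [[0, 0, 5, 0, 5, 5]]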
- anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] - _verify_annotations_have_unique_ids(annotations_json_file, anns) - dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns) - return dataset_records - - -def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[os.PathLike] = None): - """ - Registers provided COCO DensePose dataset - - Args: - dataset_data: CocoDatasetInfo - Dataset data - datasets_root: Optional[os.PathLike] - Datasets root folder (default: None) - """ - annotations_fpath = _maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath) - images_root = _maybe_prepend_base_path(datasets_root, dataset_data.images_root) - - def load_annotations(): - return load_coco_json( - annotations_json_file=annotations_fpath, - image_root=images_root, - dataset_name=dataset_data.name, - ) - - DatasetCatalog.register(dataset_data.name, load_annotations) - MetadataCatalog.get(dataset_data.name).set( - json_file=annotations_fpath, - image_root=images_root, - **get_metadata(DENSEPOSE_METADATA_URL_PREFIX) - ) - - -def register_datasets( - datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[os.PathLike] = None -): - """ - Registers provided COCO DensePose data - - Args: - datasets_data: Iterable[CocoDatasetInfo] - An iterable of dataset datas - datasets_root: Optional[os.PathLike] - Datasets root folder (default: None) - """ - for dataset_data in datasets_data: - register_dataset(dataset_data, datasets_root) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py deleted file mode 100644 index bbb950b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/data/structures.py +++ /dev/null @@ -1,579 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import base64 -import numpy as np -from io import BytesIO -import torch -from PIL import Image -from torch.nn import functional as F - - -class DensePoseTransformData(object): - - # Horizontal symmetry label transforms used for horizontal flip - MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14] - # fmt: off - POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa - # fmt: on - - def __init__(self, uv_symmetries): - self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES - self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES - self.uv_symmetries = uv_symmetries - - @staticmethod - def load(fpath): - import scipy.io - - uv_symmetry_map = scipy.io.loadmat(fpath) - uv_symmetry_map_torch = {} - for key in ["U_transforms", "V_transforms"]: - uv_symmetry_map_torch[key] = [] - map_src = uv_symmetry_map[key] - map_dst = uv_symmetry_map_torch[key] - for i in range(map_src.shape[1]): - map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float)) - uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0).to( - device=torch.cuda.current_device() - ) - transform_data = DensePoseTransformData(uv_symmetry_map_torch) - return transform_data - - -class DensePoseDataRelative(object): - """ - Dense pose relative annotations that can be applied to any bounding box: - x - normalized X coordinates [0, 255] of annotated points - y - normalized Y coordinates [0, 255] of annotated points - i - body part labels 0,...,24 for annotated points - u - body part U coordinates [0, 1] for annotated points - v - body part V coordinates [0, 1] for annotated points - segm - 256x256 segmentation mask with values 0,...,14 - To obtain absolute x and y data wrt some bounding box one needs to first - divide the data by 256, multiply by the respective bounding box size - and add bounding box offset: - x_img = x0 + x_norm * w / 256.0 - y_img = y0 + y_norm * h / 256.0 - Segmentation masks are typically sampled to get image-based masks. 
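A worked instance of the conversion stated in the docstring above, assuming a hypothetical annotated point at (128, 64) in the normalized 0..255 space and a bounding box with top-left corner (100, 50), width 200 and height 400:

# Worked example of x_img = x0 + x_norm * w / 256.0 (and the analogue for y).
x0, y0, w, h = 100.0, 50.0, 200.0, 400.0
x_norm, y_norm = 128.0, 64.0            # annotated point in the 0..255 space

x_img = x0 + x_norm * w / 256.0         # 100 + 128 * 200 / 256 = 200.0
y_img = y0 + y_norm * h / 256.0         # 50 + 64 * 400 / 256 = 150.0
assert (x_img, y_img) == (200.0, 150.0)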
- """ - - # Key for normalized X coordinates in annotation dict - X_KEY = "dp_x" - # Key for normalized Y coordinates in annotation dict - Y_KEY = "dp_y" - # Key for U part coordinates in annotation dict - U_KEY = "dp_U" - # Key for V part coordinates in annotation dict - V_KEY = "dp_V" - # Key for I point labels in annotation dict - I_KEY = "dp_I" - # Key for segmentation mask in annotation dict - S_KEY = "dp_masks" - # Number of body parts in segmentation masks - N_BODY_PARTS = 14 - # Number of parts in point labels - N_PART_LABELS = 24 - MASK_SIZE = 256 - - def __init__(self, annotation, cleanup=False): - is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation) - assert is_valid, "Invalid DensePose annotations: {}".format(reason_not_valid) - self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY]) - self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY]) - self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY]) - self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY]) - self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY]) - self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation) - self.device = torch.device("cpu") - if cleanup: - DensePoseDataRelative.cleanup_annotation(annotation) - - def to(self, device): - if self.device == device: - return self - new_data = DensePoseDataRelative.__new__(DensePoseDataRelative) - new_data.x = self.x - new_data.x = self.x.to(device) - new_data.y = self.y.to(device) - new_data.i = self.i.to(device) - new_data.u = self.u.to(device) - new_data.v = self.v.to(device) - new_data.segm = self.segm.to(device) - new_data.device = device - return new_data - - @staticmethod - def extract_segmentation_mask(annotation): - import pycocotools.mask as mask_utils - - poly_specs = annotation[DensePoseDataRelative.S_KEY] - segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32) - for i in range(DensePoseDataRelative.N_BODY_PARTS): - poly_i = poly_specs[i] - if poly_i: - mask_i = mask_utils.decode(poly_i) - segm[mask_i > 0] = i + 1 - return segm - - @staticmethod - def validate_annotation(annotation): - for key in [ - DensePoseDataRelative.X_KEY, - DensePoseDataRelative.Y_KEY, - DensePoseDataRelative.I_KEY, - DensePoseDataRelative.U_KEY, - DensePoseDataRelative.V_KEY, - DensePoseDataRelative.S_KEY, - ]: - if key not in annotation: - return False, "no {key} data in the annotation".format(key=key) - return True, None - - @staticmethod - def cleanup_annotation(annotation): - for key in [ - DensePoseDataRelative.X_KEY, - DensePoseDataRelative.Y_KEY, - DensePoseDataRelative.I_KEY, - DensePoseDataRelative.U_KEY, - DensePoseDataRelative.V_KEY, - DensePoseDataRelative.S_KEY, - ]: - if key in annotation: - del annotation[key] - - def apply_transform(self, transforms, densepose_transform_data): - self._transform_pts(transforms, densepose_transform_data) - self._transform_segm(transforms, densepose_transform_data) - - def _transform_pts(self, transforms, dp_transform_data): - import detectron2.data.transforms as T - - # NOTE: This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - if do_hflip: - self.x = self.segm.size(1) - self.x - self._flip_iuv_semantics(dp_transform_data) - - def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None: - i_old = self.i.clone() - uv_symmetries = dp_transform_data.uv_symmetries - pt_label_symmetries = 
dp_transform_data.point_label_symmetries - for i in range(self.N_PART_LABELS): - if i + 1 in i_old: - annot_indices_i = i_old == i + 1 - if pt_label_symmetries[i + 1] != i + 1: - self.i[annot_indices_i] = pt_label_symmetries[i + 1] - u_loc = (self.u[annot_indices_i] * 255).long() - v_loc = (self.v[annot_indices_i] * 255).long() - self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to( - device=self.u.device - ) - self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to( - device=self.v.device - ) - - def _transform_segm(self, transforms, dp_transform_data): - import detectron2.data.transforms as T - - # NOTE: This assumes that HorizFlipTransform is the only one that does flip - do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1 - if do_hflip: - self.segm = torch.flip(self.segm, [1]) - self._flip_segm_semantics(dp_transform_data) - - def _flip_segm_semantics(self, dp_transform_data): - old_segm = self.segm.clone() - mask_label_symmetries = dp_transform_data.mask_label_symmetries - for i in range(self.N_BODY_PARTS): - if mask_label_symmetries[i + 1] != i + 1: - self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1] - - -def normalized_coords_transform(x0, y0, w, h): - """ - Coordinates transform that maps top left corner to (-1, -1) and bottom - right corner to (1, 1). Used for torch.grid_sample to initialize the - grid - """ - - def f(p): - return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1) - - return f - - -class DensePoseOutput(object): - def __init__(self, S, I, U, V, confidences): - """ - Args: - S (`torch.Tensor`): coarse segmentation tensor of size (N, A, H, W) - I (`torch.Tensor`): fine segmentation tensor of size (N, C, H, W) - U (`torch.Tensor`): U coordinates for each fine segmentation label of size (N, C, H, W) - V (`torch.Tensor`): V coordinates for each fine segmentation label of size (N, C, H, W) - confidences (dict of str -> `torch.Tensor`) estimated confidence model parameters - """ - self.S = S - self.I = I # noqa: E741 - self.U = U - self.V = V - self.confidences = confidences - self._check_output_dims(S, I, U, V) - - def _check_output_dims(self, S, I, U, V): - assert ( - len(S.size()) == 4 - ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( - S.size() - ) - assert ( - len(I.size()) == 4 - ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( - S.size() - ) - assert ( - len(U.size()) == 4 - ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( - S.size() - ) - assert ( - len(V.size()) == 4 - ), "Segmentation output should have 4 " "dimensions (NCHW), but has size {}".format( - S.size() - ) - assert len(S) == len(I), ( - "Number of output segmentation planes {} " - "should be equal to the number of output part index " - "planes {}".format(len(S), len(I)) - ) - assert S.size()[2:] == I.size()[2:], ( - "Output segmentation plane size {} " - "should be equal to the output part index " - "plane size {}".format(S.size()[2:], I.size()[2:]) - ) - assert I.size() == U.size(), ( - "Part index output shape {} " - "should be the same as U coordinates output shape {}".format(I.size(), U.size()) - ) - assert I.size() == V.size(), ( - "Part index output shape {} " - "should be the same as V coordinates output shape {}".format(I.size(), V.size()) - ) - - def resize(self, image_size_hw): - # do nothing - outputs are invariant to resize - pass - - def _crop(self, S, I, U, V, bbox_old_xywh, bbox_new_xywh): - 
""" - Resample S, I, U, V from bbox_old to the cropped bbox_new - """ - x0old, y0old, wold, hold = bbox_old_xywh - x0new, y0new, wnew, hnew = bbox_new_xywh - tr_coords = normalized_coords_transform(x0old, y0old, wold, hold) - topleft = (x0new, y0new) - bottomright = (x0new + wnew, y0new + hnew) - topleft_norm = tr_coords(topleft) - bottomright_norm = tr_coords(bottomright) - hsize = S.size(1) - wsize = S.size(2) - grid = torch.meshgrid( - torch.arange( - topleft_norm[1], - bottomright_norm[1], - (bottomright_norm[1] - topleft_norm[1]) / hsize, - )[:hsize], - torch.arange( - topleft_norm[0], - bottomright_norm[0], - (bottomright_norm[0] - topleft_norm[0]) / wsize, - )[:wsize], - ) - grid = torch.stack(grid, dim=2).to(S.device) - assert ( - grid.size(0) == hsize - ), "Resampled grid expected " "height={}, actual height={}".format(hsize, grid.size(0)) - assert grid.size(1) == wsize, "Resampled grid expected " "width={}, actual width={}".format( - wsize, grid.size(1) - ) - S_new = F.grid_sample( - S.unsqueeze(0), - torch.unsqueeze(grid, 0), - mode="bilinear", - padding_mode="border", - align_corners=True, - ).squeeze(0) - I_new = F.grid_sample( - I.unsqueeze(0), - torch.unsqueeze(grid, 0), - mode="bilinear", - padding_mode="border", - align_corners=True, - ).squeeze(0) - U_new = F.grid_sample( - U.unsqueeze(0), - torch.unsqueeze(grid, 0), - mode="bilinear", - padding_mode="border", - align_corners=True, - ).squeeze(0) - V_new = F.grid_sample( - V.unsqueeze(0), - torch.unsqueeze(grid, 0), - mode="bilinear", - padding_mode="border", - align_corners=True, - ).squeeze(0) - return S_new, I_new, U_new, V_new - - def crop(self, indices_cropped, bboxes_old, bboxes_new): - """ - Crop outputs for selected bounding boxes to the new bounding boxes. - """ - # VK: cropping is ignored for now - # for i, ic in enumerate(indices_cropped): - # self.S[ic], self.I[ic], self.U[ic], self.V[ic] = \ - # self._crop(self.S[ic], self.I[ic], self.U[ic], self.V[ic], - # bboxes_old[i], bboxes_new[i]) - pass - - def hflip(self, transform_data: DensePoseTransformData) -> None: - """ - Change S, I, U and V to take into account a Horizontal flip. - """ - if self.I.shape[0] > 0: - for el in "SIUV": - self.__dict__[el] = torch.flip(self.__dict__[el], [3]) - self._flip_iuv_semantics_tensor(transform_data) - self._flip_segm_semantics_tensor(transform_data) - - def _flip_iuv_semantics_tensor(self, dp_transform_data: DensePoseTransformData) -> None: - point_label_symmetries = dp_transform_data.point_label_symmetries - uv_symmetries = dp_transform_data.uv_symmetries - - N, C, H, W = self.U.shape - u_loc = (self.U[:, 1:, :, :].clamp(0, 1) * 255).long() - v_loc = (self.V[:, 1:, :, :].clamp(0, 1) * 255).long() - Iindex = torch.arange(C - 1, device=self.U.device)[None, :, None, None].expand( - N, C - 1, H, W - ) - self.U[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc].to( - device=self.U.device - ) - self.V[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc].to( - device=self.V.device - ) - - for el in "IUV": - self.__dict__[el] = self.__dict__[el][:, point_label_symmetries, :, :] - - def _flip_segm_semantics_tensor(self, dp_transform_data): - if self.S.shape[1] == DensePoseDataRelative.N_BODY_PARTS + 1: - self.S = self.S[:, dp_transform_data.mask_label_symmetries, :, :] - - def to_result(self, boxes_xywh): - """ - Convert DensePose outputs to results format. 
Results are more compact, - but cannot be resampled any more - """ - result = DensePoseResult(boxes_xywh, self.S, self.I, self.U, self.V) - return result - - def __getitem__(self, item): - if isinstance(item, int): - S_selected = self.S[item].unsqueeze(0) - I_selected = self.I[item].unsqueeze(0) - U_selected = self.U[item].unsqueeze(0) - V_selected = self.V[item].unsqueeze(0) - conf_selected = {} - for key in self.confidences: - conf_selected[key] = self.confidences[key][item].unsqueeze(0) - else: - S_selected = self.S[item] - I_selected = self.I[item] - U_selected = self.U[item] - V_selected = self.V[item] - conf_selected = {} - for key in self.confidences: - conf_selected[key] = self.confidences[key][item] - return DensePoseOutput(S_selected, I_selected, U_selected, V_selected, conf_selected) - - def __str__(self): - s = "DensePoseOutput S {}, I {}, U {}, V {}".format( - list(self.S.size()), list(self.I.size()), list(self.U.size()), list(self.V.size()) - ) - s_conf = "confidences: [{}]".format( - ", ".join([f"{key} {list(self.confidences[key].size())}" for key in self.confidences]) - ) - return ", ".join([s, s_conf]) - - def __len__(self): - return self.S.size(0) - - -class DensePoseResult(object): - def __init__(self, boxes_xywh, S, I, U, V): - self.results = [] - self.boxes_xywh = boxes_xywh.cpu().tolist() - assert len(boxes_xywh.size()) == 2 - assert boxes_xywh.size(1) == 4 - for i, box_xywh in enumerate(boxes_xywh): - result_i = self._output_to_result(box_xywh, S[[i]], I[[i]], U[[i]], V[[i]]) - result_numpy_i = result_i.cpu().numpy() - result_encoded_i = DensePoseResult.encode_png_data(result_numpy_i) - result_encoded_with_shape_i = (result_numpy_i.shape, result_encoded_i) - self.results.append(result_encoded_with_shape_i) - - def __str__(self): - s = "DensePoseResult: N={} [{}]".format( - len(self.results), ", ".join([str(list(r[0])) for r in self.results]) - ) - return s - - def _output_to_result(self, box_xywh, S, I, U, V): - x, y, w, h = box_xywh - w = max(int(w), 1) - h = max(int(h), 1) - result = torch.zeros([3, h, w], dtype=torch.uint8, device=U.device) - assert ( - len(S.size()) == 4 - ), "AnnIndex tensor size should have {} " "dimensions but has {}".format(4, len(S.size())) - s_bbox = F.interpolate(S, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) - assert ( - len(I.size()) == 4 - ), "IndexUV tensor size should have {} " "dimensions but has {}".format(4, len(S.size())) - i_bbox = ( - F.interpolate(I, (h, w), mode="bilinear", align_corners=False).argmax(dim=1) - * (s_bbox > 0).long() - ).squeeze(0) - assert len(U.size()) == 4, "U tensor size should have {} " "dimensions but has {}".format( - 4, len(U.size()) - ) - u_bbox = F.interpolate(U, (h, w), mode="bilinear", align_corners=False) - assert len(V.size()) == 4, "V tensor size should have {} " "dimensions but has {}".format( - 4, len(V.size()) - ) - v_bbox = F.interpolate(V, (h, w), mode="bilinear", align_corners=False) - result[0] = i_bbox - for part_id in range(1, u_bbox.size(1)): - result[1][i_bbox == part_id] = ( - (u_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8) - ) - result[2][i_bbox == part_id] = ( - (v_bbox[0, part_id][i_bbox == part_id] * 255).clamp(0, 255).to(torch.uint8) - ) - assert ( - result.size(1) == h - ), "Results height {} should be equal" "to bounding box height {}".format(result.size(1), h) - assert ( - result.size(2) == w - ), "Results width {} should be equal" "to bounding box width {}".format(result.size(2), w) - return result - - @staticmethod - def 
encode_png_data(arr): - """ - Encode array data as a PNG image using the highest compression rate - @param arr [in] Data stored in an array of size (3, M, N) of type uint8 - @return Base64-encoded string containing PNG-compressed data - """ - assert len(arr.shape) == 3, "Expected a 3D array as an input," " got a {0}D array".format( - len(arr.shape) - ) - assert arr.shape[0] == 3, "Expected first array dimension of size 3," " got {0}".format( - arr.shape[0] - ) - assert arr.dtype == np.uint8, "Expected an array of type np.uint8, " " got {0}".format( - arr.dtype - ) - data = np.moveaxis(arr, 0, -1) - im = Image.fromarray(data) - fstream = BytesIO() - im.save(fstream, format="png", optimize=True) - s = base64.encodebytes(fstream.getvalue()).decode() - return s - - @staticmethod - def decode_png_data(shape, s): - """ - Decode array data from a string that contains PNG-compressed data - @param Base64-encoded string containing PNG-compressed data - @return Data stored in an array of size (3, M, N) of type uint8 - """ - fstream = BytesIO(base64.decodebytes(s.encode())) - im = Image.open(fstream) - data = np.moveaxis(np.array(im.getdata(), dtype=np.uint8), -1, 0) - return data.reshape(shape) - - def __len__(self): - return len(self.results) - - def __getitem__(self, item): - result_encoded = self.results[item] - bbox_xywh = self.boxes_xywh[item] - return result_encoded, bbox_xywh - - -class DensePoseList(object): - - _TORCH_DEVICE_CPU = torch.device("cpu") - - def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU): - assert len(densepose_datas) == len( - boxes_xyxy_abs - ), "Attempt to initialize DensePoseList with {} DensePose datas " "and {} boxes".format( - len(densepose_datas), len(boxes_xyxy_abs) - ) - self.densepose_datas = [] - for densepose_data in densepose_datas: - assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, ( - "Attempt to initialize DensePoseList with DensePose datas " - "of type {}, expected DensePoseDataRelative".format(type(densepose_data)) - ) - densepose_data_ondevice = ( - densepose_data.to(device) if densepose_data is not None else None - ) - self.densepose_datas.append(densepose_data_ondevice) - self.boxes_xyxy_abs = boxes_xyxy_abs.to(device) - self.image_size_hw = image_size_hw - self.device = device - - def to(self, device): - if self.device == device: - return self - return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device) - - def __iter__(self): - return iter(self.densepose_datas) - - def __len__(self): - return len(self.densepose_datas) - - def __repr__(self): - s = self.__class__.__name__ + "(" - s += "num_instances={}, ".format(len(self.densepose_datas)) - s += "image_width={}, ".format(self.image_size_hw[1]) - s += "image_height={})".format(self.image_size_hw[0]) - return s - - def __getitem__(self, item): - if isinstance(item, int): - densepose_data_rel = self.densepose_datas[item] - return densepose_data_rel - elif isinstance(item, slice): - densepose_datas_rel = self.densepose_datas[item] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) - elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool): - densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) - else: - densepose_datas_rel = 
[self.densepose_datas[i] for i in item] - boxes_xyxy_abs = self.boxes_xyxy_abs[item] - return DensePoseList( - densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py deleted file mode 100644 index 489e7b0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_coco_evaluation.py +++ /dev/null @@ -1,1138 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# All rights reserved. -# -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. -# This is a modified version of cocoeval.py where we also have the densepose evaluation. - -__author__ = "tsungyi" - -import copy -import datetime -import itertools -import logging -import numpy as np -import pickle -import time -from collections import defaultdict -from enum import Enum -from typing import Any, Dict, Tuple -import scipy.spatial.distance as ssd -from fvcore.common.file_io import PathManager -from pycocotools import mask as maskUtils -from scipy.io import loadmat -from scipy.ndimage import zoom as spzoom - -from .data.structures import DensePoseDataRelative, DensePoseResult - -logger = logging.getLogger(__name__) - - -class DensePoseEvalMode(str, Enum): - # use both masks and geodesic distances (GPS * IOU) to compute scores - GPSM = "gpsm" - # use only geodesic distances (GPS) to compute scores - GPS = "gps" - # use only masks (IOU) to compute scores - IOU = "iou" - - -class DensePoseDataMode(str, Enum): - # use estimated IUV data (default mode) - IUV_DT = "iuvdt" - # use ground truth IUV data - IUV_GT = "iuvgt" - # use ground truth labels I and set UV to 0 - I_GT_UV_0 = "igtuv0" - # use ground truth labels I and estimated UV coordinates - I_GT_UV_DT = "igtuvdt" - # use estimated labels I and set UV to 0 - I_DT_UV_0 = "idtuv0" - - -class DensePoseCocoEval(object): - # Interface for evaluating detection on the Microsoft COCO dataset. - # - # The usage for CocoEval is as follows: - # cocoGt=..., cocoDt=... # load dataset and results - # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object - # E.params.recThrs = ...; # set parameters as desired - # E.evaluate(); # run per image evaluation - # E.accumulate(); # accumulate per image results - # E.summarize(); # display summary metrics of results - # For example usage see evalDemo.m and http://mscoco.org/. - # - # The evaluation parameters are as follows (defaults in brackets): - # imgIds - [all] N demo ids to use for evaluation - # catIds - [all] K cat ids to use for evaluation - # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation - # recThrs - [0:.01:1] R=101 recall thresholds for evaluation - # areaRng - [...] A=4 object area ranges for evaluation - # maxDets - [1 10 100] M=3 thresholds on max detections per image - # iouType - ['segm'] set iouType to 'segm', 'bbox', 'keypoints' or 'densepose' - # iouType replaced the now DEPRECATED useSegm parameter. - # useCats - [1] if true use category labels for evaluation - # Note: if useCats=0 category labels are ignored as in proposal scoring. - # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. 
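In Python, the evaluation flow outlined in the usage comments above amounts to roughly the following sketch. The file paths are placeholders, loading detections via COCO.loadRes follows the standard pycocotools workflow the comments refer to, and summarize() is the summary step mentioned above:

from pycocotools.coco import COCO

# Placeholders: any COCO-format ground truth and matching DensePose results file.
coco_gt = COCO("annotations/densepose_minival2014.json")
coco_dt = coco_gt.loadRes("densepose_results.json")

evaluator = DensePoseCocoEval(
    coco_gt, coco_dt, iouType="densepose", dpEvalMode=DensePoseEvalMode.GPS
)
evaluator.params.imgIds = sorted(coco_gt.getImgIds())
evaluator.evaluate()     # per-image, per-category evaluation
evaluator.accumulate()   # aggregate into precision/recall arrays
evaluator.summarize()    # display the summary metrics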
- # - # evaluate(): evaluates detections on every image and every category and - # concats the results into the "evalImgs" with fields: - # dtIds - [1xD] id for each of the D detections (dt) - # gtIds - [1xG] id for each of the G ground truths (gt) - # dtMatches - [TxD] matching gt id at each IoU or 0 - # gtMatches - [TxG] matching dt id at each IoU or 0 - # dtScores - [1xD] confidence of each dt - # gtIgnore - [1xG] ignore flag for each gt - # dtIgnore - [TxD] ignore flag for each dt at each IoU - # - # accumulate(): accumulates the per-image, per-category evaluation - # results in "evalImgs" into the dictionary "eval" with fields: - # params - parameters used for evaluation - # date - date evaluation was performed - # counts - [T,R,K,A,M] parameter dimensions (see above) - # precision - [TxRxKxAxM] precision for every evaluation setting - # recall - [TxKxAxM] max recall for every evaluation setting - # Note: precision and recall==-1 for settings with no gt objects. - # - # See also coco, mask, pycocoDemo, pycocoEvalDemo - # - # Microsoft COCO Toolbox. version 2.0 - # Data, paper, and tutorials available at: http://mscoco.org/ - # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. - # Licensed under the Simplified BSD License [see coco/license.txt] - def __init__( - self, - cocoGt=None, - cocoDt=None, - iouType: str = "densepose", - dpEvalMode: DensePoseEvalMode = DensePoseEvalMode.GPS, - dpDataMode: DensePoseDataMode = DensePoseDataMode.IUV_DT, - ): - """ - Initialize CocoEval using coco APIs for gt and dt - :param cocoGt: coco object with ground truth annotations - :param cocoDt: coco object with detection results - :return: None - """ - self.cocoGt = cocoGt # ground truth COCO API - self.cocoDt = cocoDt # detections COCO API - self._dpEvalMode = dpEvalMode - self._dpDataMode = dpDataMode - self.params = {} # evaluation parameters - self.evalImgs = defaultdict(list) # per-image per-category eval results [KxAxI] - self.eval = {} # accumulated evaluation results - self._gts = defaultdict(list) # gt for evaluation - self._dts = defaultdict(list) # dt for evaluation - self.params = Params(iouType=iouType) # parameters - self._paramsEval = {} # parameters for evaluation - self.stats = [] # result summarization - self.ious = {} # ious between all gts and dts - if cocoGt is not None: - self.params.imgIds = sorted(cocoGt.getImgIds()) - self.params.catIds = sorted(cocoGt.getCatIds()) - self.ignoreThrBB = 0.7 - self.ignoreThrUV = 0.9 - - def _loadGEval(self): - smpl_subdiv_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/SMPL_subdiv.mat" - ) - pdist_transform_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/SMPL_SUBDIV_TRANSFORM.mat" - ) - pdist_matrix_fpath = PathManager.get_local_path( - "https://dl.fbaipublicfiles.com/densepose/data/Pdist_matrix.pkl", timeout_sec=120 - ) - SMPL_subdiv = loadmat(smpl_subdiv_fpath) - self.PDIST_transform = loadmat(pdist_transform_fpath) - self.PDIST_transform = self.PDIST_transform["index"].squeeze() - UV = np.array([SMPL_subdiv["U_subdiv"], SMPL_subdiv["V_subdiv"]]).squeeze() - ClosestVertInds = np.arange(UV.shape[1]) + 1 - self.Part_UVs = [] - self.Part_ClosestVertInds = [] - for i in np.arange(24): - self.Part_UVs.append(UV[:, SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)]) - self.Part_ClosestVertInds.append( - ClosestVertInds[SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)] - ) - - with open(pdist_matrix_fpath, "rb") as hFile: - arrays = pickle.load(hFile, encoding="latin1") - 
self.Pdist_matrix = arrays["Pdist_matrix"] - self.Part_ids = np.array(SMPL_subdiv["Part_ID_subdiv"].squeeze()) - # Mean geodesic distances for parts. - self.Mean_Distances = np.array([0, 0.351, 0.107, 0.126, 0.237, 0.173, 0.142, 0.128, 0.150]) - # Coarse Part labels. - self.CoarseParts = np.array( - [0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8] - ) - - def _prepare(self): - """ - Prepare ._gts and ._dts for evaluation based on params - :return: None - """ - - def _toMask(anns, coco): - # modify ann['segmentation'] by reference - for ann in anns: - rle = coco.annToRLE(ann) - ann["segmentation"] = rle - - def _getIgnoreRegion(iid, coco): - img = coco.imgs[iid] - - if "ignore_regions_x" not in img.keys(): - return None - - if len(img["ignore_regions_x"]) == 0: - return None - - rgns_merged = [] - for region_x, region_y in zip(img["ignore_regions_x"], img["ignore_regions_y"]): - rgns = [iter(region_x), iter(region_y)] - rgns_merged.append([next(it) for it in itertools.cycle(rgns)]) - rles = maskUtils.frPyObjects(rgns_merged, img["height"], img["width"]) - rle = maskUtils.merge(rles) - return maskUtils.decode(rle) - - def _checkIgnore(dt, iregion): - if iregion is None: - return True - - bb = np.array(dt["bbox"]).astype(np.int) - x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3] - x2 = min([x2, iregion.shape[1]]) - y2 = min([y2, iregion.shape[0]]) - - if bb[2] * bb[3] == 0: - return False - - crop_iregion = iregion[y1:y2, x1:x2] - - if crop_iregion.sum() == 0: - return True - - if "densepose" not in dt.keys(): # filtering boxes - return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB - - # filtering UVs - ignoremask = np.require(crop_iregion, requirements=["F"]) - mask = self._extract_mask(dt) - uvmask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"]) - uvmask_ = maskUtils.encode(uvmask) - ignoremask_ = maskUtils.encode(ignoremask) - uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0] - return uviou < self.ignoreThrUV - - p = self.params - - if p.useCats: - gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) - dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) - else: - gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) - dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) - - imns = self.cocoGt.loadImgs(p.imgIds) - self.size_mapping = {} - for im in imns: - self.size_mapping[im["id"]] = [im["height"], im["width"]] - - # if iouType == 'uv', add point gt annotations - if p.iouType == "densepose": - self._loadGEval() - - # convert ground truth to mask if iouType == 'segm' - if p.iouType == "segm": - _toMask(gts, self.cocoGt) - _toMask(dts, self.cocoDt) - - # set ignore flag - for gt in gts: - gt["ignore"] = gt["ignore"] if "ignore" in gt else 0 - gt["ignore"] = "iscrowd" in gt and gt["iscrowd"] - if p.iouType == "keypoints": - gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"] - if p.iouType == "densepose": - gt["ignore"] = ("dp_x" in gt) == 0 - - self._gts = defaultdict(list) # gt for evaluation - self._dts = defaultdict(list) # dt for evaluation - self._igrgns = defaultdict(list) - - for gt in gts: - iid = gt["image_id"] - if iid not in self._igrgns.keys(): - self._igrgns[iid] = _getIgnoreRegion(iid, self.cocoGt) - if _checkIgnore(gt, self._igrgns[iid]): - self._gts[iid, gt["category_id"]].append(gt) - for dt in dts: - iid = dt["image_id"] - if (iid not in self._igrgns) or _checkIgnore(dt, self._igrgns[iid]): - 
self._dts[iid, dt["category_id"]].append(dt) - - self.evalImgs = defaultdict(list) # per-image per-category evaluation results - self.eval = {} # accumulated evaluation results - - def evaluate(self): - """ - Run per image evaluation on given images and store results (a list of dict) in self.evalImgs - :return: None - """ - tic = time.time() - logger.info("Running per image DensePose evaluation... {}".format(self.params.iouType)) - p = self.params - # add backward compatibility if useSegm is specified in params - if p.useSegm is not None: - p.iouType = "segm" if p.useSegm == 1 else "bbox" - logger.info("useSegm (deprecated) is not None. Running DensePose evaluation") - p.imgIds = list(np.unique(p.imgIds)) - if p.useCats: - p.catIds = list(np.unique(p.catIds)) - p.maxDets = sorted(p.maxDets) - self.params = p - - self._prepare() - # loop through images, area range, max detection number - catIds = p.catIds if p.useCats else [-1] - - if p.iouType in ["segm", "bbox"]: - computeIoU = self.computeIoU - elif p.iouType == "keypoints": - computeIoU = self.computeOks - elif p.iouType == "densepose": - computeIoU = self.computeOgps - if self._dpEvalMode == DensePoseEvalMode.GPSM: - self.real_ious = { - (imgId, catId): self.computeDPIoU(imgId, catId) - for imgId in p.imgIds - for catId in catIds - } - - self.ious = { - (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds - } - - evaluateImg = self.evaluateImg - maxDet = p.maxDets[-1] - self.evalImgs = [ - evaluateImg(imgId, catId, areaRng, maxDet) - for catId in catIds - for areaRng in p.areaRng - for imgId in p.imgIds - ] - self._paramsEval = copy.deepcopy(self.params) - toc = time.time() - logger.info("DensePose evaluation DONE (t={:0.2f}s).".format(toc - tic)) - - def getDensePoseMask(self, polys): - maskGen = np.zeros([256, 256]) - for i in range(1, 15): - if polys[i - 1]: - currentMask = maskUtils.decode(polys[i - 1]) - maskGen[currentMask > 0] = i - return maskGen - - def _generate_rlemask_on_image(self, mask, imgId, data): - bbox_xywh = np.array(data["bbox"]) - x, y, w, h = bbox_xywh - im_h, im_w = self.size_mapping[imgId] - im_mask = np.zeros((im_h, im_w), dtype=np.uint8) - if mask is not None: - x0 = max(int(x), 0) - x1 = min(int(x + w), im_w, int(x) + mask.shape[1]) - y0 = max(int(y), 0) - y1 = min(int(y + h), im_h, int(y) + mask.shape[0]) - y = int(y) - x = int(x) - im_mask[y0:y1, x0:x1] = mask[y0 - y : y1 - y, x0 - x : x1 - x] - im_mask = np.require(np.asarray(im_mask > 0), dtype=np.uint8, requirements=["F"]) - rle_mask = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[0] - return rle_mask - - def computeDPIoU(self, imgId, catId): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return [] - inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = dt[0 : p.maxDets[-1]] - - gtmasks = [] - for g in gt: - if DensePoseDataRelative.S_KEY in g: - mask = self.getDensePoseMask(g[DensePoseDataRelative.S_KEY]) - _, _, w, h = g["bbox"] - scale_x = float(max(w, 1)) / mask.shape[1] - scale_y = float(max(h, 1)) / mask.shape[0] - mask = spzoom(mask, (scale_y, scale_x), order=1, prefilter=False) - mask = np.array(mask > 0.5, dtype=np.uint8) - rle_mask = self._generate_rlemask_on_image(mask, imgId, g) - elif "segmentation" in 
g: - segmentation = g["segmentation"] - if isinstance(segmentation, list) and segmentation: - # polygons - im_h, im_w = self.size_mapping[imgId] - rles = maskUtils.frPyObjects(segmentation, im_h, im_w) - rle_mask = maskUtils.merge(rles) - elif isinstance(segmentation, dict): - if isinstance(segmentation["counts"], list): - # uncompressed RLE - im_h, im_w = self.size_mapping[imgId] - rle_mask = maskUtils.frPyObjects(segmentation, im_h, im_w) - else: - # compressed RLE - rle_mask = segmentation - else: - rle_mask = self._generate_rlemask_on_image(None, imgId, g) - else: - rle_mask = self._generate_rlemask_on_image(None, imgId, g) - gtmasks.append(rle_mask) - - dtmasks = [] - for d in dt: - mask = self._extract_mask(d) - mask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"]) - rle_mask = self._generate_rlemask_on_image(mask, imgId, d) - dtmasks.append(rle_mask) - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in gt] - iousDP = maskUtils.iou(dtmasks, gtmasks, iscrowd) - return iousDP - - def computeIoU(self, imgId, catId): - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return [] - inds = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in inds] - if len(dt) > p.maxDets[-1]: - dt = dt[0 : p.maxDets[-1]] - - if p.iouType == "segm": - g = [g["segmentation"] for g in gt] - d = [d["segmentation"] for d in dt] - elif p.iouType == "bbox": - g = [g["bbox"] for g in gt] - d = [d["bbox"] for d in dt] - else: - raise Exception("unknown iouType for iou computation") - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in gt] - ious = maskUtils.iou(d, g, iscrowd) - return ious - - def computeOks(self, imgId, catId): - p = self.params - # dimension here should be Nxm - gts = self._gts[imgId, catId] - dts = self._dts[imgId, catId] - inds = np.argsort([-d["score"] for d in dts], kind="mergesort") - dts = [dts[i] for i in inds] - if len(dts) > p.maxDets[-1]: - dts = dts[0 : p.maxDets[-1]] - # if len(gts) == 0 and len(dts) == 0: - if len(gts) == 0 or len(dts) == 0: - return [] - ious = np.zeros((len(dts), len(gts))) - sigmas = ( - np.array( - [ - 0.26, - 0.25, - 0.25, - 0.35, - 0.35, - 0.79, - 0.79, - 0.72, - 0.72, - 0.62, - 0.62, - 1.07, - 1.07, - 0.87, - 0.87, - 0.89, - 0.89, - ] - ) - / 10.0 - ) - vars = (sigmas * 2) ** 2 - k = len(sigmas) - # compute oks between each detection and ground truth object - for j, gt in enumerate(gts): - # create bounds for ignore regions(double the gt bbox) - g = np.array(gt["keypoints"]) - xg = g[0::3] - yg = g[1::3] - vg = g[2::3] - k1 = np.count_nonzero(vg > 0) - bb = gt["bbox"] - x0 = bb[0] - bb[2] - x1 = bb[0] + bb[2] * 2 - y0 = bb[1] - bb[3] - y1 = bb[1] + bb[3] * 2 - for i, dt in enumerate(dts): - d = np.array(dt["keypoints"]) - xd = d[0::3] - yd = d[1::3] - if k1 > 0: - # measure the per-keypoint distance if keypoints visible - dx = xd - xg - dy = yd - yg - else: - # measure minimum distance to keypoints in (x0,y0) & (x1,y1) - z = np.zeros(k) - dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0) - dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0) - e = (dx ** 2 + dy ** 2) / vars / (gt["area"] + np.spacing(1)) / 2 - if k1 > 0: - e = e[vg > 0] - ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] - return ious - - def 
_extract_mask(self, dt: Dict[str, Any]) -> np.ndarray: - (densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt["densepose"] - densepose_data = DensePoseResult.decode_png_data(densepose_shape, densepose_data_encoded) - return densepose_data[0] - - def _extract_iuv( - self, densepose_data: np.ndarray, py: np.ndarray, px: np.ndarray, gt: Dict[str, Any] - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """ - Extract arrays of I, U and V values at given points as numpy arrays - given the data mode stored in self._dpDataMode - """ - if self._dpDataMode == DensePoseDataMode.IUV_DT: - # estimated labels and UV (default) - ipoints = densepose_data[0, py, px] - upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255. - vpoints = densepose_data[2, py, px] / 255.0 - elif self._dpDataMode == DensePoseDataMode.IUV_GT: - # ground truth - ipoints = np.array(gt["dp_I"]) - upoints = np.array(gt["dp_U"]) - vpoints = np.array(gt["dp_V"]) - elif self._dpDataMode == DensePoseDataMode.I_GT_UV_0: - # ground truth labels, UV = 0 - ipoints = np.array(gt["dp_I"]) - upoints = upoints * 0.0 - vpoints = vpoints * 0.0 - elif self._dpDataMode == DensePoseDataMode.I_GT_UV_DT: - # ground truth labels, estimated UV - ipoints = np.array(gt["dp_I"]) - upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255. - vpoints = densepose_data[2, py, px] / 255.0 - elif self._dpDataMode == DensePoseDataMode.I_DT_UV_0: - # estimated labels, UV = 0 - ipoints = densepose_data[0, py, px] - upoints = upoints * 0.0 - vpoints = vpoints * 0.0 - else: - raise ValueError(f"Unknown data mode: {self._dpDataMode}") - return ipoints, upoints, vpoints - - def computeOgps(self, imgId, catId): - p = self.params - # dimension here should be Nxm - g = self._gts[imgId, catId] - d = self._dts[imgId, catId] - inds = np.argsort([-d_["score"] for d_ in d], kind="mergesort") - d = [d[i] for i in inds] - if len(d) > p.maxDets[-1]: - d = d[0 : p.maxDets[-1]] - # if len(gts) == 0 and len(dts) == 0: - if len(g) == 0 or len(d) == 0: - return [] - ious = np.zeros((len(d), len(g))) - # compute opgs between each detection and ground truth object - # sigma = self.sigma #0.255 # dist = 0.3m corresponds to ogps = 0.5 - # 1 # dist = 0.3m corresponds to ogps = 0.96 - # 1.45 # dist = 1.7m (person height) corresponds to ogps = 0.5) - for j, gt in enumerate(g): - if not gt["ignore"]: - g_ = gt["bbox"] - for i, dt in enumerate(d): - # - dy = int(dt["bbox"][3]) - dx = int(dt["bbox"][2]) - dp_x = np.array(gt["dp_x"]) * g_[2] / 255.0 - dp_y = np.array(gt["dp_y"]) * g_[3] / 255.0 - py = (dp_y + g_[1] - dt["bbox"][1]).astype(np.int) - px = (dp_x + g_[0] - dt["bbox"][0]).astype(np.int) - # - pts = np.zeros(len(px)) - pts[px >= dx] = -1 - pts[py >= dy] = -1 - pts[px < 0] = -1 - pts[py < 0] = -1 - if len(pts) < 1: - ogps = 0.0 - elif np.max(pts) == -1: - ogps = 0.0 - else: - px[pts == -1] = 0 - py[pts == -1] = 0 - (densepose_shape, densepose_data_encoded), densepose_bbox_xywh = dt[ - "densepose" - ] - densepose_data = DensePoseResult.decode_png_data( - densepose_shape, densepose_data_encoded - ) - assert densepose_data.shape[2] == dx, ( - "DensePoseData width {} should be equal to " - "detection bounding box width {}".format(densepose_data.shape[2], dx) - ) - assert densepose_data.shape[1] == dy, ( - "DensePoseData height {} should be equal to " - "detection bounding box height {}".format(densepose_data.shape[1], dy) - ) - ipoints, upoints, vpoints = self._extract_iuv(densepose_data, py, px, gt) - ipoints[pts == -1] = 0 - # Find closest 
vertices in subsampled mesh. - cVerts, cVertsGT = self.findAllClosestVerts(gt, upoints, vpoints, ipoints) - # Get pairwise geodesic distances between gt and estimated mesh points. - dist = self.getDistances(cVertsGT, cVerts) - # Compute the Ogps measure. - # Find the mean geodesic normalization distance for - # each GT point, based on which part it is on. - Current_Mean_Distances = self.Mean_Distances[ - self.CoarseParts[self.Part_ids[cVertsGT[cVertsGT > 0].astype(int) - 1]] - ] - # Compute gps - ogps_values = np.exp(-(dist ** 2) / (2 * (Current_Mean_Distances ** 2))) - # - if len(dist) > 0: - ogps = np.sum(ogps_values) / len(dist) - ious[i, j] = ogps - - gbb = [gt["bbox"] for gt in g] - dbb = [dt["bbox"] for dt in d] - - # compute iou between each dt and gt region - iscrowd = [int(o["iscrowd"]) for o in g] - ious_bb = maskUtils.iou(dbb, gbb, iscrowd) - return ious, ious_bb - - def evaluateImg(self, imgId, catId, aRng, maxDet): - """ - perform evaluation for single category and image - :return: dict (single image results) - """ - - p = self.params - if p.useCats: - gt = self._gts[imgId, catId] - dt = self._dts[imgId, catId] - else: - gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]] - dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]] - if len(gt) == 0 and len(dt) == 0: - return None - - for g in gt: - # g['_ignore'] = g['ignore'] - if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]): - g["_ignore"] = True - else: - g["_ignore"] = False - - # sort dt highest score first, sort gt ignore last - gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort") - gt = [gt[i] for i in gtind] - dtind = np.argsort([-d["score"] for d in dt], kind="mergesort") - dt = [dt[i] for i in dtind[0:maxDet]] - iscrowd = [int(o["iscrowd"]) for o in gt] - # load computed ious - if p.iouType == "densepose": - # print('Checking the length', len(self.ious[imgId, catId])) - # if len(self.ious[imgId, catId]) == 0: - # print(self.ious[imgId, catId]) - ious = ( - self.ious[imgId, catId][0][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - ioubs = ( - self.ious[imgId, catId][1][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - if self._dpEvalMode == DensePoseEvalMode.GPSM: - iousM = ( - self.real_ious[imgId, catId][:, gtind] - if len(self.real_ious[imgId, catId]) > 0 - else self.real_ious[imgId, catId] - ) - else: - ious = ( - self.ious[imgId, catId][:, gtind] - if len(self.ious[imgId, catId]) > 0 - else self.ious[imgId, catId] - ) - - T = len(p.iouThrs) - G = len(gt) - D = len(dt) - gtm = np.zeros((T, G)) - dtm = np.zeros((T, D)) - gtIg = np.array([g["_ignore"] for g in gt]) - dtIg = np.zeros((T, D)) - if np.all(gtIg) and p.iouType == "densepose": - dtIg = np.logical_or(dtIg, True) - - if len(ious) > 0: # and not p.iouType == 'densepose': - for tind, t in enumerate(p.iouThrs): - for dind, d in enumerate(dt): - # information about best match so far (m=-1 -> unmatched) - iou = min([t, 1 - 1e-10]) - m = -1 - for gind, _g in enumerate(gt): - # if this gt already matched, and not a crowd, continue - if gtm[tind, gind] > 0 and not iscrowd[gind]: - continue - # if dt matched to reg gt, and on ignore gt, stop - if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1: - break - if p.iouType == "densepose": - if self._dpEvalMode == DensePoseEvalMode.GPSM: - new_iou = np.sqrt(iousM[dind, gind] * ious[dind, gind]) - elif self._dpEvalMode == DensePoseEvalMode.IOU: - new_iou = iousM[dind, gind] - elif self._dpEvalMode == 
DensePoseEvalMode.GPS: - new_iou = ious[dind, gind] - else: - new_iou = ious[dind, gind] - if new_iou < iou: - continue - if new_iou == 0.0: - continue - # if match successful and best so far, store appropriately - iou = new_iou - m = gind - # if match made store id of match for both dt and gt - if m == -1: - continue - dtIg[tind, dind] = gtIg[m] - dtm[tind, dind] = gt[m]["id"] - gtm[tind, m] = d["id"] - - if p.iouType == "densepose": - if not len(ioubs) == 0: - for dind, d in enumerate(dt): - # information about best match so far (m=-1 -> unmatched) - if dtm[tind, dind] == 0: - ioub = 0.8 - m = -1 - for gind, _g in enumerate(gt): - # if this gt already matched, and not a crowd, continue - if gtm[tind, gind] > 0 and not iscrowd[gind]: - continue - # continue to next gt unless better match made - if ioubs[dind, gind] < ioub: - continue - # if match successful and best so far, store appropriately - ioub = ioubs[dind, gind] - m = gind - # if match made store id of match for both dt and gt - if m > -1: - dtIg[:, dind] = gtIg[m] - if gtIg[m]: - dtm[tind, dind] = gt[m]["id"] - gtm[tind, m] = d["id"] - # set unmatched detections outside of area range to ignore - a = np.array([d["area"] < aRng[0] or d["area"] > aRng[1] for d in dt]).reshape((1, len(dt))) - dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0))) - # store results for given image and category - # print('Done with the function', len(self.ious[imgId, catId])) - return { - "image_id": imgId, - "category_id": catId, - "aRng": aRng, - "maxDet": maxDet, - "dtIds": [d["id"] for d in dt], - "gtIds": [g["id"] for g in gt], - "dtMatches": dtm, - "gtMatches": gtm, - "dtScores": [d["score"] for d in dt], - "gtIgnore": gtIg, - "dtIgnore": dtIg, - } - - def accumulate(self, p=None): - """ - Accumulate per image evaluation results and store the result in self.eval - :param p: input params for evaluation - :return: None - """ - logger.info("Accumulating evaluation results...") - tic = time.time() - if not self.evalImgs: - logger.info("Please run evaluate() first") - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -(np.ones((T, R, K, A, M))) # -1 for the precision of absent categories - recall = -(np.ones((T, K, A, M))) - - # create dictionary for future indexing - logger.info("Categories: {}".format(p.catIds)) - _pe = self._paramsEval - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0 * A0 * I0 - for a, a0 in enumerate(a_list): - Na = a0 * I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if e is not None] - if len(E) == 0: - continue - dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. 
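As an aside on the matching loop above: in GPSM mode the score used to rank a detection/ground-truth pair is the geometric mean of the GPS value and the mask IoU, so a detection must localize both the mask and the surface points well to match. A minimal NumPy sketch with made-up scores (the array names are illustrative, not taken from the evaluator):

```python
import numpy as np

# Hypothetical per-pair scores for 2 detections x 3 ground-truth instances.
gps = np.array([[0.81, 0.10, 0.55],
                [0.20, 0.90, 0.40]])
mask_iou = np.array([[0.70, 0.05, 0.60],
                     [0.10, 0.85, 0.30]])

# GPSM score: geometric mean of surface-point quality (GPS) and mask IoU.
gpsm = np.sqrt(gps * mask_iou)
print(gpsm.round(3))  # [[0.753 0.071 0.574]
                      #  [0.141 0.875 0.346]]
```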
- # mergesort is used to be consistent as Matlab implementation. - inds = np.argsort(-dtScores, kind="mergesort") - - dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] - dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] - gtIg = np.concatenate([e["gtIgnore"] for e in E]) - npig = np.count_nonzero(gtIg == 0) - if npig == 0: - continue - tps = np.logical_and(dtm, np.logical_not(dtIg)) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp + tp + np.spacing(1)) - q = np.zeros((R,)) - - if nd: - recall[t, k, a, m] = rc[-1] - else: - recall[t, k, a, m] = 0 - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist() - q = q.tolist() - - for i in range(nd - 1, 0, -1): - if pr[i] > pr[i - 1]: - pr[i - 1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side="left") - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except Exception: - pass - precision[t, :, k, a, m] = np.array(q) - logger.info( - "Final: max precision {}, min precision {}".format(np.max(precision), np.min(precision)) - ) - self.eval = { - "params": p, - "counts": [T, R, K, A, M], - "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), - "precision": precision, - "recall": recall, - } - toc = time.time() - logger.info("DONE (t={:0.2f}s).".format(toc - tic)) - - def summarize(self): - """ - Compute and display summary metrics for evaluation results. - Note this function can *only* be applied on the default parameter setting - """ - - def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100): - p = self.params - iStr = " {:<18} {} @[ {}={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}" - titleStr = "Average Precision" if ap == 1 else "Average Recall" - typeStr = "(AP)" if ap == 1 else "(AR)" - measure = "IoU" - if self.params.iouType == "keypoints": - measure = "OKS" - elif self.params.iouType == "densepose": - measure = "OGPS" - iouStr = ( - "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1]) - if iouThr is None - else "{:0.2f}".format(iouThr) - ) - - aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] - mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] - if ap == 1: - # dimension of precision: [TxRxKxAxM] - s = self.eval["precision"] - # IoU - if iouThr is not None: - t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0] - s = s[t] - s = s[:, :, :, aind, mind] - else: - # dimension of recall: [TxKxAxM] - s = self.eval["recall"] - if iouThr is not None: - t = np.where(iouThr == p.iouThrs)[0] - s = s[t] - s = s[:, :, aind, mind] - if len(s[s > -1]) == 0: - mean_s = -1 - else: - mean_s = np.mean(s[s > -1]) - logger.info(iStr.format(titleStr, typeStr, measure, iouStr, areaRng, maxDets, mean_s)) - return mean_s - - def _summarizeDets(): - stats = np.zeros((12,)) - stats[0] = _summarize(1) - stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2]) - stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2]) - stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2]) - stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2]) - stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2]) - stats[6] = 
_summarize(0, maxDets=self.params.maxDets[0]) - stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) - stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) - stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2]) - stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2]) - stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2]) - return stats - - def _summarizeKps(): - stats = np.zeros((10,)) - stats[0] = _summarize(1, maxDets=20) - stats[1] = _summarize(1, maxDets=20, iouThr=0.5) - stats[2] = _summarize(1, maxDets=20, iouThr=0.75) - stats[3] = _summarize(1, maxDets=20, areaRng="medium") - stats[4] = _summarize(1, maxDets=20, areaRng="large") - stats[5] = _summarize(0, maxDets=20) - stats[6] = _summarize(0, maxDets=20, iouThr=0.5) - stats[7] = _summarize(0, maxDets=20, iouThr=0.75) - stats[8] = _summarize(0, maxDets=20, areaRng="medium") - stats[9] = _summarize(0, maxDets=20, areaRng="large") - return stats - - def _summarizeUvs(): - stats = np.zeros((10,)) - stats[0] = _summarize(1, maxDets=self.params.maxDets[0]) - stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[3] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium") - stats[4] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large") - stats[5] = _summarize(0, maxDets=self.params.maxDets[0]) - stats[6] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[7] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[8] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium") - stats[9] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large") - return stats - - def _summarizeUvsOld(): - stats = np.zeros((18,)) - stats[0] = _summarize(1, maxDets=self.params.maxDets[0]) - stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.55) - stats[3] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.60) - stats[4] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.65) - stats[5] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.70) - stats[6] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[7] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.80) - stats[8] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.85) - stats[9] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.90) - stats[10] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.95) - stats[11] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium") - stats[12] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large") - stats[13] = _summarize(0, maxDets=self.params.maxDets[0]) - stats[14] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5) - stats[15] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75) - stats[16] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium") - stats[17] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large") - return stats - - if not self.eval: - raise Exception("Please run accumulate() first") - iouType = self.params.iouType - if iouType in ["segm", "bbox"]: - summarize = _summarizeDets - elif iouType in ["keypoints"]: - summarize = _summarizeKps - elif iouType in ["densepose"]: - summarize = _summarizeUvs - self.stats = summarize() - - def __str__(self): - self.summarize() - - # ================ functions for dense pose 
============================== - def findAllClosestVerts(self, gt, U_points, V_points, Index_points): - # - I_gt = np.array(gt["dp_I"]) - U_gt = np.array(gt["dp_U"]) - V_gt = np.array(gt["dp_V"]) - # - # print(I_gt) - # - ClosestVerts = np.ones(Index_points.shape) * -1 - for i in np.arange(24): - # - if sum(Index_points == (i + 1)) > 0: - UVs = np.array( - [U_points[Index_points == (i + 1)], V_points[Index_points == (i + 1)]] - ) - Current_Part_UVs = self.Part_UVs[i] - Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i] - D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze() - ClosestVerts[Index_points == (i + 1)] = Current_Part_ClosestVertInds[ - np.argmin(D, axis=0) - ] - # - ClosestVertsGT = np.ones(Index_points.shape) * -1 - for i in np.arange(24): - if sum(I_gt == (i + 1)) > 0: - UVs = np.array([U_gt[I_gt == (i + 1)], V_gt[I_gt == (i + 1)]]) - Current_Part_UVs = self.Part_UVs[i] - Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i] - D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze() - ClosestVertsGT[I_gt == (i + 1)] = Current_Part_ClosestVertInds[np.argmin(D, axis=0)] - # - return ClosestVerts, ClosestVertsGT - - def getDistances(self, cVertsGT, cVerts): - - ClosestVertsTransformed = self.PDIST_transform[cVerts.astype(int) - 1] - ClosestVertsGTTransformed = self.PDIST_transform[cVertsGT.astype(int) - 1] - # - ClosestVertsTransformed[cVerts < 0] = 0 - ClosestVertsGTTransformed[cVertsGT < 0] = 0 - # - cVertsGT = ClosestVertsGTTransformed - cVerts = ClosestVertsTransformed - # - n = 27554 - dists = [] - for d in range(len(cVertsGT)): - if cVertsGT[d] > 0: - if cVerts[d] > 0: - i = cVertsGT[d] - 1 - j = cVerts[d] - 1 - if j == i: - dists.append(0) - elif j > i: - ccc = i - i = j - j = ccc - i = n - i - 1 - j = n - j - 1 - k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1 - k = (n * n - n) / 2 - k - 1 - dists.append(self.Pdist_matrix[int(k)][0]) - else: - i = n - i - 1 - j = n - j - 1 - k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1 - k = (n * n - n) / 2 - k - 1 - dists.append(self.Pdist_matrix[int(k)][0]) - else: - dists.append(np.inf) - return np.atleast_1d(np.array(dists).squeeze()) - - -class Params: - """ - Params for coco evaluation api - """ - - def setDetParams(self): - self.imgIds = [] - self.catIds = [] - # np.arange causes trouble. the data point on arange is slightly larger than the true value - self.iouThrs = np.linspace(0.5, 0.95, np.round((0.95 - 0.5) / 0.05) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, np.round((1.00 - 0.0) / 0.01) + 1, endpoint=True) - self.maxDets = [1, 10, 100] - self.areaRng = [ - [0 ** 2, 1e5 ** 2], - [0 ** 2, 32 ** 2], - [32 ** 2, 96 ** 2], - [96 ** 2, 1e5 ** 2], - ] - self.areaRngLbl = ["all", "small", "medium", "large"] - self.useCats = 1 - - def setKpParams(self): - self.imgIds = [] - self.catIds = [] - # np.arange causes trouble. 
the data point on arange is slightly larger than the true value - self.iouThrs = np.linspace(0.5, 0.95, np.round((0.95 - 0.5) / 0.05) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, np.round((1.00 - 0.0) / 0.01) + 1, endpoint=True) - self.maxDets = [20] - self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] - self.areaRngLbl = ["all", "medium", "large"] - self.useCats = 1 - - def setUvParams(self): - self.imgIds = [] - self.catIds = [] - self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) - self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) - self.maxDets = [20] - self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] - self.areaRngLbl = ["all", "medium", "large"] - self.useCats = 1 - - def __init__(self, iouType="segm"): - if iouType == "segm" or iouType == "bbox": - self.setDetParams() - elif iouType == "keypoints": - self.setKpParams() - elif iouType == "densepose": - self.setUvParams() - else: - raise Exception("iouType not supported") - self.iouType = iouType - # useSegm is deprecated - self.useSegm = None diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py deleted file mode 100644 index 3639706..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/densepose_head.py +++ /dev/null @@ -1,1216 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import math -from dataclasses import dataclass -from enum import Enum -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.config import CfgNode -from detectron2.layers import Conv2d, ConvTranspose2d, interpolate -from detectron2.structures.boxes import matched_boxlist_iou -from detectron2.utils.registry import Registry - -from .data.structures import DensePoseOutput - -ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD") - - -class DensePoseUVConfidenceType(Enum): - """ - Statistical model type for confidence learning, possible values: - - "iid_iso": statistically independent identically distributed residuals - with anisotropic covariance - - "indep_aniso": statistically independent residuals with anisotropic - covariances - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. 
NIPS 2019 - """ - - # fmt: off - IID_ISO = "iid_iso" - INDEP_ANISO = "indep_aniso" - # fmt: on - - -@dataclass -class DensePoseUVConfidenceConfig: - """ - Configuration options for confidence on UV data - """ - - enabled: bool = False - # lower bound on UV confidences - epsilon: float = 0.01 - type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO - - -@dataclass -class DensePoseConfidenceModelConfig: - """ - Configuration options for confidence models - """ - - # confidence for U and V values - uv_confidence: DensePoseUVConfidenceConfig - - @staticmethod - def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig": - return DensePoseConfidenceModelConfig( - uv_confidence=DensePoseUVConfidenceConfig( - enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED, - epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON, - type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE), - ) - ) - - -def initialize_module_params(module): - for name, param in module.named_parameters(): - if "bias" in name: - nn.init.constant_(param, 0) - elif "weight" in name: - nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") - - -@ROI_DENSEPOSE_HEAD_REGISTRY.register() -class DensePoseDeepLabHead(nn.Module): - def __init__(self, cfg, input_channels): - super(DensePoseDeepLabHead, self).__init__() - # fmt: off - hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL - norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM - self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS - self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON - # fmt: on - pad_size = kernel_size // 2 - n_channels = input_channels - - self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56 - self.add_module("ASPP", self.ASPP) - - if self.use_nonlocal: - self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True) - self.add_module("NLBlock", self.NLBlock) - # weight_init.c2_msra_fill(self.ASPP) - - for i in range(self.n_stacked_convs): - norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None - layer = Conv2d( - n_channels, - hidden_dim, - kernel_size, - stride=1, - padding=pad_size, - bias=not norm, - norm=norm_module, - ) - weight_init.c2_msra_fill(layer) - n_channels = hidden_dim - layer_name = self._get_layer_name(i) - self.add_module(layer_name, layer) - self.n_out_channels = hidden_dim - # initialize_module_params(self) - - def forward(self, features): - x0 = features - x = self.ASPP(x0) - if self.use_nonlocal: - x = self.NLBlock(x) - output = x - for i in range(self.n_stacked_convs): - layer_name = self._get_layer_name(i) - x = getattr(self, layer_name)(x) - x = F.relu(x) - output = x - return output - - def _get_layer_name(self, i): - layer_name = "body_conv_fcn{}".format(i + 1) - return layer_name - - -# Copied from -# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py -# See https://arxiv.org/pdf/1706.05587.pdf for details -class ASPPConv(nn.Sequential): - def __init__(self, in_channels, out_channels, dilation): - modules = [ - nn.Conv2d( - in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False - ), - nn.GroupNorm(32, out_channels), - nn.ReLU(), - ] - super(ASPPConv, self).__init__(*modules) - - -class ASPPPooling(nn.Sequential): - def __init__(self, in_channels, out_channels): - super(ASPPPooling, self).__init__( - nn.AdaptiveAvgPool2d(1), - nn.Conv2d(in_channels, out_channels, 1, bias=False), - 
nn.GroupNorm(32, out_channels), - nn.ReLU(), - ) - - def forward(self, x): - size = x.shape[-2:] - x = super(ASPPPooling, self).forward(x) - return F.interpolate(x, size=size, mode="bilinear", align_corners=False) - - -class ASPP(nn.Module): - def __init__(self, in_channels, atrous_rates, out_channels): - super(ASPP, self).__init__() - modules = [] - modules.append( - nn.Sequential( - nn.Conv2d(in_channels, out_channels, 1, bias=False), - nn.GroupNorm(32, out_channels), - nn.ReLU(), - ) - ) - - rate1, rate2, rate3 = tuple(atrous_rates) - modules.append(ASPPConv(in_channels, out_channels, rate1)) - modules.append(ASPPConv(in_channels, out_channels, rate2)) - modules.append(ASPPConv(in_channels, out_channels, rate3)) - modules.append(ASPPPooling(in_channels, out_channels)) - - self.convs = nn.ModuleList(modules) - - self.project = nn.Sequential( - nn.Conv2d(5 * out_channels, out_channels, 1, bias=False), - # nn.BatchNorm2d(out_channels), - nn.ReLU() - # nn.Dropout(0.5) - ) - - def forward(self, x): - res = [] - for conv in self.convs: - res.append(conv(x)) - res = torch.cat(res, dim=1) - return self.project(res) - - -# copied from -# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py -# See https://arxiv.org/abs/1711.07971 for details -class _NonLocalBlockND(nn.Module): - def __init__( - self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True - ): - super(_NonLocalBlockND, self).__init__() - - assert dimension in [1, 2, 3] - - self.dimension = dimension - self.sub_sample = sub_sample - - self.in_channels = in_channels - self.inter_channels = inter_channels - - if self.inter_channels is None: - self.inter_channels = in_channels // 2 - if self.inter_channels == 0: - self.inter_channels = 1 - - if dimension == 3: - conv_nd = nn.Conv3d - max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2)) - bn = nn.GroupNorm # (32, hidden_dim) #nn.BatchNorm3d - elif dimension == 2: - conv_nd = nn.Conv2d - max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2)) - bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm2d - else: - conv_nd = nn.Conv1d - max_pool_layer = nn.MaxPool1d(kernel_size=2) - bn = nn.GroupNorm # (32, hidden_dim)nn.BatchNorm1d - - self.g = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - - if bn_layer: - self.W = nn.Sequential( - conv_nd( - in_channels=self.inter_channels, - out_channels=self.in_channels, - kernel_size=1, - stride=1, - padding=0, - ), - bn(32, self.in_channels), - ) - nn.init.constant_(self.W[1].weight, 0) - nn.init.constant_(self.W[1].bias, 0) - else: - self.W = conv_nd( - in_channels=self.inter_channels, - out_channels=self.in_channels, - kernel_size=1, - stride=1, - padding=0, - ) - nn.init.constant_(self.W.weight, 0) - nn.init.constant_(self.W.bias, 0) - - self.theta = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - self.phi = conv_nd( - in_channels=self.in_channels, - out_channels=self.inter_channels, - kernel_size=1, - stride=1, - padding=0, - ) - - if sub_sample: - self.g = nn.Sequential(self.g, max_pool_layer) - self.phi = nn.Sequential(self.phi, max_pool_layer) - - def forward(self, x): - """ - :param x: (b, c, t, h, w) - :return: - """ - - batch_size = x.size(0) - - g_x = self.g(x).view(batch_size, self.inter_channels, -1) - g_x = g_x.permute(0, 2, 1) - - theta_x = self.theta(x).view(batch_size, self.inter_channels, -1) - theta_x = theta_x.permute(0, 2, 1) - 
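For readers skimming the non-local block's forward pass (which continues just below), the core of the embedded-Gaussian formulation is position-wise attention: softmax(theta(x)^T phi(x)) applied to g(x). A self-contained toy sketch with made-up channel sizes, leaving out the sub-sampling pooling and the final W projection:

```python
import torch
import torch.nn.functional as F

# Toy feature map: batch 2, 4 channels, 8x8 spatial grid; inter_channels = 2.
x = torch.randn(2, 4, 8, 8)
theta, phi, g = (torch.nn.Conv2d(4, 2, 1) for _ in range(3))

b, _, h, w = x.shape
t = theta(x).flatten(2).permute(0, 2, 1)   # (b, h*w, c')
p = phi(x).flatten(2)                      # (b, c', h*w)
gx = g(x).flatten(2).permute(0, 2, 1)      # (b, h*w, c')

attn = F.softmax(t @ p, dim=-1)            # affinities between all spatial positions
y = (attn @ gx).permute(0, 2, 1).reshape(b, 2, h, w)  # aggregated features
```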
phi_x = self.phi(x).view(batch_size, self.inter_channels, -1) - f = torch.matmul(theta_x, phi_x) - f_div_C = F.softmax(f, dim=-1) - - y = torch.matmul(f_div_C, g_x) - y = y.permute(0, 2, 1).contiguous() - y = y.view(batch_size, self.inter_channels, *x.size()[2:]) - W_y = self.W(y) - z = W_y + x - - return z - - -class NONLocalBlock2D(_NonLocalBlockND): - def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True): - super(NONLocalBlock2D, self).__init__( - in_channels, - inter_channels=inter_channels, - dimension=2, - sub_sample=sub_sample, - bn_layer=bn_layer, - ) - - -@ROI_DENSEPOSE_HEAD_REGISTRY.register() -class DensePoseV1ConvXHead(nn.Module): - def __init__(self, cfg, input_channels): - super(DensePoseV1ConvXHead, self).__init__() - # fmt: off - hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL - self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS - # fmt: on - pad_size = kernel_size // 2 - n_channels = input_channels - for i in range(self.n_stacked_convs): - layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size) - layer_name = self._get_layer_name(i) - self.add_module(layer_name, layer) - n_channels = hidden_dim - self.n_out_channels = n_channels - initialize_module_params(self) - - def forward(self, features): - x = features - output = x - for i in range(self.n_stacked_convs): - layer_name = self._get_layer_name(i) - x = getattr(self, layer_name)(x) - x = F.relu(x) - output = x - return output - - def _get_layer_name(self, i): - layer_name = "body_conv_fcn{}".format(i + 1) - return layer_name - - -class DensePosePredictor(nn.Module): - def __init__(self, cfg, input_channels): - - super(DensePosePredictor, self).__init__() - dim_in = input_channels - n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL - self.ann_index_lowres = ConvTranspose2d( - dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.index_uv_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.u_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.v_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE - self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) - self._initialize_confidence_estimation_layers(cfg, self.confidence_model_cfg, dim_in) - initialize_module_params(self) - - def forward(self, head_outputs): - ann_index_lowres = self.ann_index_lowres(head_outputs) - index_uv_lowres = self.index_uv_lowres(head_outputs) - u_lowres = self.u_lowres(head_outputs) - v_lowres = self.v_lowres(head_outputs) - - def interp2d(input): - return interpolate( - input, scale_factor=self.scale_factor, mode="bilinear", align_corners=False - ) - - ann_index = interp2d(ann_index_lowres) - index_uv = interp2d(index_uv_lowres) - u = interp2d(u_lowres) - v = interp2d(v_lowres) - ( - (sigma_1, sigma_2, kappa_u, kappa_v), - (sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres), - (ann_index, index_uv), - ) = self._forward_confidence_estimation_layers( - self.confidence_model_cfg, head_outputs, interp2d, ann_index, index_uv - ) - return ( - (ann_index, 
index_uv, u, v), - (ann_index_lowres, index_uv_lowres, u_lowres, v_lowres), - (sigma_1, sigma_2, kappa_u, kappa_v), - (sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres), - ) - - def _initialize_confidence_estimation_layers( - self, cfg: CfgNode, confidence_model_cfg: DensePoseConfidenceModelConfig, dim_in: int - ): - dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1 - kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL - if confidence_model_cfg.uv_confidence.enabled: - if confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - self.sigma_2_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - elif confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO: - self.sigma_2_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.kappa_u_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - self.kappa_v_lowres = ConvTranspose2d( - dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1) - ) - else: - raise ValueError( - f"Unknown confidence model type: {confidence_model_cfg.confidence_model_type}" - ) - - def _forward_confidence_estimation_layers( - self, confidence_model_cfg, head_outputs, interp2d, ann_index, index_uv - ): - sigma_1, sigma_2, kappa_u, kappa_v = None, None, None, None - sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres = None, None, None, None - if confidence_model_cfg.uv_confidence.enabled: - if confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - sigma_2_lowres = self.sigma_2_lowres(head_outputs) - sigma_2 = interp2d(sigma_2_lowres) - elif confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO: - sigma_2_lowres = self.sigma_2_lowres(head_outputs) - kappa_u_lowres = self.kappa_u_lowres(head_outputs) - kappa_v_lowres = self.kappa_v_lowres(head_outputs) - sigma_2 = interp2d(sigma_2_lowres) - kappa_u = interp2d(kappa_u_lowres) - kappa_v = interp2d(kappa_v_lowres) - else: - raise ValueError( - f"Unknown confidence model type: {confidence_model_cfg.confidence_model_type}" - ) - return ( - (sigma_1, sigma_2, kappa_u, kappa_v), - (sigma_1_lowres, sigma_2_lowres, kappa_u_lowres, kappa_v_lowres), - (ann_index, index_uv), - ) - - -class DensePoseDataFilter(object): - def __init__(self, cfg): - self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD - - @torch.no_grad() - def __call__(self, proposals_with_targets): - """ - Filters proposals with targets to keep only the ones relevant for - DensePose training - proposals: list(Instances), each element of the list corresponds to - various instances (proposals, GT for boxes and densepose) for one - image - """ - proposals_filtered = [] - for proposals_per_image in proposals_with_targets: - if not hasattr(proposals_per_image, "gt_densepose"): - continue - assert hasattr(proposals_per_image, "gt_boxes") - assert hasattr(proposals_per_image, "proposal_boxes") - gt_boxes = proposals_per_image.gt_boxes - est_boxes = proposals_per_image.proposal_boxes - # apply match threshold for densepose head - iou = matched_boxlist_iou(gt_boxes, est_boxes) - iou_select = iou > self.iou_threshold - proposals_per_image = proposals_per_image[iou_select] - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes) - # filter out any target without densepose annotation - 
gt_densepose = proposals_per_image.gt_densepose - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose) - selected_indices = [ - i for i, dp_target in enumerate(gt_densepose) if dp_target is not None - ] - if len(selected_indices) != len(gt_densepose): - proposals_per_image = proposals_per_image[selected_indices] - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes) - assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose) - proposals_filtered.append(proposals_per_image) - return proposals_filtered - - -def build_densepose_head(cfg, input_channels): - head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME - return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels) - - -def build_densepose_predictor(cfg, input_channels): - predictor = DensePosePredictor(cfg, input_channels) - return predictor - - -def build_densepose_data_filter(cfg): - dp_filter = DensePoseDataFilter(cfg) - return dp_filter - - -def densepose_inference(densepose_outputs, densepose_confidences, detections): - """ - Infer dense pose estimate based on outputs from the DensePose head - and detections. The estimate for each detection instance is stored in its - "pred_densepose" attribute. - - Args: - densepose_outputs (tuple(`torch.Tensor`)): iterable containing 4 elements: - - s (:obj: `torch.Tensor`): coarse segmentation tensor of size (N, A, H, W), - - i (:obj: `torch.Tensor`): fine segmentation tensor of size (N, C, H, W), - - u (:obj: `torch.Tensor`): U coordinates for each class of size (N, C, H, W), - - v (:obj: `torch.Tensor`): V coordinates for each class of size (N, C, H, W), - where N is the total number of detections in a batch, - A is the number of coarse segmentations labels - (e.g. 15 for coarse body parts + background), - C is the number of fine segmentation labels - (e.g. 25 for fine body parts + background), - W is the resolution along the X axis - H is the resolution along the Y axis - densepose_confidences (tuple(`torch.Tensor`)): iterable containing 4 elements: - - sigma_1 (:obj: `torch.Tensor`): global confidences for UV coordinates - of size (N, C, H, W) - - sigma_2 (:obj: `torch.Tensor`): individual confidences for UV coordinates - of size (N, C, H, W) - - kappa_u (:obj: `torch.Tensor`): first component of confidence direction - vector of size (N, C, H, W) - - kappa_v (:obj: `torch.Tensor`): second component of confidence direction - vector of size (N, C, H, W) - detections (list[Instances]): A list of N Instances, where N is the number of images - in the batch. Instances are modified by this method: "pred_densepose" attribute - is added to each instance, the attribute contains the corresponding - DensePoseOutput object. - """ - # DensePose outputs: segmentation, body part indices, U, V - s, index_uv, u, v = densepose_outputs - sigma_1, sigma_2, kappa_u, kappa_v = densepose_confidences - k = 0 - for detection in detections: - n_i = len(detection) - s_i = s[k : k + n_i] - index_uv_i = index_uv[k : k + n_i] - u_i = u[k : k + n_i] - v_i = v[k : k + n_i] - _local_vars = locals() - confidences = { - name: _local_vars[name] - for name in ("sigma_1", "sigma_2", "kappa_u", "kappa_v") - if _local_vars.get(name) is not None - } - densepose_output_i = DensePoseOutput(s_i, index_uv_i, u_i, v_i, confidences) - detection.pred_densepose = densepose_output_i - k += n_i - - -def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z): - """ - Computes utility values for linear interpolation at points v. 
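Before the docstring continues below, a toy example of the coordinate convention it describes: DensePose points are stored as normalized offsets relative to their bounding box and mapped back to absolute coordinates as v = v0_src + v_norm * size_src / 256. The numbers here are made up; note the function body below uses a 256.0 divisor while the GT points elsewhere in this diff are scaled by 255.0.

```python
import numpy as np

# Hypothetical box (X origin and width) and point offsets stored in the
# normalized 0..255 convention used for DensePose annotations.
x0, w = 40.0, 128.0
x_norm = np.array([0.0, 128.0, 255.0])

# Absolute image-space X coordinates, using the 256.0 divisor of the
# interpolation utilities below.
x_abs = x0 + x_norm * w / 256.0
print(x_abs)  # [ 40.  104.  167.5]
```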
- The points are given as normalized offsets in the source interval - (v0_src, v0_src + size_src), more precisely: - v = v0_src + v_norm * size_src / 256.0 - The computed utilities include lower points v_lo, upper points v_hi, - interpolation weights v_w and flags j_valid indicating whether the - points falls into the destination interval (v0_dst, v0_dst + size_dst). - - Args: - v_norm (:obj: `torch.Tensor`): tensor of size N containing - normalized point offsets - v0_src (:obj: `torch.Tensor`): tensor of size N containing - left bounds of source intervals for normalized points - size_src (:obj: `torch.Tensor`): tensor of size N containing - source interval sizes for normalized points - v0_dst (:obj: `torch.Tensor`): tensor of size N containing - left bounds of destination intervals - size_dst (:obj: `torch.Tensor`): tensor of size N containing - destination interval sizes - size_z (int): interval size for data to be interpolated - - Returns: - v_lo (:obj: `torch.Tensor`): int tensor of size N containing - indices of lower values used for interpolation, all values are - integers from [0, size_z - 1] - v_hi (:obj: `torch.Tensor`): int tensor of size N containing - indices of upper values used for interpolation, all values are - integers from [0, size_z - 1] - v_w (:obj: `torch.Tensor`): float tensor of size N containing - interpolation weights - j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing - 0 for points outside the estimation interval - (v0_est, v0_est + size_est) and 1 otherwise - """ - v = v0_src + v_norm * size_src / 256.0 - j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst) - v_grid = (v - v0_dst) * size_z / size_dst - v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1) - v_hi = (v_lo + 1).clamp(max=size_z - 1) - v_grid = torch.min(v_hi.float(), v_grid) - v_w = v_grid - v_lo.float() - return v_lo, v_hi, v_w, j_valid - - -def _grid_sampling_utilities( - zh, zw, bbox_xywh_est, bbox_xywh_gt, index_gt, x_norm, y_norm, index_bbox -): - """ - Prepare tensors used in grid sampling. - - Args: - z_est (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with estimated - values of Z to be extracted for the points X, Y and channel - indices I - bbox_xywh_est (:obj: `torch.Tensor`): tensor of size (N, 4) containing - estimated bounding boxes in format XYWH - bbox_xywh_gt (:obj: `torch.Tensor`): tensor of size (N, 4) containing - matched ground truth bounding boxes in format XYWH - index_gt (:obj: `torch.Tensor`): tensor of size K with point labels for - ground truth points - x_norm (:obj: `torch.Tensor`): tensor of size K with X normalized - coordinates of ground truth points. Image X coordinates can be - obtained as X = Xbbox + x_norm * Wbbox / 255 - y_norm (:obj: `torch.Tensor`): tensor of size K with Y normalized - coordinates of ground truth points. Image Y coordinates can be - obtained as Y = Ybbox + y_norm * Hbbox / 255 - index_bbox (:obj: `torch.Tensor`): tensor of size K with bounding box - indices for each ground truth point. 
The values are thus in - [0, N-1] - - Returns: - j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing - 0 for points to be discarded and 1 for points to be selected - y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values - in z_est for each point - y_hi (:obj: `torch.Tensor`): int tensor of indices of lower values - in z_est for each point - x_lo (:obj: `torch.Tensor`): int tensor of indices of left values - in z_est for each point - x_hi (:obj: `torch.Tensor`): int tensor of indices of right values - in z_est for each point - w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M; - contains upper-left value weight for each point - w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M; - contains upper-right value weight for each point - w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M; - contains lower-left value weight for each point - w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M; - contains lower-right value weight for each point - """ - - x0_gt, y0_gt, w_gt, h_gt = bbox_xywh_gt[index_bbox].unbind(dim=1) - x0_est, y0_est, w_est, h_est = bbox_xywh_est[index_bbox].unbind(dim=1) - x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities( - x_norm, x0_gt, w_gt, x0_est, w_est, zw - ) - y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities( - y_norm, y0_gt, h_gt, y0_est, h_est, zh - ) - j_valid = jx_valid * jy_valid - - w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w) - w_ylo_xhi = x_w * (1.0 - y_w) - w_yhi_xlo = (1.0 - x_w) * y_w - w_yhi_xhi = x_w * y_w - - return j_valid, y_lo, y_hi, x_lo, x_hi, w_ylo_xlo, w_ylo_xhi, w_yhi_xlo, w_yhi_xhi - - -def _extract_at_points_packed( - z_est, - index_bbox_valid, - slice_index_uv, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, -): - """ - Extract ground truth values z_gt for valid point indices and estimated - values z_est using bilinear interpolation over top-left (y_lo, x_lo), - top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right - (y_hi, x_hi) values in z_est with corresponding weights: - w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi. 
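These four weights are the standard bilinear-interpolation coefficients produced by _grid_sampling_utilities above; a tiny sketch with made-up fractional offsets shows how they are formed and that they sum to one:

```python
import torch

# Fractional position of a sample point inside its surrounding grid cell
# (toy values; x_w and y_w correspond to the v_w outputs of the utilities).
x_w = torch.tensor([0.25])
y_w = torch.tensor([0.40])

w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w)   # top-left neighbour
w_ylo_xhi = x_w * (1.0 - y_w)           # top-right neighbour
w_yhi_xlo = (1.0 - x_w) * y_w           # bottom-left neighbour
w_yhi_xhi = x_w * y_w                   # bottom-right neighbour

total = w_ylo_xlo + w_ylo_xhi + w_yhi_xlo + w_yhi_xhi
assert torch.allclose(total, torch.ones_like(total))
```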
- Use slice_index_uv to slice dim=1 in z_est - """ - z_est_sampled = ( - z_est[index_bbox_valid, slice_index_uv, y_lo, x_lo] * w_ylo_xlo - + z_est[index_bbox_valid, slice_index_uv, y_lo, x_hi] * w_ylo_xhi - + z_est[index_bbox_valid, slice_index_uv, y_hi, x_lo] * w_yhi_xlo - + z_est[index_bbox_valid, slice_index_uv, y_hi, x_hi] * w_yhi_xhi - ) - return z_est_sampled - - -def _resample_data( - z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode="nearest", padding_mode="zeros" -): - """ - Args: - z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be - resampled - bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing - source bounding boxes in format XYWH - bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing - destination bounding boxes in format XYWH - Return: - zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout) - with resampled values of z, where D is the discretization size - """ - n = bbox_xywh_src.size(0) - assert n == bbox_xywh_dst.size(0), ( - "The number of " - "source ROIs for resampling ({}) should be equal to the number " - "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0)) - ) - x0src, y0src, wsrc, hsrc = bbox_xywh_src.unbind(dim=1) - x0dst, y0dst, wdst, hdst = bbox_xywh_dst.unbind(dim=1) - x0dst_norm = 2 * (x0dst - x0src) / wsrc - 1 - y0dst_norm = 2 * (y0dst - y0src) / hsrc - 1 - x1dst_norm = 2 * (x0dst + wdst - x0src) / wsrc - 1 - y1dst_norm = 2 * (y0dst + hdst - y0src) / hsrc - 1 - grid_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout - grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout - grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout) - grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout) - dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout) - dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout) - x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout) - y0_expanded = y0dst_norm[:, None, None].expand(n, hout, wout) - grid_x = grid_w_expanded * dx_expanded + x0_expanded - grid_y = grid_h_expanded * dy_expanded + y0_expanded - grid = torch.stack((grid_x, grid_y), dim=3) - # resample Z from (N, C, H, W) into (N, C, Hout, Wout) - zresampled = F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True) - return zresampled - - -def _extract_single_tensors_from_matches_one_image( - proposals_targets, bbox_with_dp_offset, bbox_global_offset -): - i_gt_all = [] - x_norm_all = [] - y_norm_all = [] - u_gt_all = [] - v_gt_all = [] - s_gt_all = [] - bbox_xywh_gt_all = [] - bbox_xywh_est_all = [] - # Ibbox_all == k should be true for all data that corresponds - # to bbox_xywh_gt[k] and bbox_xywh_est[k] - # index k here is global wrt images - i_bbox_all = [] - # at offset k (k is global) contains index of bounding box data - # within densepose output tensor - i_with_dp = [] - - boxes_xywh_est = proposals_targets.proposal_boxes.clone() - boxes_xywh_gt = proposals_targets.gt_boxes.clone() - n_i = len(boxes_xywh_est) - assert n_i == len(boxes_xywh_gt) - - if n_i: - boxes_xywh_est.tensor[:, 2] -= boxes_xywh_est.tensor[:, 0] - boxes_xywh_est.tensor[:, 3] -= boxes_xywh_est.tensor[:, 1] - boxes_xywh_gt.tensor[:, 2] -= boxes_xywh_gt.tensor[:, 0] - boxes_xywh_gt.tensor[:, 3] -= boxes_xywh_gt.tensor[:, 1] - if hasattr(proposals_targets, "gt_densepose"): - densepose_gt = proposals_targets.gt_densepose - for k, box_xywh_est, box_xywh_gt, dp_gt in zip( - range(n_i), 
boxes_xywh_est.tensor, boxes_xywh_gt.tensor, densepose_gt - ): - if (dp_gt is not None) and (len(dp_gt.x) > 0): - i_gt_all.append(dp_gt.i) - x_norm_all.append(dp_gt.x) - y_norm_all.append(dp_gt.y) - u_gt_all.append(dp_gt.u) - v_gt_all.append(dp_gt.v) - s_gt_all.append(dp_gt.segm.unsqueeze(0)) - bbox_xywh_gt_all.append(box_xywh_gt.view(-1, 4)) - bbox_xywh_est_all.append(box_xywh_est.view(-1, 4)) - i_bbox_k = torch.full_like(dp_gt.i, bbox_with_dp_offset + len(i_with_dp)) - i_bbox_all.append(i_bbox_k) - i_with_dp.append(bbox_global_offset + k) - return ( - i_gt_all, - x_norm_all, - y_norm_all, - u_gt_all, - v_gt_all, - s_gt_all, - bbox_xywh_gt_all, - bbox_xywh_est_all, - i_bbox_all, - i_with_dp, - ) - - -def _extract_single_tensors_from_matches(proposals_with_targets): - i_img = [] - i_gt_all = [] - x_norm_all = [] - y_norm_all = [] - u_gt_all = [] - v_gt_all = [] - s_gt_all = [] - bbox_xywh_gt_all = [] - bbox_xywh_est_all = [] - i_bbox_all = [] - i_with_dp_all = [] - n = 0 - for i, proposals_targets_per_image in enumerate(proposals_with_targets): - n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0) - if not n_i: - continue - ( - i_gt_img, - x_norm_img, - y_norm_img, - u_gt_img, - v_gt_img, - s_gt_img, - bbox_xywh_gt_img, - bbox_xywh_est_img, - i_bbox_img, - i_with_dp_img, - ) = _extract_single_tensors_from_matches_one_image( # noqa - proposals_targets_per_image, len(i_with_dp_all), n - ) - i_gt_all.extend(i_gt_img) - x_norm_all.extend(x_norm_img) - y_norm_all.extend(y_norm_img) - u_gt_all.extend(u_gt_img) - v_gt_all.extend(v_gt_img) - s_gt_all.extend(s_gt_img) - bbox_xywh_gt_all.extend(bbox_xywh_gt_img) - bbox_xywh_est_all.extend(bbox_xywh_est_img) - i_bbox_all.extend(i_bbox_img) - i_with_dp_all.extend(i_with_dp_img) - i_img.extend([i] * len(i_with_dp_img)) - n += n_i - # concatenate all data into a single tensor - if (n > 0) and (len(i_with_dp_all) > 0): - i_gt = torch.cat(i_gt_all, 0).long() - x_norm = torch.cat(x_norm_all, 0) - y_norm = torch.cat(y_norm_all, 0) - u_gt = torch.cat(u_gt_all, 0) - v_gt = torch.cat(v_gt_all, 0) - s_gt = torch.cat(s_gt_all, 0) - bbox_xywh_gt = torch.cat(bbox_xywh_gt_all, 0) - bbox_xywh_est = torch.cat(bbox_xywh_est_all, 0) - i_bbox = torch.cat(i_bbox_all, 0).long() - else: - i_gt = None - x_norm = None - y_norm = None - u_gt = None - v_gt = None - s_gt = None - bbox_xywh_gt = None - bbox_xywh_est = None - i_bbox = None - return ( - i_img, - i_with_dp_all, - bbox_xywh_est, - bbox_xywh_gt, - i_gt, - x_norm, - y_norm, - u_gt, - v_gt, - s_gt, - i_bbox, - ) - - -class IIDIsotropicGaussianUVLoss(nn.Module): - """ - Loss for the case of iid residuals with isotropic covariance: - $Sigma_i = sigma_i^2 I$ - The loss (negative log likelihood) is then: - $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$, - where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates - difference between estimated and ground truth UV values - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. 
NIPS 2019 - """ - - def __init__(self, sigma_lower_bound: float): - super(IIDIsotropicGaussianUVLoss, self).__init__() - self.sigma_lower_bound = sigma_lower_bound - self.log2pi = math.log(2 * math.pi) - - def forward( - self, - u: torch.Tensor, - v: torch.Tensor, - sigma_u: torch.Tensor, - target_u: torch.Tensor, - target_v: torch.Tensor, - ): - # compute $\sigma_i^2$ - # use sigma_lower_bound to avoid degenerate solution for variance - # (sigma -> 0) - sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound - # compute \|delta_i\|^2 - delta_t_delta = (u - target_u) ** 2 + (v - target_v) ** 2 - # the total loss from the formula above: - loss = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2) - return loss.sum() - - -class IndepAnisotropicGaussianUVLoss(nn.Module): - """ - Loss for the case of independent residuals with anisotropic covariances: - $Sigma_i = sigma_i^2 I + r_i r_i^T$ - The loss (negative log likelihood) is then: - $1/2 sum_{i=1}^n (log(2 pi) - + log sigma_i^2 (sigma_i^2 + ||r_i||^2) - + ||delta_i||^2 / sigma_i^2 - - ^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$, - where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates - difference between estimated and ground truth UV values - For details, see: - N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning - Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019 - """ - - def __init__(self, sigma_lower_bound: float): - super(IndepAnisotropicGaussianUVLoss, self).__init__() - self.sigma_lower_bound = sigma_lower_bound - self.log2pi = math.log(2 * math.pi) - - def forward( - self, - u: torch.Tensor, - v: torch.Tensor, - sigma_u: torch.Tensor, - kappa_u_est: torch.Tensor, - kappa_v_est: torch.Tensor, - target_u: torch.Tensor, - target_v: torch.Tensor, - ): - # compute $\sigma_i^2$ - sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound - # compute \|r_i\|^2 - r_sqnorm2 = kappa_u_est ** 2 + kappa_v_est ** 2 - delta_u = u - target_u - delta_v = v - target_v - # compute \|delta_i\|^2 - delta_sqnorm = delta_u ** 2 + delta_v ** 2 - delta_u_r_u = delta_u * kappa_u_est - delta_v_r_v = delta_v * kappa_v_est - # compute the scalar product - delta_r = delta_u_r_u + delta_v_r_v - # compute squared scalar product ^2 - delta_r_sqnorm = delta_r ** 2 - denom2 = sigma2 * (sigma2 + r_sqnorm2) - loss = 0.5 * ( - self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2 - ) - return loss.sum() - - -class DensePoseLosses(object): - def __init__(self, cfg): - # fmt: off - self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE - self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS - self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS - self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS - self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS - # fmt: on - self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg) - if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO: - self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss( - self.confidence_model_cfg.uv_confidence.epsilon - ) - elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO: - self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss( - self.confidence_model_cfg.uv_confidence.epsilon - ) - - def __call__(self, proposals_with_gt, densepose_outputs, densepose_confidences): - losses = {} - # densepose outputs are computed for all images and all bounding boxes; - 
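The negative log-likelihood evaluated by IIDIsotropicGaussianUVLoss (defined just above) can be reproduced in a few lines. This toy sketch uses made-up predictions and the same softplus-plus-epsilon parameterization of the variance:

```python
import math
import torch
import torch.nn.functional as F

# Toy UV predictions, targets and a raw (pre-softplus) confidence output.
u_est = torch.tensor([0.20, 0.50, 0.90]); u_gt = torch.tensor([0.25, 0.50, 0.70])
v_est = torch.tensor([0.10, 0.40, 0.80]); v_gt = torch.tensor([0.10, 0.45, 0.90])
sigma_raw = torch.tensor([-1.0, 0.0, 2.0])
sigma_lower_bound = 0.01  # illustrative epsilon

sigma2 = F.softplus(sigma_raw) + sigma_lower_bound        # per-point variance
delta2 = (u_est - u_gt) ** 2 + (v_est - v_gt) ** 2        # squared UV residual
nll = 0.5 * (math.log(2 * math.pi) + 2 * torch.log(sigma2) + delta2 / sigma2)
loss = nll.sum()  # matches the .sum() reduction used by the loss module
```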
# i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively, - # the outputs will have size(0) == 3+1+2+1 == 7 - s, index_uv, u, v = densepose_outputs - sigma_1, sigma_2, kappa_u, kappa_v = densepose_confidences - conf_type = self.confidence_model_cfg.uv_confidence.type - assert u.size(2) == v.size(2) - assert u.size(3) == v.size(3) - assert u.size(2) == index_uv.size(2) - assert u.size(3) == index_uv.size(3) - - with torch.no_grad(): - ( - index_uv_img, - i_with_dp, - bbox_xywh_est, - bbox_xywh_gt, - index_gt_all, - x_norm, - y_norm, - u_gt_all, - v_gt_all, - s_gt, - index_bbox, - ) = _extract_single_tensors_from_matches( # noqa - proposals_with_gt - ) - n_batch = len(i_with_dp) - - # NOTE: we need to keep the same computation graph on all the GPUs to - # perform reduction properly. Hence even if we have no data on one - # of the GPUs, we still need to generate the computation graph. - # Add fake (zero) loss in the form Tensor.sum() * 0 - if not n_batch: - losses["loss_densepose_I"] = index_uv.sum() * 0 - losses["loss_densepose_S"] = s.sum() * 0 - if self.confidence_model_cfg.uv_confidence.enabled: - losses["loss_densepose_UV"] = (u.sum() + v.sum()) * 0 - if conf_type == DensePoseUVConfidenceType.IID_ISO: - losses["loss_densepose_UV"] += sigma_2.sum() * 0 - elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO: - losses["loss_densepose_UV"] += ( - sigma_2.sum() + kappa_u.sum() + kappa_v.sum() - ) * 0 - else: - losses["loss_densepose_U"] = u.sum() * 0 - losses["loss_densepose_V"] = v.sum() * 0 - return losses - - zh = u.size(2) - zw = u.size(3) - - ( - j_valid, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) = _grid_sampling_utilities( # noqa - zh, zw, bbox_xywh_est, bbox_xywh_gt, index_gt_all, x_norm, y_norm, index_bbox - ) - - j_valid_fg = j_valid * (index_gt_all > 0) - - u_gt = u_gt_all[j_valid_fg] - u_est_all = _extract_at_points_packed( - u[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - u_est = u_est_all[j_valid_fg] - - v_gt = v_gt_all[j_valid_fg] - v_est_all = _extract_at_points_packed( - v[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - v_est = v_est_all[j_valid_fg] - - index_uv_gt = index_gt_all[j_valid] - index_uv_est_all = _extract_at_points_packed( - index_uv[i_with_dp], - index_bbox, - slice(None), - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo[:, None], - w_ylo_xhi[:, None], - w_yhi_xlo[:, None], - w_yhi_xhi[:, None], - ) - index_uv_est = index_uv_est_all[j_valid, :] - - if self.confidence_model_cfg.uv_confidence.enabled: - sigma_2_est_all = _extract_at_points_packed( - sigma_2[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - sigma_2_est = sigma_2_est_all[j_valid_fg] - if conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]: - kappa_u_est_all = _extract_at_points_packed( - kappa_u[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - kappa_u_est = kappa_u_est_all[j_valid_fg] - kappa_v_est_all = _extract_at_points_packed( - kappa_v[i_with_dp], - index_bbox, - index_gt_all, - y_lo, - y_hi, - x_lo, - x_hi, - w_ylo_xlo, - w_ylo_xhi, - w_yhi_xlo, - w_yhi_xhi, - ) - kappa_v_est = kappa_v_est_all[j_valid_fg] - - # Resample everything to the estimated data size, no need to resample - # S_est then: - s_est = 
s[i_with_dp] - with torch.no_grad(): - s_gt = _resample_data( - s_gt.unsqueeze(1), - bbox_xywh_gt, - bbox_xywh_est, - self.heatmap_size, - self.heatmap_size, - mode="nearest", - padding_mode="zeros", - ).squeeze(1) - - # add point-based losses: - if self.confidence_model_cfg.uv_confidence.enabled: - if conf_type == DensePoseUVConfidenceType.IID_ISO: - uv_loss = ( - self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt) - * self.w_points - ) - losses["loss_densepose_UV"] = uv_loss - elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO: - uv_loss = ( - self.uv_loss_with_confidences( - u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt - ) - * self.w_points - ) - losses["loss_densepose_UV"] = uv_loss - else: - raise ValueError(f"Unknown confidence model type: {conf_type}") - else: - u_loss = F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points - losses["loss_densepose_U"] = u_loss - v_loss = F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points - losses["loss_densepose_V"] = v_loss - index_uv_loss = F.cross_entropy(index_uv_est, index_uv_gt.long()) * self.w_part - losses["loss_densepose_I"] = index_uv_loss - - if self.n_segm_chan == 2: - s_gt = s_gt > 0 - s_loss = F.cross_entropy(s_est, s_gt.long()) * self.w_segm - losses["loss_densepose_S"] = s_loss - return losses - - -def build_densepose_losses(cfg): - losses = DensePoseLosses(cfg) - return losses diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py deleted file mode 100644 index 3bb002b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/evaluator.py +++ /dev/null @@ -1,158 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import contextlib -import copy -import io -import itertools -import json -import logging -import os -from collections import OrderedDict -import torch -from fvcore.common.file_io import PathManager -from pycocotools.coco import COCO - -from detectron2.data import MetadataCatalog -from detectron2.evaluation import DatasetEvaluator -from detectron2.structures import BoxMode -from detectron2.utils.comm import all_gather, is_main_process, synchronize -from detectron2.utils.logger import create_small_table - -from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode - - -class DensePoseCOCOEvaluator(DatasetEvaluator): - def __init__(self, dataset_name, distributed, output_dir=None): - self._distributed = distributed - self._output_dir = output_dir - - self._cpu_device = torch.device("cpu") - self._logger = logging.getLogger(__name__) - - self._metadata = MetadataCatalog.get(dataset_name) - json_file = PathManager.get_local_path(self._metadata.json_file) - with contextlib.redirect_stdout(io.StringIO()): - self._coco_api = COCO(json_file) - - def reset(self): - self._predictions = [] - - def process(self, inputs, outputs): - """ - Args: - inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). - It is a list of dict. Each dict corresponds to an image and - contains keys like "height", "width", "file_name", "image_id". - outputs: the outputs of a COCO model. It is a list of dicts with key - "instances" that contains :class:`Instances`. - The :class:`Instances` object needs to have `densepose` field. 
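One detail of `DensePoseLosses.__call__` above worth an isolated illustration: when a replica receives no proposals with DensePose ground truth, the losses are still returned as zero-valued tensors that depend on the network outputs (`tensor.sum() * 0`), so every GPU builds the same computation graph and distributed gradient reduction does not stall. A minimal sketch of the same pattern, assuming a plain smooth-L1 UV loss:

```python
# Hypothetical sketch of the "fake zero loss" pattern used in DensePoseLosses.__call__
# to keep DDP gradient reduction in sync when a replica has no DensePose targets.
import torch
import torch.nn.functional as F

def uv_loss(pred_u: torch.Tensor, pred_v: torch.Tensor,
            gt_u: torch.Tensor, gt_v: torch.Tensor) -> torch.Tensor:
    if gt_u.numel() == 0:
        # No ground truth here: emit a loss that is exactly zero but still
        # touches the predictions, so backward() traverses the same graph
        # on every replica.
        return (pred_u.sum() + pred_v.sum()) * 0
    return (F.smooth_l1_loss(pred_u, gt_u, reduction="sum")
            + F.smooth_l1_loss(pred_v, gt_v, reduction="sum"))
```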
- """ - for input, output in zip(inputs, outputs): - instances = output["instances"].to(self._cpu_device) - - boxes = instances.pred_boxes.tensor.clone() - boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - instances.pred_densepose = instances.pred_densepose.to_result(boxes) - - json_results = prediction_to_json(instances, input["image_id"]) - self._predictions.extend(json_results) - - def evaluate(self): - if self._distributed: - synchronize() - predictions = all_gather(self._predictions) - predictions = list(itertools.chain(*predictions)) - if not is_main_process(): - return - else: - predictions = self._predictions - - return copy.deepcopy(self._eval_predictions(predictions)) - - def _eval_predictions(self, predictions): - """ - Evaluate predictions on densepose. - Return results with the metrics of the tasks. - """ - self._logger.info("Preparing results for COCO format ...") - - if self._output_dir: - file_path = os.path.join(self._output_dir, "coco_densepose_results.json") - with open(file_path, "w") as f: - json.dump(predictions, f) - f.flush() - os.fsync(f.fileno()) - - self._logger.info("Evaluating predictions ...") - res = OrderedDict() - results_gps, results_gpsm = _evaluate_predictions_on_coco(self._coco_api, predictions) - res["densepose_gps"] = results_gps - res["densepose_gpsm"] = results_gpsm - return res - - -def prediction_to_json(instances, img_id): - """ - Args: - instances (Instances): the output of the model - img_id (str): the image id in COCO - - Returns: - list[dict]: the results in densepose evaluation format - """ - scores = instances.scores.tolist() - - results = [] - for k in range(len(instances)): - densepose = instances.pred_densepose[k] - result = { - "image_id": img_id, - "category_id": 1, # densepose only has one class - "bbox": densepose[1], - "score": scores[k], - "densepose": densepose, - } - results.append(result) - return results - - -def _evaluate_predictions_on_coco(coco_gt, coco_results): - metrics = ["AP", "AP50", "AP75", "APm", "APl"] - - logger = logging.getLogger(__name__) - - if len(coco_results) == 0: # cocoapi does not handle empty results very well - logger.warn("No predictions from the model! 
Set scores to -1") - results_gps = {metric: -1 for metric in metrics} - results_gpsm = {metric: -1 for metric in metrics} - return results_gps, results_gpsm - - coco_dt = coco_gt.loadRes(coco_results) - results_gps = _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics) - logger.info( - "Evaluation results for densepose, GPS metric: \n" + create_small_table(results_gps) - ) - results_gpsm = _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics) - logger.info( - "Evaluation results for densepose, GPSm metric: \n" + create_small_table(results_gpsm) - ) - return results_gps, results_gpsm - - -def _evaluate_predictions_on_coco_gps(coco_gt, coco_dt, metrics): - coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPS) - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)} - return results - - -def _evaluate_predictions_on_coco_gpsm(coco_gt, coco_dt, metrics): - coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose", dpEvalMode=DensePoseEvalMode.GPSM) - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)} - return results diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py deleted file mode 100644 index fcf69db..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/modeling/test_time_augmentation.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from detectron2.modeling.test_time_augmentation import GeneralizedRCNNWithTTA - - -class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA): - def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1): - """ - Args: - cfg (CfgNode): - model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on. - transform_data (DensePoseTransformData): contains symmetry label - transforms used for horizontal flip - tta_mapper (callable): takes a dataset dict and returns a list of - augmented versions of the dataset dict. Defaults to - `DatasetMapperTTA(cfg)`. - batch_size (int): batch the augmented images into this batch size for inference. 
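Returning to `DensePoseCOCOEvaluator` defined earlier in this diff: a minimal sketch of how such an evaluator is typically driven, assuming a configured detectron2 `cfg` and a trained `model`, using the stock `build_detection_test_loader` / `inference_on_dataset` helpers (the dataset name and output directory below are placeholders):

```python
# Hypothetical sketch: running the DensePose COCO evaluator over a test split.
# Assumes `cfg` and `model` already exist; names below are placeholders.
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import inference_on_dataset

from densepose.evaluator import DensePoseCOCOEvaluator  # class defined above

dataset_name = "densepose_coco_2014_minival"   # placeholder dataset id
evaluator = DensePoseCOCOEvaluator(dataset_name, distributed=False, output_dir="./eval_out")
loader = build_detection_test_loader(cfg, dataset_name)

# inference_on_dataset() calls evaluator.process() on every batch and then
# evaluator.evaluate(), which returns the GPS / GPSm AP tables built above.
results = inference_on_dataset(model, loader, evaluator)
print(results["densepose_gps"], results["densepose_gpsm"])
```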
- """ - self._transform_data = transform_data - super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size) - - # the implementation follows closely the one from detectron2/modeling - def _inference_one_image(self, input): - """ - Args: - input (dict): one dataset dict - - Returns: - dict: one output dict - """ - - augmented_inputs, aug_vars = self._get_augmented_inputs(input) - # Detect boxes from all augmented versions - with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]): - # temporarily disable roi heads - all_boxes, all_scores, all_classes = self._get_augmented_boxes( - augmented_inputs, aug_vars - ) - merged_instances = self._merge_detections( - all_boxes, all_scores, all_classes, (aug_vars["height"], aug_vars["width"]) - ) - - if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON: - # Use the detected boxes to obtain new fields - augmented_instances = self._rescale_detected_boxes( - augmented_inputs, merged_instances, aug_vars - ) - # run forward on the detected boxes - outputs = self._batch_inference( - augmented_inputs, augmented_instances, do_postprocess=False - ) - # Delete now useless variables to avoid being out of memory - del augmented_inputs, augmented_instances, merged_instances - # average the predictions - if self.cfg.MODEL.MASK_ON: - outputs[0].pred_masks = self._reduce_pred_masks(outputs, aug_vars) - if self.cfg.MODEL.DENSEPOSE_ON: - outputs[0].pred_densepose = self._reduce_pred_densepose(outputs, aug_vars) - # postprocess - output = self._detector_postprocess(outputs[0], aug_vars) - return {"instances": output} - else: - return {"instances": merged_instances} - - def _reduce_pred_densepose(self, outputs, aug_vars): - for idx, output in enumerate(outputs): - if aug_vars["do_hflip"][idx]: - output.pred_densepose.hflip(self._transform_data) - # Less memory-intensive averaging - for attr in "SIUV": - setattr( - outputs[0].pred_densepose, - attr, - sum(getattr(o.pred_densepose, attr) for o in outputs) / len(outputs), - ) - return outputs[0].pred_densepose diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py deleted file mode 100644 index 0231197..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/roi_head.py +++ /dev/null @@ -1,213 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import numpy as np -from typing import Dict -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn as nn -from torch.nn import functional as F - -from detectron2.layers import Conv2d, ShapeSpec, get_norm -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads -from detectron2.modeling.poolers import ROIPooler -from detectron2.modeling.roi_heads import select_foreground_proposals - -from .densepose_head import ( - build_densepose_data_filter, - build_densepose_head, - build_densepose_losses, - build_densepose_predictor, - densepose_inference, -) - - -class Decoder(nn.Module): - """ - A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper - (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from - all levels of the FPN into single output. 
- """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features): - super(Decoder, self).__init__() - - # fmt: off - self.in_features = in_features - feature_strides = {k: v.stride for k, v in input_shape.items()} - feature_channels = {k: v.channels for k, v in input_shape.items()} - num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES - conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS - self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE - norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM - # fmt: on - - self.scale_heads = [] - for in_feature in self.in_features: - head_ops = [] - head_length = max( - 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)) - ) - for k in range(head_length): - conv = Conv2d( - feature_channels[in_feature] if k == 0 else conv_dims, - conv_dims, - kernel_size=3, - stride=1, - padding=1, - bias=not norm, - norm=get_norm(norm, conv_dims), - activation=F.relu, - ) - weight_init.c2_msra_fill(conv) - head_ops.append(conv) - if feature_strides[in_feature] != self.common_stride: - head_ops.append( - nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) - ) - self.scale_heads.append(nn.Sequential(*head_ops)) - self.add_module(in_feature, self.scale_heads[-1]) - self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) - weight_init.c2_msra_fill(self.predictor) - - def forward(self, features): - for i, _ in enumerate(self.in_features): - if i == 0: - x = self.scale_heads[i](features[i]) - else: - x = x + self.scale_heads[i](features[i]) - x = self.predictor(x) - return x - - -@ROI_HEADS_REGISTRY.register() -class DensePoseROIHeads(StandardROIHeads): - """ - A Standard ROIHeads which contains an addition of DensePose head. - """ - - def __init__(self, cfg, input_shape): - super().__init__(cfg, input_shape) - self._init_densepose_head(cfg, input_shape) - - def _init_densepose_head(self, cfg, input_shape): - # fmt: off - self.densepose_on = cfg.MODEL.DENSEPOSE_ON - if not self.densepose_on: - return - self.densepose_data_filter = build_densepose_data_filter(cfg) - dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION - dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO - dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE - self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON - # fmt: on - if self.use_decoder: - dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,) - else: - dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features) - in_channels = [input_shape[f].channels for f in self.in_features][0] - - if self.use_decoder: - self.decoder = Decoder(cfg, input_shape, self.in_features) - - self.densepose_pooler = ROIPooler( - output_size=dp_pooler_resolution, - scales=dp_pooler_scales, - sampling_ratio=dp_pooler_sampling_ratio, - pooler_type=dp_pooler_type, - ) - self.densepose_head = build_densepose_head(cfg, in_channels) - self.densepose_predictor = build_densepose_predictor( - cfg, self.densepose_head.n_out_channels - ) - self.densepose_losses = build_densepose_losses(cfg) - - def _forward_densepose(self, features, instances): - """ - Forward logic of the densepose prediction branch. - - Args: - features (list[Tensor]): #level input features for densepose prediction - instances (list[Instances]): the per-image instances to train/predict densepose. - In training, they can be the proposals. - In inference, they can be the predicted boxes. 
- - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "densepose" and return it. - """ - if not self.densepose_on: - return {} if self.training else instances - - features = [features[f] for f in self.in_features] - if self.training: - proposals, _ = select_foreground_proposals(instances, self.num_classes) - proposals_dp = self.densepose_data_filter(proposals) - if len(proposals_dp) > 0: - # NOTE may deadlock in DDP if certain workers have empty proposals_dp - proposal_boxes = [x.proposal_boxes for x in proposals_dp] - - if self.use_decoder: - features = [self.decoder(features)] - - features_dp = self.densepose_pooler(features, proposal_boxes) - densepose_head_outputs = self.densepose_head(features_dp) - densepose_outputs, _, confidences, _ = self.densepose_predictor( - densepose_head_outputs - ) - densepose_loss_dict = self.densepose_losses( - proposals_dp, densepose_outputs, confidences - ) - return densepose_loss_dict - else: - pred_boxes = [x.pred_boxes for x in instances] - - if self.use_decoder: - features = [self.decoder(features)] - - features_dp = self.densepose_pooler(features, pred_boxes) - if len(features_dp) > 0: - densepose_head_outputs = self.densepose_head(features_dp) - densepose_outputs, _, confidences, _ = self.densepose_predictor( - densepose_head_outputs - ) - else: - # If no detection occurred instances - # set densepose_outputs to empty tensors - empty_tensor = torch.zeros(size=(0, 0, 0, 0), device=features_dp.device) - densepose_outputs = tuple([empty_tensor] * 4) - confidences = tuple([empty_tensor] * 4) - - densepose_inference(densepose_outputs, confidences, instances) - return instances - - def forward(self, images, features, proposals, targets=None): - instances, losses = super().forward(images, features, proposals, targets) - del targets, images - - if self.training: - losses.update(self._forward_densepose(features, instances)) - return instances, losses - - def forward_with_given_boxes(self, features, instances): - """ - Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. - - This is useful for downstream tasks where a box is known, but need to obtain - other attributes (outputs of other heads). - Test-time augmentation also uses this. - - Args: - features: same as in `forward()` - instances (list[Instances]): instances to predict other outputs. Expect the keys - "pred_boxes" and "pred_classes" to exist. - - Returns: - instances (list[Instances]): - the same `Instances` objects, with extra - fields such as `pred_masks` or `pred_keypoints`. - """ - - instances = super().forward_with_given_boxes(features, instances) - instances = self._forward_densepose(features, instances) - return instances diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py deleted file mode 100644 index b28862c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/dbhelper.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
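To make the wiring of `DensePoseROIHeads` above concrete, a hedged configuration sketch: the head is selected by the registry name used in the decorator, and the keys below mirror the ones read in `_init_densepose_head`. `add_densepose_config` is assumed to be the project helper that registers the `ROI_DENSEPOSE_HEAD` defaults; the values shown are illustrative, not the project's defaults.

```python
# Hypothetical sketch: enabling the DensePose ROI head through the config.
from detectron2.config import get_cfg
from densepose import add_densepose_config  # assumed project config helper

cfg = get_cfg()
add_densepose_config(cfg)
cfg.MODEL.ROI_HEADS.NAME = "DensePoseROIHeads"       # registry name used above
cfg.MODEL.DENSEPOSE_ON = True                        # checked in _forward_densepose
cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True       # route FPN features through Decoder
cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28  # ROI pooler output size (illustrative)
```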
All Rights Reserved -from typing import Any, Dict, Optional, Tuple - - -class EntrySelector(object): - """ - Base class for entry selectors - """ - - @staticmethod - def from_string(spec: str) -> "EntrySelector": - if spec == "*": - return AllEntrySelector() - return FieldEntrySelector(spec) - - -class AllEntrySelector(EntrySelector): - """ - Selector that accepts all entries - """ - - SPECIFIER = "*" - - def __call__(self, entry): - return True - - -class FieldEntrySelector(EntrySelector): - """ - Selector that accepts only entries that match provided field - specifier(s). Only a limited set of specifiers is supported for now: - ::=[] - ::=[] - is a valid identifier - ::= "int" | "str" - ::= "=" - ::= "," - ::= ":" - ::= | - ::= - ::= "-" - is a string without spaces and special symbols - (e.g. , , , ) - """ - - _SPEC_DELIM = "," - _TYPE_DELIM = ":" - _RANGE_DELIM = "-" - _EQUAL = "=" - _ERROR_PREFIX = "Invalid field selector specifier" - - class _FieldEntryValuePredicate(object): - """ - Predicate that checks strict equality for the specified entry field - """ - - def __init__(self, name: str, typespec: str, value: str): - import builtins - - self.name = name - self.type = getattr(builtins, typespec) if typespec is not None else str - self.value = value - - def __call__(self, entry): - return entry[self.name] == self.type(self.value) - - class _FieldEntryRangePredicate(object): - """ - Predicate that checks whether an entry field falls into the specified range - """ - - def __init__(self, name: str, typespec: str, vmin: str, vmax: str): - import builtins - - self.name = name - self.type = getattr(builtins, typespec) if typespec is not None else str - self.vmin = vmin - self.vmax = vmax - - def __call__(self, entry): - return (entry[self.name] >= self.type(self.vmin)) and ( - entry[self.name] <= self.type(self.vmax) - ) - - def __init__(self, spec: str): - self._predicates = self._parse_specifier_into_predicates(spec) - - def __call__(self, entry: Dict[str, Any]): - for predicate in self._predicates: - if not predicate(entry): - return False - return True - - def _parse_specifier_into_predicates(self, spec: str): - predicates = [] - specs = spec.split(self._SPEC_DELIM) - for subspec in specs: - eq_idx = subspec.find(self._EQUAL) - if eq_idx > 0: - field_name_with_type = subspec[:eq_idx] - field_name, field_type = self._parse_field_name_type(field_name_with_type) - field_value_or_range = subspec[eq_idx + 1 :] - if self._is_range_spec(field_value_or_range): - vmin, vmax = self._get_range_spec(field_value_or_range) - predicate = FieldEntrySelector._FieldEntryRangePredicate( - field_name, field_type, vmin, vmax - ) - else: - predicate = FieldEntrySelector._FieldEntryValuePredicate( - field_name, field_type, field_value_or_range - ) - predicates.append(predicate) - elif eq_idx == 0: - self._parse_error(f'"{subspec}", field name is empty!') - else: - self._parse_error(f'"{subspec}", should have format ' "=!") - return predicates - - def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]: - type_delim_idx = field_name_with_type.find(self._TYPE_DELIM) - if type_delim_idx > 0: - field_name = field_name_with_type[:type_delim_idx] - field_type = field_name_with_type[type_delim_idx + 1 :] - elif type_delim_idx == 0: - self._parse_error(f'"{field_name_with_type}", field name is empty!') - else: - field_name = field_name_with_type - field_type = None - return field_name, field_type - - def _is_range_spec(self, field_value_or_range): - delim_idx = 
field_value_or_range.find(self._RANGE_DELIM) - return delim_idx > 0 - - def _get_range_spec(self, field_value_or_range): - if self._is_range_spec(field_value_or_range): - delim_idx = field_value_or_range.find(self._RANGE_DELIM) - vmin = field_value_or_range[:delim_idx] - vmax = field_value_or_range[delim_idx + 1 :] - return vmin, vmax - else: - self._parse_error('"field_value_or_range", range of values expected!') - - def _parse_error(self, msg): - raise ValueError(f"{self._ERROR_PREFIX}: {msg}") diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py deleted file mode 100644 index e3fa45e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/logger.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging - - -def verbosity_to_level(verbosity): - if verbosity is not None: - if verbosity == 0: - return logging.WARNING - elif verbosity == 1: - return logging.INFO - elif verbosity >= 2: - return logging.DEBUG - return logging.WARNING diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py deleted file mode 100644 index b7cfe09..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/utils/transform.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from fvcore.common.file_io import PathManager - -from detectron2.data import MetadataCatalog - -from densepose import DensePoseTransformData - - -def load_for_dataset(dataset_name): - path = MetadataCatalog.get(dataset_name).densepose_transform_src - densepose_transform_data_fpath = PathManager.get_local_path(path) - return DensePoseTransformData.load(densepose_transform_data_fpath) - - -def load_from_cfg(cfg): - return load_for_dataset(cfg.DATASETS.TEST[0]) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py deleted file mode 100644 index 2aa3e6e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/base.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
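The specifier grammar in the `FieldEntrySelector` docstring above is easiest to read off concrete examples. A small sketch of selectors that `_parse_specifier_into_predicates` accepts: `=` for exact match, an optional `:int`/`:str` type annotation, `-` for an inclusive range, `,` to combine sub-specifiers, and `*` for everything (field names and values below are made up):

```python
# Hypothetical sketch: entry selectors as parsed by dbhelper.py above.
from densepose.utils.dbhelper import EntrySelector  # module defined above

select_all = EntrySelector.from_string("*")                  # AllEntrySelector
by_value   = EntrySelector.from_string("image_id:int=42")    # typed exact match
by_range   = EntrySelector.from_string("score:int=10-100")   # inclusive range
combined   = EntrySelector.from_string("name=foo,frame:int=1-5")

entry = {"image_id": 42, "score": 55, "name": "foo", "frame": 3}
print(select_all(entry), by_value(entry), by_range(entry), combined(entry))
# -> True True True True: each selector is a predicate over dict-like entries
```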
All Rights Reserved -import logging -import numpy as np -import cv2 -import torch - -Image = np.ndarray -Boxes = torch.Tensor - - -class MatrixVisualizer(object): - """ - Base visualizer for matrix data - """ - - def __init__( - self, - inplace=True, - cmap=cv2.COLORMAP_PARULA, - val_scale=1.0, - alpha=0.7, - interp_method_matrix=cv2.INTER_LINEAR, - interp_method_mask=cv2.INTER_NEAREST, - ): - self.inplace = inplace - self.cmap = cmap - self.val_scale = val_scale - self.alpha = alpha - self.interp_method_matrix = interp_method_matrix - self.interp_method_mask = interp_method_mask - - def visualize(self, image_bgr, mask, matrix, bbox_xywh): - self._check_image(image_bgr) - self._check_mask_matrix(mask, matrix) - if self.inplace: - image_target_bgr = image_bgr - else: - image_target_bgr = image_bgr * 0 - x, y, w, h = [int(v) for v in bbox_xywh] - if w <= 0 or h <= 0: - return image_bgr - mask, matrix = self._resize(mask, matrix, w, h) - mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3]) - matrix_scaled = matrix.astype(np.float32) * self.val_scale - _EPSILON = 1e-6 - if np.any(matrix_scaled > 255 + _EPSILON): - logger = logging.getLogger(__name__) - logger.warning( - f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]" - ) - matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8) - matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap) - matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg] - image_target_bgr[y : y + h, x : x + w, :] = ( - image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha - ) - return image_target_bgr.astype(np.uint8) - - def _resize(self, mask, matrix, w, h): - if (w != mask.shape[1]) or (h != mask.shape[0]): - mask = cv2.resize(mask, (w, h), self.interp_method_mask) - if (w != matrix.shape[1]) or (h != matrix.shape[0]): - matrix = cv2.resize(matrix, (w, h), self.interp_method_matrix) - return mask, matrix - - def _check_image(self, image_rgb): - assert len(image_rgb.shape) == 3 - assert image_rgb.shape[2] == 3 - assert image_rgb.dtype == np.uint8 - - def _check_mask_matrix(self, mask, matrix): - assert len(matrix.shape) == 2 - assert len(mask.shape) == 2 - assert mask.dtype == np.uint8 - - -class RectangleVisualizer(object): - - _COLOR_GREEN = (18, 127, 15) - - def __init__(self, color=_COLOR_GREEN, thickness=1): - self.color = color - self.thickness = thickness - - def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None): - x, y, w, h = bbox_xywh - color = color or self.color - thickness = thickness or self.thickness - cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness) - return image_bgr - - -class PointsVisualizer(object): - - _COLOR_GREEN = (18, 127, 15) - - def __init__(self, color_bgr=_COLOR_GREEN, r=5): - self.color_bgr = color_bgr - self.r = r - - def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None): - for j, pt_xy in enumerate(pts_xy): - x, y = pt_xy - color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr - r = rs[j] if rs is not None else self.r - cv2.circle(image_bgr, (x, y), r, color_bgr, -1) - return image_bgr - - -class TextVisualizer(object): - - _COLOR_GRAY = (218, 227, 218) - _COLOR_WHITE = (255, 255, 255) - - def __init__( - self, - font_face=cv2.FONT_HERSHEY_SIMPLEX, - font_color_bgr=_COLOR_GRAY, - font_scale=0.35, - font_line_type=cv2.LINE_AA, - font_line_thickness=1, - fill_color_bgr=_COLOR_WHITE, - fill_color_transparency=1.0, - frame_color_bgr=_COLOR_WHITE, - 
frame_color_transparency=1.0, - frame_thickness=1, - ): - self.font_face = font_face - self.font_color_bgr = font_color_bgr - self.font_scale = font_scale - self.font_line_type = font_line_type - self.font_line_thickness = font_line_thickness - self.fill_color_bgr = fill_color_bgr - self.fill_color_transparency = fill_color_transparency - self.frame_color_bgr = frame_color_bgr - self.frame_color_transparency = frame_color_transparency - self.frame_thickness = frame_thickness - - def visualize(self, image_bgr, txt, topleft_xy): - txt_w, txt_h = self.get_text_size_wh(txt) - topleft_xy = tuple(map(int, topleft_xy)) - x, y = topleft_xy - if self.frame_color_transparency < 1.0: - t = self.frame_thickness - image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = ( - image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] - * self.frame_color_transparency - + np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency) - ).astype(np.float) - if self.fill_color_transparency < 1.0: - image_bgr[y : y + txt_h, x : x + txt_w, :] = ( - image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency - + np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency) - ).astype(np.float) - cv2.putText( - image_bgr, - txt, - topleft_xy, - self.font_face, - self.font_scale, - self.font_color_bgr, - self.font_line_thickness, - self.font_line_type, - ) - return image_bgr - - def get_text_size_wh(self, txt): - ((txt_w, txt_h), _) = cv2.getTextSize( - txt, self.font_face, self.font_scale, self.font_line_thickness - ) - return txt_w, txt_h - - -class CompoundVisualizer(object): - def __init__(self, visualizers): - self.visualizers = visualizers - - def visualize(self, image_bgr, data): - assert len(data) == len( - self.visualizers - ), "The number of datas {} should match the number of visualizers" " {}".format( - len(data), len(self.visualizers) - ) - image = image_bgr - for i, visualizer in enumerate(self.visualizers): - image = visualizer.visualize(image, data[i]) - return image - - def __str__(self): - visualizer_str = ", ".join([str(v) for v in self.visualizers]) - return "Compound Visualizer [{}]".format(visualizer_str) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py deleted file mode 100644 index d7951d6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/bounding_box.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .base import RectangleVisualizer, TextVisualizer - - -class BoundingBoxVisualizer(object): - def __init__(self): - self.rectangle_visualizer = RectangleVisualizer() - - def visualize(self, image_bgr, boxes_xywh): - for bbox_xywh in boxes_xywh: - image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh) - return image_bgr - - -class ScoredBoundingBoxVisualizer(object): - def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None): - if bbox_visualizer_params is None: - bbox_visualizer_params = {} - if score_visualizer_params is None: - score_visualizer_params = {} - self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params) - self.visualizer_score = TextVisualizer(**score_visualizer_params) - - def visualize(self, image_bgr, scored_bboxes): - boxes_xywh, box_scores = scored_bboxes - assert len(boxes_xywh) == len( - box_scores - ), "Number of bounding boxes {} should be equal to the number of scores {}".format( - len(boxes_xywh), len(box_scores) - ) - for i, box_xywh in enumerate(boxes_xywh): - score_i = box_scores[i] - image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh) - score_txt = "{0:6.4f}".format(score_i) - topleft_xy = box_xywh[0], box_xywh[1] - image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy) - return image_bgr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py deleted file mode 100644 index f2e77dc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/densepose.py +++ /dev/null @@ -1,593 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
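To show how the base visualizers above are meant to be driven, here is a minimal sketch that overlays a synthetic matrix on a dummy frame with `MatrixVisualizer` and then draws a scored box with `ScoredBoundingBoxVisualizer`; all of the data is synthetic, and the import paths simply follow the file locations in this diff.

```python
# Hypothetical sketch: exercising MatrixVisualizer / ScoredBoundingBoxVisualizer
# on synthetic data (requires numpy and OpenCV).
import numpy as np

from densepose.vis.base import MatrixVisualizer
from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer

image_bgr = np.zeros((240, 320, 3), dtype=np.uint8)   # dummy frame
bbox_xywh = [40, 30, 128, 160]                        # x, y, w, h

# Fake per-pixel values (e.g. a U map scaled to 0..255) and a foreground mask,
# both sized (h, w) to match the box so no resizing is needed.
matrix = np.random.randint(0, 256, size=(160, 128), dtype=np.uint8)
mask = np.ones((160, 128), dtype=np.uint8)

mat_vis = MatrixVisualizer(inplace=True, val_scale=1.0, alpha=0.7)
image_bgr = mat_vis.visualize(image_bgr, mask, matrix, bbox_xywh)

# Scored boxes come as a (boxes_xywh, scores) pair; each box gets a rectangle
# plus a text label rendered by TextVisualizer.
box_vis = ScoredBoundingBoxVisualizer()
image_bgr = box_vis.visualize(image_bgr, ([bbox_xywh], [0.97]))
```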
All Rights Reserved -import logging -import numpy as np -from typing import Iterable, Optional, Tuple -import cv2 - -from ..data.structures import DensePoseDataRelative, DensePoseOutput, DensePoseResult -from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer - - -class DensePoseResultsVisualizer(object): - def visualize(self, image_bgr: Image, densepose_result: Optional[DensePoseResult]) -> Image: - if densepose_result is None: - return image_bgr - context = self.create_visualization_context(image_bgr) - for i, result_encoded_w_shape in enumerate(densepose_result.results): - iuv_arr = DensePoseResult.decode_png_data(*result_encoded_w_shape) - bbox_xywh = densepose_result.boxes_xywh[i] - self.visualize_iuv_arr(context, iuv_arr, bbox_xywh) - image_bgr = self.context_to_image_bgr(context) - return image_bgr - - -class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer): - def __init__( - self, - data_extractor, - segm_extractor, - inplace=True, - cmap=cv2.COLORMAP_PARULA, - alpha=0.7, - val_scale=1.0, - ): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha - ) - self.data_extractor = data_extractor - self.segm_extractor = segm_extractor - - def create_visualization_context(self, image_bgr: Image): - return image_bgr - - def context_to_image_bgr(self, context): - return context - - def get_image_bgr_from_context(self, context): - return context - - def visualize_iuv_arr(self, context, iuv_arr, bbox_xywh): - image_bgr = self.get_image_bgr_from_context(context) - matrix = self.data_extractor(iuv_arr) - segm = self.segm_extractor(iuv_arr) - mask = np.zeros(matrix.shape, dtype=np.uint8) - mask[segm > 0] = 1 - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh) - return image_bgr - - -def _extract_i_from_iuvarr(iuv_arr): - return iuv_arr[0, :, :] - - -def _extract_u_from_iuvarr(iuv_arr): - return iuv_arr[1, :, :] - - -def _extract_v_from_iuvarr(iuv_arr): - return iuv_arr[2, :, :] - - -class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer): - def __init__(self, levels=10, **kwargs): - self.levels = levels - self.plot_args = kwargs - - def create_visualization_context(self, image_bgr: Image): - import matplotlib.pyplot as plt - from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas - - context = {} - context["image_bgr"] = image_bgr - dpi = 100 - height_inches = float(image_bgr.shape[0]) / dpi - width_inches = float(image_bgr.shape[1]) / dpi - fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi) - plt.axes([0, 0, 1, 1]) - plt.axis("off") - context["fig"] = fig - canvas = FigureCanvas(fig) - context["canvas"] = canvas - extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0) - plt.imshow(image_bgr[:, :, ::-1], extent=extent) - return context - - def context_to_image_bgr(self, context): - fig = context["fig"] - w, h = map(int, fig.get_size_inches() * fig.get_dpi()) - canvas = context["canvas"] - canvas.draw() - image_1d = np.fromstring(canvas.tostring_rgb(), dtype="uint8") - image_rgb = image_1d.reshape(h, w, 3) - image_bgr = image_rgb[:, :, ::-1].copy() - return image_bgr - - def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image: - import matplotlib.pyplot as plt - - u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0 - v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0 - extent = ( - bbox_xywh[0], - bbox_xywh[0] + bbox_xywh[2], - bbox_xywh[1], - bbox_xywh[1] + bbox_xywh[3], - ) - plt.contour(u, 
self.levels, extent=extent, **self.plot_args) - plt.contour(v, self.levels, extent=extent, **self.plot_args) - - -class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer): - """ - Contour visualization using marching squares - """ - - def __init__(self, levels=10, **kwargs): - # TODO: colormap is hardcoded - cmap = cv2.COLORMAP_PARULA - if isinstance(levels, int): - self.levels = np.linspace(0, 1, levels) - else: - self.levels = levels - if "linewidths" in kwargs: - self.linewidths = kwargs["linewidths"] - else: - self.linewidths = [1] * len(self.levels) - self.plot_args = kwargs - img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap) - self.level_colors_bgr = [ - [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr - ] - - def create_visualization_context(self, image_bgr: Image): - return image_bgr - - def context_to_image_bgr(self, context): - return context - - def get_image_bgr_from_context(self, context): - return context - - def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> Image: - image_bgr = self.get_image_bgr_from_context(context) - segm = _extract_i_from_iuvarr(iuv_arr) - u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0 - v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0 - self._contours(image_bgr, u, segm, bbox_xywh) - self._contours(image_bgr, v, segm, bbox_xywh) - - def _contours(self, image_bgr, arr, segm, bbox_xywh): - for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1): - mask = segm == part_idx - if not np.any(mask): - continue - arr_min = np.amin(arr[mask]) - arr_max = np.amax(arr[mask]) - I, J = np.nonzero(mask) - i0 = np.amin(I) - i1 = np.amax(I) + 1 - j0 = np.amin(J) - j1 = np.amax(J) + 1 - if (j1 == j0 + 1) or (i1 == i0 + 1): - continue - Nw = arr.shape[1] - 1 - Nh = arr.shape[0] - 1 - for level_idx, level in enumerate(self.levels): - if (level < arr_min) or (level > arr_max): - continue - vp = arr[i0:i1, j0:j1] >= level - bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8 - mp = mask[i0:i1, j0:j1] - bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8 - it = np.nditer(bin_codes, flags=["multi_index"]) - color_bgr = self.level_colors_bgr[level_idx] - linewidth = self.linewidths[level_idx] - while not it.finished: - if (it[0] != 0) and (it[0] != 15): - i, j = it.multi_index - if bin_mask_codes[i, j] != 0: - self._draw_line( - image_bgr, - arr, - mask, - level, - color_bgr, - linewidth, - it[0], - it.multi_index, - bbox_xywh, - Nw, - Nh, - (i0, j0), - ) - it.iternext() - - def _draw_line( - self, - image_bgr, - arr, - mask, - v, - color_bgr, - linewidth, - bin_code, - multi_idx, - bbox_xywh, - Nw, - Nh, - offset, - ): - lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset) - x0, y0, w, h = bbox_xywh - x1 = x0 + w - y1 = y0 + h - for line in lines: - x0r, y0r = line[0] - x1r, y1r = line[1] - pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0))) - pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0))) - cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth) - - def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset): - i0, j0 = offset - i, j = multi_idx - i += i0 - j += j0 - v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1] - x0i = float(j) / Nw - y0j = float(i) / Nh - He = 1.0 / Nh - We = 1.0 / Nw - if (bin_code == 1) or (bin_code == 14): - a = (v - v0) / (v1 - v0) - b = (v - v0) / (v3 - v0) - pt1 = (x0i, y0j + a * 
He) - pt2 = (x0i + b * We, y0j) - return [(pt1, pt2)] - elif (bin_code == 2) or (bin_code == 13): - a = (v - v0) / (v1 - v0) - b = (v - v1) / (v2 - v1) - pt1 = (x0i, y0j + a * He) - pt2 = (x0i + b * We, y0j + He) - return [(pt1, pt2)] - elif (bin_code == 3) or (bin_code == 12): - a = (v - v0) / (v3 - v0) - b = (v - v1) / (v2 - v1) - pt1 = (x0i + a * We, y0j) - pt2 = (x0i + b * We, y0j + He) - return [(pt1, pt2)] - elif (bin_code == 4) or (bin_code == 11): - a = (v - v1) / (v2 - v1) - b = (v - v3) / (v2 - v3) - pt1 = (x0i + a * We, y0j + He) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif (bin_code == 6) or (bin_code == 9): - a = (v - v0) / (v1 - v0) - b = (v - v3) / (v2 - v3) - pt1 = (x0i, y0j + a * He) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif (bin_code == 7) or (bin_code == 8): - a = (v - v0) / (v3 - v0) - b = (v - v3) / (v2 - v3) - pt1 = (x0i + a * We, y0j) - pt2 = (x0i + We, y0j + b * He) - return [(pt1, pt2)] - elif bin_code == 5: - a1 = (v - v0) / (v1 - v0) - b1 = (v - v1) / (v2 - v1) - pt11 = (x0i, y0j + a1 * He) - pt12 = (x0i + b1 * We, y0j + He) - a2 = (v - v0) / (v3 - v0) - b2 = (v - v3) / (v2 - v3) - pt21 = (x0i + a2 * We, y0j) - pt22 = (x0i + We, y0j + b2 * He) - return [(pt11, pt12), (pt21, pt22)] - elif bin_code == 10: - a1 = (v - v0) / (v3 - v0) - b1 = (v - v0) / (v1 - v0) - pt11 = (x0i + a1 * We, y0j) - pt12 = (x0i, y0j + b1 * He) - a2 = (v - v1) / (v2 - v1) - b2 = (v - v3) / (v2 - v3) - pt21 = (x0i + a2 * We, y0j + He) - pt22 = (x0i + We, y0j + b2 * He) - return [(pt11, pt12), (pt21, pt22)] - return [] - - -try: - import matplotlib - - matplotlib.use("Agg") - DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer -except ModuleNotFoundError: - logger = logging.getLogger(__name__) - logger.warning("Could not import matplotlib, using custom contour visualizer") - DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer - - -class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - super(DensePoseResultsFineSegmentationVisualizer, self).__init__( - _extract_i_from_iuvarr, - _extract_i_from_iuvarr, - inplace, - cmap, - alpha, - val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS, - ) - - -class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - super(DensePoseResultsUVisualizer, self).__init__( - _extract_u_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0 - ) - - -class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - super(DensePoseResultsVVisualizer, self).__init__( - _extract_v_from_iuvarr, _extract_i_from_iuvarr, inplace, cmap, alpha, val_scale=1.0 - ) - - -class DensePoseOutputsFineSegmentationVisualizer(object): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, - cmap=cmap, - val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS, - alpha=alpha, - ) - - def visualize( - self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]] - ) -> Image: - if dp_output_with_bboxes is None: - return image_bgr - densepose_output, bboxes_xywh = dp_output_with_bboxes - S = densepose_output.S - I = densepose_output.I # noqa - U = densepose_output.U - V = densepose_output.V - N = S.size(0) 
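Stepping back to the custom contour visualizer above: the 4-bit cell code built in `_contours` is just the four corners of each 2x2 grid cell packed into bits (top-left -> 1, bottom-left -> 2, bottom-right -> 4, top-right -> 8); codes 0 and 15 mean no contour crosses the cell, 5 and 10 are the two-segment saddle cases, and every other code maps to one interpolated segment in `_bin_code_2_lines`. A toy illustration with made-up values:

```python
# Hypothetical mini-demo of the marching-squares cell code used by _contours().
import numpy as np

arr = np.array([[0.2, 0.8],
                [0.4, 0.9]])       # one 2x2 cell of U (or V) values
level = 0.5
vp = arr >= level                  # which corners lie above the contour level
code = int(vp[0, 0] + vp[1, 0] * 2 + vp[1, 1] * 4 + vp[0, 1] * 8)
print(code)  # 12: only the right-hand corners are above, handled by the 3/12 branch
```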
- assert N == I.size( - 0 - ), "densepose outputs S {} and I {}" " should have equal first dim size".format( - S.size(), I.size() - ) - assert N == U.size( - 0 - ), "densepose outputs S {} and U {}" " should have equal first dim size".format( - S.size(), U.size() - ) - assert N == V.size( - 0 - ), "densepose outputs S {} and V {}" " should have equal first dim size".format( - S.size(), V.size() - ) - assert N == len( - bboxes_xywh - ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format( - len(bboxes_xywh), N - ) - for n in range(N): - Sn = S[n].argmax(dim=0) - In = I[n].argmax(dim=0) * (Sn > 0).long() - matrix = In.cpu().numpy().astype(np.uint8) - mask = np.zeros(matrix.shape, dtype=np.uint8) - mask[matrix > 0] = 1 - bbox_xywh = bboxes_xywh[n] - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh) - return image_bgr - - -class DensePoseOutputsUVisualizer(object): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha - ) - - def visualize( - self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]] - ) -> Image: - if dp_output_with_bboxes is None: - return image_bgr - densepose_output, bboxes_xywh = dp_output_with_bboxes - assert isinstance( - densepose_output, DensePoseOutput - ), "DensePoseOutput expected, {} encountered".format(type(densepose_output)) - S = densepose_output.S - I = densepose_output.I # noqa - U = densepose_output.U - V = densepose_output.V - N = S.size(0) - assert N == I.size( - 0 - ), "densepose outputs S {} and I {}" " should have equal first dim size".format( - S.size(), I.size() - ) - assert N == U.size( - 0 - ), "densepose outputs S {} and U {}" " should have equal first dim size".format( - S.size(), U.size() - ) - assert N == V.size( - 0 - ), "densepose outputs S {} and V {}" " should have equal first dim size".format( - S.size(), V.size() - ) - assert N == len( - bboxes_xywh - ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format( - len(bboxes_xywh), N - ) - for n in range(N): - Sn = S[n].argmax(dim=0) - In = I[n].argmax(dim=0) * (Sn > 0).long() - segmentation = In.cpu().numpy().astype(np.uint8) - mask = np.zeros(segmentation.shape, dtype=np.uint8) - mask[segmentation > 0] = 1 - Un = U[n].cpu().numpy().astype(np.float32) - Uvis = np.zeros(segmentation.shape, dtype=np.float32) - for partId in range(Un.shape[0]): - Uvis[segmentation == partId] = Un[partId][segmentation == partId].clip(0, 1) * 255 - bbox_xywh = bboxes_xywh[n] - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Uvis, bbox_xywh) - return image_bgr - - -class DensePoseOutputsVVisualizer(object): - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha - ) - - def visualize( - self, image_bgr: Image, dp_output_with_bboxes: Optional[Tuple[DensePoseOutput, Boxes]] - ) -> Image: - if dp_output_with_bboxes is None: - return image_bgr - densepose_output, bboxes_xywh = dp_output_with_bboxes - assert isinstance( - densepose_output, DensePoseOutput - ), "DensePoseOutput expected, {} encountered".format(type(densepose_output)) - S = densepose_output.S - I = densepose_output.I # noqa - U = densepose_output.U - V = densepose_output.V - N = S.size(0) - assert N == I.size( - 0 - ), "densepose outputs S {} and I {}" " should have equal first dim 
size".format( - S.size(), I.size() - ) - assert N == U.size( - 0 - ), "densepose outputs S {} and U {}" " should have equal first dim size".format( - S.size(), U.size() - ) - assert N == V.size( - 0 - ), "densepose outputs S {} and V {}" " should have equal first dim size".format( - S.size(), V.size() - ) - assert N == len( - bboxes_xywh - ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format( - len(bboxes_xywh), N - ) - for n in range(N): - Sn = S[n].argmax(dim=0) - In = I[n].argmax(dim=0) * (Sn > 0).long() - segmentation = In.cpu().numpy().astype(np.uint8) - mask = np.zeros(segmentation.shape, dtype=np.uint8) - mask[segmentation > 0] = 1 - Vn = V[n].cpu().numpy().astype(np.float32) - Vvis = np.zeros(segmentation.shape, dtype=np.float32) - for partId in range(Vn.size(0)): - Vvis[segmentation == partId] = Vn[partId][segmentation == partId].clip(0, 1) * 255 - bbox_xywh = bboxes_xywh[n] - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, Vvis, bbox_xywh) - return image_bgr - - -class DensePoseDataCoarseSegmentationVisualizer(object): - """ - Visualizer for ground truth segmentation - """ - - def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7): - self.mask_visualizer = MatrixVisualizer( - inplace=inplace, - cmap=cmap, - val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS, - alpha=alpha, - ) - - def visualize( - self, - image_bgr: Image, - bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]], - ) -> Image: - if bbox_densepose_datas is None: - return image_bgr - for bbox_xywh, densepose_data in zip(*bbox_densepose_datas): - matrix = densepose_data.segm.numpy() - mask = np.zeros(matrix.shape, dtype=np.uint8) - mask[matrix > 0] = 1 - image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy()) - return image_bgr - - -class DensePoseDataPointsVisualizer(object): - def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA): - self.points_visualizer = PointsVisualizer() - self.densepose_data_to_value_fn = densepose_data_to_value_fn - self.cmap = cmap - - def visualize( - self, - image_bgr: Image, - bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]], - ) -> Image: - if bbox_densepose_datas is None: - return image_bgr - for bbox_xywh, densepose_data in zip(*bbox_densepose_datas): - x0, y0, w, h = bbox_xywh.numpy() - x = densepose_data.x.numpy() * w / 255.0 + x0 - y = densepose_data.y.numpy() * h / 255.0 + y0 - pts_xy = zip(x, y) - if self.densepose_data_to_value_fn is None: - image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy) - else: - v = self.densepose_data_to_value_fn(densepose_data) - img_colors_bgr = cv2.applyColorMap(v, self.cmap) - colors_bgr = [ - [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr - ] - image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr) - return image_bgr - - -def _densepose_data_u_for_cmap(densepose_data): - u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0 - return u.astype(np.uint8) - - -def _densepose_data_v_for_cmap(densepose_data): - v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0 - return v.astype(np.uint8) - - -def _densepose_data_i_for_cmap(densepose_data): - i = ( - np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS) - * 255.0 - / DensePoseDataRelative.N_PART_LABELS - ) - return i.astype(np.uint8) - - -class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer): - def __init__(self): - 
super(DensePoseDataPointsUVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_u_for_cmap - ) - - -class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer): - def __init__(self): - super(DensePoseDataPointsVVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_v_for_cmap - ) - - -class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer): - def __init__(self): - super(DensePoseDataPointsIVisualizer, self).__init__( - densepose_data_to_value_fn=_densepose_data_i_for_cmap - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py deleted file mode 100644 index b715a44..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/densepose/vis/extractor.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -from typing import Sequence -import torch - -from detectron2.layers.nms import batched_nms -from detectron2.structures.instances import Instances - -from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer -from densepose.vis.densepose import DensePoseResultsVisualizer - -from .base import CompoundVisualizer - -Scores = Sequence[float] - - -def extract_scores_from_instances(instances: Instances, select=None): - if instances.has("scores"): - return instances.scores if select is None else instances.scores[select] - return None - - -def extract_boxes_xywh_from_instances(instances: Instances, select=None): - if instances.has("pred_boxes"): - boxes_xywh = instances.pred_boxes.tensor.clone() - boxes_xywh[:, 2] -= boxes_xywh[:, 0] - boxes_xywh[:, 3] -= boxes_xywh[:, 1] - return boxes_xywh if select is None else boxes_xywh[select] - return None - - -def create_extractor(visualizer: object): - """ - Create an extractor for the provided visualizer - """ - if isinstance(visualizer, CompoundVisualizer): - extractors = [create_extractor(v) for v in visualizer.visualizers] - return CompoundExtractor(extractors) - elif isinstance(visualizer, DensePoseResultsVisualizer): - return DensePoseResultExtractor() - elif isinstance(visualizer, ScoredBoundingBoxVisualizer): - return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances]) - elif isinstance(visualizer, BoundingBoxVisualizer): - return extract_boxes_xywh_from_instances - else: - logger = logging.getLogger(__name__) - logger.error(f"Could not create extractor for {visualizer}") - return None - - -class BoundingBoxExtractor(object): - """ - Extracts bounding boxes from instances - """ - - def __call__(self, instances: Instances): - boxes_xywh = extract_boxes_xywh_from_instances(instances) - return boxes_xywh - - -class ScoredBoundingBoxExtractor(object): - """ - Extracts bounding boxes from instances - """ - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if (scores is None) or (boxes_xywh is None): - return (boxes_xywh, scores) - if select is not None: - scores = scores[select] - boxes_xywh = boxes_xywh[select] - return (boxes_xywh, scores) - - -class DensePoseResultExtractor(object): - """ - Extracts DensePose result from instances - """ - - def __call__(self, instances: Instances, select=None): - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if 
instances.has("pred_densepose") and (boxes_xywh is not None): - dpout = instances.pred_densepose - if select is not None: - dpout = dpout[select] - boxes_xywh = boxes_xywh[select] - return dpout.to_result(boxes_xywh) - else: - return None - - -class CompoundExtractor(object): - """ - Extracts data for CompoundVisualizer - """ - - def __init__(self, extractors): - self.extractors = extractors - - def __call__(self, instances: Instances, select=None): - datas = [] - for extractor in self.extractors: - data = extractor(instances, select) - datas.append(data) - return datas - - -class NmsFilteredExtractor(object): - """ - Extracts data in the format accepted by NmsFilteredVisualizer - """ - - def __init__(self, extractor, iou_threshold): - self.extractor = extractor - self.iou_threshold = iou_threshold - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - boxes_xywh = extract_boxes_xywh_from_instances(instances) - if boxes_xywh is None: - return None - select_local_idx = batched_nms( - boxes_xywh, - scores, - torch.zeros(len(scores), dtype=torch.int32), - iou_threshold=self.iou_threshold, - ).squeeze() - select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device) - select_local[select_local_idx] = True - select = select_local if select is None else (select & select_local) - return self.extractor(instances, select=select) - - -class ScoreThresholdedExtractor(object): - """ - Extracts data in the format accepted by ScoreThresholdedVisualizer - """ - - def __init__(self, extractor, min_score): - self.extractor = extractor - self.min_score = min_score - - def __call__(self, instances: Instances, select=None): - scores = extract_scores_from_instances(instances) - if scores is None: - return None - select_local = scores > self.min_score - select = select_local if select is None else (select & select_local) - data = self.extractor(instances, select=select) - return data diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/README.md deleted file mode 100644 index e3a94b6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/README.md +++ /dev/null @@ -1,7 +0,0 @@ - -## Some scripts for developers to use, include: - -- `run_instant_tests.sh`: run training for a few iterations. -- `run_inference_tests.sh`: run inference on a small dataset. -- `../../dev/linter.sh`: lint the codebase before commit -- `../../dev/parse_results.sh`: parse results from log file. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh deleted file mode 100644 index 34f47d5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_inference_tests.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -BIN="python train_net.py" -OUTPUT="inference_test_output" -NUM_GPUS=2 -IMS_PER_GPU=2 -IMS_PER_BATCH=$(( NUM_GPUS * IMS_PER_GPU )) - -CFG_LIST=( "${@:1}" ) - -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN \ - --eval-only \ - --num-gpus $NUM_GPUS \ - --config-file "$cfg" \ - OUTPUT_DIR "$OUTPUT" \ - SOLVER.IMS_PER_BATCH $IMS_PER_BATCH - rm -rf $OUTPUT -done - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh deleted file mode 100644 index a537851..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/dev/run_instant_tests.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -e -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -BIN="python train_net.py" -OUTPUT="instant_test_output" -NUM_GPUS=2 -SOLVER_IMS_PER_BATCH=$((NUM_GPUS * 2)) - -CFG_LIST=( "${@:1}" ) -if [ ${#CFG_LIST[@]} -eq 0 ]; then - CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) -fi - -echo "========================================================================" -echo "Configs to run:" -echo "${CFG_LIST[@]}" -echo "========================================================================" - -for cfg in "${CFG_LIST[@]}"; do - echo "========================================================================" - echo "Running $cfg ..." - echo "========================================================================" - $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ - SOLVER.IMS_PER_BATCH $SOLVER_IMS_PER_BATCH \ - OUTPUT_DIR "$OUTPUT" - rm -rf "$OUTPUT" -done - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md deleted file mode 100644 index a6bcbed..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/GETTING_STARTED.md +++ /dev/null @@ -1,58 +0,0 @@ -# Getting Started with DensePose - -## Inference with Pre-trained Models - -1. Pick a model and its config file from [Model Zoo](MODEL_ZOO.md), for example [densepose_rcnn_R_50_FPN_s1x.yaml](../configs/densepose_rcnn_R_50_FPN_s1x.yaml) -2. Run the [Apply Net](TOOL_APPLY_NET.md) tool to visualize the results or save the to disk. For example, to use contour visualization for DensePose, one can run: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml densepose_rcnn_R_50_FPN_s1x.pkl image.jpg dp_contour,bbox --output image_densepose_contour.png -``` -Please see [Apply Net](TOOL_APPLY_NET.md) for more details on the tool. - -## Training - -First, prepare the [dataset](http://densepose.org/#dataset) into the following structure under the directory you'll run training scripts: -
-datasets/coco/
-  annotations/
-    densepose_{train,minival,valminusminival}2014.json
-    densepose_minival2014_100.json   (optional, for testing only)
-  {train,val}2014/
-    # image files that are mentioned in the corresponding json
-
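One possible way to satisfy this layout is to link an existing COCO download into place. The sketch below is only an illustration with placeholder paths (`/path/to/coco` and `/path/to/densepose_annotations` are assumptions), not part of the official instructions:

```python
# Sketch only: wire an existing COCO image download and the DensePose JSON files
# into the datasets/coco/ layout shown above. All source paths are placeholders.
from pathlib import Path

coco_src = Path("/path/to/coco")                  # contains train2014/ and val2014/
dp_anns = Path("/path/to/densepose_annotations")  # contains densepose_*.json
root = Path("datasets/coco")

(root / "annotations").mkdir(parents=True, exist_ok=True)
for split in ("train2014", "val2014"):
    link = root / split
    if not link.exists():
        link.symlink_to(coco_src / split)
for ann in dp_anns.glob("densepose_*.json"):
    target = root / "annotations" / ann.name
    if not target.exists():
        target.symlink_to(ann)
```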
- -To train a model one can use the [train_net.py](../train_net.py) script. -This script was used to train all DensePose models in [Model Zoo](MODEL_ZOO.md). -For example, to launch end-to-end DensePose-RCNN training with ResNet-50 FPN backbone -on 8 GPUs following the s1x schedule, one can run -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml --num-gpus 8 -``` -The configs are made for 8-GPU training. To train on 1 GPU, one can apply the -[linear learning rate scaling rule](https://arxiv.org/abs/1706.02677): -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \ - SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 -``` - -## Evaluation - -Model testing can be done in the same way as training, except for an additional flag `--eval-only` and -model location specification through `MODEL.WEIGHTS model.pth` in the command line -```bash -python train_net.py --config-file configs/densepose_rcnn_R_50_FPN_s1x.yaml \ - --eval-only MODEL.WEIGHTS model.pth -``` - -## Tools - -We provide tools which allow one to: - - easily view DensePose annotated data in a dataset; - - perform DensePose inference on a set of images; - - visualize DensePose model results; - -`query_db` is a tool to print or visualize DensePose data in a dataset. -Please refer to [Query DB](TOOL_QUERY_DB.md) for more details on this tool - -`apply_net` is a tool to print or visualize DensePose results. -Please refer to [Apply Net](TOOL_APPLY_NET.md) for more details on this tool diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md deleted file mode 100644 index c263084..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/MODEL_ZOO.md +++ /dev/null @@ -1,277 +0,0 @@ -# Model Zoo and Baselines - -# Introduction - -We provide baselines trained with Detectron2 DensePose. The corresponding -configuration files can be found in the [configs](../configs) directory. -All models were trained on COCO `train2014` + `valminusminival2014` and -evaluated on COCO `minival2014`. For the details on common settings in which -baselines were trained, please check [Detectron 2 Model Zoo](../../../MODEL_ZOO.md). - -## License - -All models available for download through this document are licensed under the -[Creative Commons Attribution-ShareAlike 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/) - -## COCO DensePose Baselines with DensePose-RCNN - -### Legacy Models - -Baselines trained using schedules from [Güler et al, 2018](https://arxiv.org/pdf/1802.00434.pdf) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | dp. AP GPS | dp. AP GPSm | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| R_50_FPN_s1x_legacy | s1x | 0.307 | 0.051 | 3.2 | 58.1 | 52.1 | 54.9 | 164832157 | model \| metrics |
-| R_101_FPN_s1x_legacy | s1x | 0.390 | 0.063 | 4.3 | 59.5 | 53.2 | 56.1 | 164832182 | model \| metrics |
-
-### Improved Baselines, Original Fully Convolutional Head
-
-These models use an improved training schedule and Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446).
-
-| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | dp. AP GPS | dp. AP GPSm | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| R_50_FPN_s1x | s1x | 0.359 | 0.066 | 4.5 | 61.2 | 63.7 | 65.3 | 165712039 | model \| metrics |
-| R_101_FPN_s1x | s1x | 0.428 | 0.079 | 5.8 | 62.3 | 64.5 | 66.4 | 165712084 | model \| metrics |
-
-### Improved Baselines, DeepLabV3 Head
-
-These models use an improved training schedule, Panoptic FPN head from [Kirillov et al, 2019](https://arxiv.org/abs/1901.02446) and DeepLabV3 head from [Chen et al, 2017](https://arxiv.org/abs/1706.05587).
-
-| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | dp. AP GPS | dp. AP GPSm | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| R_50_FPN_DL_s1x | s1x | 0.392 | 0.070 | 6.7 | 61.1 | 65.6 | 66.8 | 165712097 | model \| metrics |
-| R_101_FPN_DL_s1x | s1x | 0.478 | 0.083 | 7.0 | 62.3 | 66.3 | 67.7 | 165712116 | model \| metrics |
-
-### Baselines with Confidence Estimation
-
-These models perform additional estimation of confidence in regressed UV coordinates, along the lines of [Neverova et al., 2019](https://papers.nips.cc/paper/8378-correlated-uncertainty-for-learning-dense-correspondences-from-noisy-labels).
-
-| Name | lr sched | train time (s/iter) | inference time (s/im) | train mem (GB) | box AP | dp. AP GPS | dp. AP GPSm | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| R_50_FPN_WC1_s1x | s1x | 0.353 | 0.064 | 4.6 | 60.5 | 64.2 | 65.6 | 173862049 | model \| metrics |
-| R_50_FPN_WC2_s1x | s1x | 0.364 | 0.066 | 4.8 | 60.7 | 64.2 | 65.7 | 173861455 | model \| metrics |
-| R_50_FPN_DL_WC1_s1x | s1x | 0.397 | 0.068 | 6.7 | 61.1 | 65.8 | 67.1 | 173067973 | model \| metrics |
-| R_50_FPN_DL_WC2_s1x | s1x | 0.410 | 0.070 | 6.8 | 60.8 | 65.6 | 66.7 | 173859335 | model \| metrics |
-| R_101_FPN_WC1_s1x | s1x | 0.435 | 0.076 | 5.7 | 62.5 | 64.9 | 66.5 | 171402969 | model \| metrics |
-| R_101_FPN_WC2_s1x | s1x | 0.450 | 0.078 | 5.7 | 62.3 | 64.8 | 66.6 | 173860702 | model \| metrics |
-| R_101_FPN_DL_WC1_s1x | s1x | 0.479 | 0.081 | 7.9 | 62.0 | 66.2 | 67.4 | 173858525 | model \| metrics |
-| R_101_FPN_DL_WC2_s1x | s1x | 0.491 | 0.082 | 7.6 | 61.7 | 65.9 | 67.3 | 173294801 | model \| metrics |
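To build one of the baselines above in Python and load a downloaded checkpoint, a rough sketch is given below. It mirrors the config-handling pattern used by `tests/common.py` and `train_net.py` later in this diff; the config file name and checkpoint path are placeholders, not prescribed by the model zoo itself:

```python
# Sketch: construct a DensePose baseline from its config and load a checkpoint
# downloaded from the model zoo table above. All paths are placeholders.
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.modeling import build_model

from densepose import add_dataset_category_config, add_densepose_config

cfg = get_cfg()
add_dataset_category_config(cfg)
add_densepose_config(cfg)
cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_WC1_s1x.yaml")  # placeholder config
cfg.MODEL.WEIGHTS = "/path/to/model_final.pkl"                       # downloaded checkpoint
model = build_model(cfg)
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()
```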
- -## Old Baselines - -It is still possible to use some baselines from [DensePose 1](https://github.com/facebookresearch/DensePose). -Below are evaluation metrics for the baselines recomputed in the current framework: - -| Model | bbox AP | AP | AP50 | AP75 | APm |APl | -|-----|-----|-----|--- |--- |--- |--- | -| [`ResNet50_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet50_FPN_s1x-e2e.pkl) | 54.673 | 48.894 | 84.963 | 50.717 | 43.132 | 50.433 | -| [`ResNet101_FPN_s1x-e2e`](https://dl.fbaipublicfiles.com/densepose/DensePose_ResNet101_FPN_s1x-e2e.pkl) | 56.032 | 51.088 | 86.250 | 55.057 | 46.542 | 52.563 | - -Note: these scores are close, but not strictly equal to the ones reported in the [DensePose 1 Model Zoo](https://github.com/facebookresearch/DensePose/blob/master/MODEL_ZOO.md), -which is due to small incompatibilities between the frameworks. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md deleted file mode 100644 index f5cf257..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_APPLY_NET.md +++ /dev/null @@ -1,130 +0,0 @@ -# Apply Net - -`apply_net` is a tool to print or visualize DensePose results on a set of images. -It has two modes: `dump` to save DensePose model results to a pickle file -and `show` to visualize them on images. - -## Dump Mode - -The general command form is: -```bash -python apply_net.py dump [-h] [-v] [--output ] -``` - -There are three mandatory arguments: - - ``, configuration file for a given model; - - ``, model file with trained parameters - - ``, input image file name, pattern or folder - -One can additionally provide `--output` argument to define the output file name, -which defaults to `output.pkl`. - - -Examples: - -1. Dump results of a DensePose model with ResNet-50 FPN backbone for images - in a folder `images` to file `dump.pkl`: -```bash -python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl images --output dump.pkl -v -``` - -2. Dump results of a DensePose model with ResNet-50 FPN backbone for images - with file name matching a pattern `image*.jpg` to file `results.pkl`: -```bash -python apply_net.py dump configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl "image*.jpg" --output results.pkl -v -``` - -If you want to load the pickle file generated by the above command: -``` -# make sure DensePose is in your PYTHONPATH, or use the following line to add it: -sys.path.append("/your_detectron2_path/detectron2_repo/projects/DensePose/") - -f = open('/your_result_path/results.pkl', 'rb') -data = pickle.load(f) -``` - -The file `results.pkl` contains the list of results per image, for each image the result is a dictionary: -``` -data: [{'file_name': '/your_path/image1.jpg', - 'scores': tensor([0.9884]), - 'pred_boxes_XYXY': tensor([[ 69.6114, 0.0000, 706.9797, 706.0000]]), - 'pred_densepose': }, - {'file_name': '/your_path/image2.jpg', - 'scores': tensor([0.9999, 0.5373, 0.3991]), - 'pred_boxes_XYXY': tensor([[ 59.5734, 7.7535, 579.9311, 932.3619], - [612.9418, 686.1254, 612.9999, 704.6053], - [164.5081, 407.4034, 598.3944, 920.4266]]), - 'pred_densepose': }] -``` - -We can use the following code, to parse the outputs of the first -detected instance on the first image. 
-``` -img_id, instance_id = 0, 0 # Look at the first image and the first detected instance -bbox_xyxy = data[img_id]['pred_boxes_XYXY'][instance_id] -result_encoded = data[img_id]['pred_densepose'].results[instance_id] -iuv_arr = DensePoseResult.decode_png_data(*result_encoded) -``` -The array `bbox_xyxy` contains (x0, y0, x1, y1) of the bounding box. - -The shape of `iuv_arr` is `[3, H, W]`, where (H, W) is the shape of the bounding box. -- `iuv_arr[0,:,:]`: The patch index of image points, indicating which of the 24 surface patches the point is on. -- `iuv_arr[1,:,:]`: The U-coordinate value of image points. -- `iuv_arr[2,:,:]`: The V-coordinate value of image points. - - -## Visualization Mode - -The general command form is: -```bash -python apply_net.py show [-h] [-v] [--min_score ] [--nms_thresh ] [--output ] -``` - -There are four mandatory arguments: - - ``, configuration file for a given model; - - ``, model file with trained parameters - - ``, input image file name, pattern or folder - - ``, visualizations specifier; currently available visualizations are: - * `bbox` - bounding boxes of detected persons; - * `dp_segm` - segmentation masks for detected persons; - * `dp_u` - each body part is colored according to the estimated values of the - U coordinate in part parameterization; - * `dp_v` - each body part is colored according to the estimated values of the - V coordinate in part parameterization; - * `dp_contour` - plots contours with color-coded U and V coordinates - - -One can additionally provide the following optional arguments: - - `--min_score` to only show detections with sufficient scores that are not lower than provided value - - `--nms_thresh` to additionally apply non-maximum suppression to detections at a given threshold - - `--output` to define visualization file name template, which defaults to `output.png`. - To distinguish output file names for different images, the tool appends 1-based entry index, - e.g. output.0001.png, output.0002.png, etc... - - -The following examples show how to output results of a DensePose model -with ResNet-50 FPN backbone using different visualizations for image `image.jpg`: - -1. Show bounding box and segmentation: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_segm -v -``` -![Bounding Box + Segmentation Visualization](images/res_bbox_dp_segm.jpg) - -2. Show bounding box and estimated U coordinates for body parts: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_u -v -``` -![Bounding Box + U Coordinate Visualization](images/res_bbox_dp_u.jpg) - -3. Show bounding box and estimated V coordinates for body parts: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg bbox,dp_v -v -``` -![Bounding Box + V Coordinate Visualization](images/res_bbox_dp_v.jpg) - -4. 
Show bounding box and estimated U and V coordinates via contour plots: -```bash -python apply_net.py show configs/densepose_rcnn_R_50_FPN_s1x.yaml DensePose_ResNet50_FPN_s1x-e2e.pkl image.jpg dp_contour,bbox -v -``` -![Bounding Box + Contour Visualization](images/res_bbox_dp_contour.jpg) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md deleted file mode 100644 index b0a764b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/doc/TOOL_QUERY_DB.md +++ /dev/null @@ -1,105 +0,0 @@ - -# Query Dataset - -`query_db` is a tool to print or visualize DensePose data from a dataset. -It has two modes: `print` and `show` to output dataset entries to standard -output or to visualize them on images. - -## Print Mode - -The general command form is: -```bash -python query_db.py print [-h] [-v] [--max-entries N] -``` - -There are two mandatory arguments: - - ``, DensePose dataset specification, from which to select - the entries (e.g. `densepose_coco_2014_train`). - - ``, dataset entry selector which can be a single specification, - or a comma-separated list of specifications of the form - `field[:type]=value` for exact match with the value - or `field[:type]=min-max` for a range of values - -One can additionally limit the maximum number of entries to output -by providing `--max-entries` argument. - -Examples: - -1. Output at most 10 first entries from the `densepose_coco_2014_train` dataset: -```bash -python query_db.py print densepose_coco_2014_train \* --max-entries 10 -v -``` - -2. Output all entries with `file_name` equal to `COCO_train2014_000000000036.jpg`: -```bash -python query_db.py print densepose_coco_2014_train file_name=COCO_train2014_000000000036.jpg -v -``` - -3. Output all entries with `image_id` between 36 and 156: -```bash -python query_db.py print densepose_coco_2014_train image_id:int=36-156 -v -``` - -## Visualization Mode - -The general command form is: -```bash -python query_db.py show [-h] [-v] [--max-entries N] [--output ] -``` - -There are three mandatory arguments: - - ``, DensePose dataset specification, from which to select - the entries (e.g. `densepose_coco_2014_train`). - - ``, dataset entry selector which can be a single specification, - or a comma-separated list of specifications of the form - `field[:type]=value` for exact match with the value - or `field[:type]=min-max` for a range of values - - ``, visualizations specifier; currently available visualizations are: - * `bbox` - bounding boxes of annotated persons; - * `dp_i` - annotated points colored according to the containing part; - * `dp_pts` - annotated points in green color; - * `dp_segm` - segmentation masks for annotated persons; - * `dp_u` - annotated points colored according to their U coordinate in part parameterization; - * `dp_v` - annotated points colored according to their V coordinate in part parameterization; - -One can additionally provide one of the two optional arguments: - - `--max_entries` to limit the maximum number of entries to visualize - - `--output` to provide visualization file name template, which defaults - to `output.png`. To distinguish file names for different dataset - entries, the tool appends 1-based entry index to the output file name, - e.g. output.0001.png, output.0002.png, etc. 
- -The following examples show how to output different visualizations for image with `id = 322` -from `densepose_coco_2014_train` dataset: - -1. Show bounding box and segmentation: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v -``` -![Bounding Box + Segmentation Visualization](images/vis_bbox_dp_segm.jpg) - -2. Show bounding box and points colored according to the containing part: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_i -v -``` -![Bounding Box + Point Label Visualization](images/vis_bbox_dp_i.jpg) - -3. Show bounding box and annotated points in green color: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_segm -v -``` -![Bounding Box + Point Visualization](images/vis_bbox_dp_pts.jpg) - -4. Show bounding box and annotated points colored according to their U coordinate in part parameterization: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_u -v -``` -![Bounding Box + Point U Visualization](images/vis_bbox_dp_u.jpg) - -5. Show bounding box and annotated points colored according to their V coordinate in part parameterization: -```bash -python query_db.py show densepose_coco_2014_train image_id:int=322 bbox,dp_v -v -``` -![Bounding Box + Point V Visualization](images/vis_bbox_dp_v.jpg) - - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/query_db.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/query_db.py deleted file mode 100644 index 6d3ea2f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/query_db.py +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import argparse -import logging -import os -import sys -from timeit import default_timer as timer -from typing import Any, ClassVar, Dict, List -import torch -from fvcore.common.file_io import PathManager - -from detectron2.data.catalog import DatasetCatalog -from detectron2.utils.logger import setup_logger - -from densepose.data.structures import DensePoseDataRelative -from densepose.utils.dbhelper import EntrySelector -from densepose.utils.logger import verbosity_to_level -from densepose.vis.base import CompoundVisualizer -from densepose.vis.bounding_box import BoundingBoxVisualizer -from densepose.vis.densepose import ( - DensePoseDataCoarseSegmentationVisualizer, - DensePoseDataPointsIVisualizer, - DensePoseDataPointsUVisualizer, - DensePoseDataPointsVisualizer, - DensePoseDataPointsVVisualizer, -) - -DOC = """Query DB - a tool to print / visualize data from a database -""" - -LOGGER_NAME = "query_db" - -logger = logging.getLogger(LOGGER_NAME) - -_ACTION_REGISTRY: Dict[str, "Action"] = {} - - -class Action(object): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - parser.add_argument( - "-v", - "--verbosity", - action="count", - help="Verbose mode. Multiple -v options increase the verbosity.", - ) - - -def register_action(cls: type): - """ - Decorator for action classes to automate action registration - """ - global _ACTION_REGISTRY - _ACTION_REGISTRY[cls.COMMAND] = cls - return cls - - -class EntrywiseAction(Action): - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(EntrywiseAction, cls).add_arguments(parser) - parser.add_argument( - "dataset", metavar="", help="Dataset name (e.g. 
densepose_coco_2014_train)" - ) - parser.add_argument( - "selector", - metavar="", - help="Dataset entry selector in the form field1[:type]=value1[," - "field2[:type]=value_min-value_max...] which selects all " - "entries from the dataset that satisfy the constraints", - ) - parser.add_argument( - "--max-entries", metavar="N", help="Maximum number of entries to process", type=int - ) - - @classmethod - def execute(cls: type, args: argparse.Namespace): - dataset = setup_dataset(args.dataset) - entry_selector = EntrySelector.from_string(args.selector) - context = cls.create_context(args) - if args.max_entries is not None: - for _, entry in zip(range(args.max_entries), dataset): - if entry_selector(entry): - cls.execute_on_entry(entry, context) - else: - for entry in dataset: - if entry_selector(entry): - cls.execute_on_entry(entry, context) - - @classmethod - def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: - context = {} - return context - - -@register_action -class PrintAction(EntrywiseAction): - """ - Print action that outputs selected entries to stdout - """ - - COMMAND: ClassVar[str] = "print" - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Output selected entries to stdout. ") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(PrintAction, cls).add_arguments(parser) - - @classmethod - def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]): - import pprint - - printer = pprint.PrettyPrinter(indent=2, width=200, compact=True) - printer.pprint(entry) - - -@register_action -class ShowAction(EntrywiseAction): - """ - Show action that visualizes selected entries on an image - """ - - COMMAND: ClassVar[str] = "show" - VISUALIZERS: ClassVar[Dict[str, object]] = { - "dp_segm": DensePoseDataCoarseSegmentationVisualizer(), - "dp_i": DensePoseDataPointsIVisualizer(), - "dp_u": DensePoseDataPointsUVisualizer(), - "dp_v": DensePoseDataPointsVVisualizer(), - "dp_pts": DensePoseDataPointsVisualizer(), - "bbox": BoundingBoxVisualizer(), - } - - @classmethod - def add_parser(cls: type, subparsers: argparse._SubParsersAction): - parser = subparsers.add_parser(cls.COMMAND, help="Visualize selected entries") - cls.add_arguments(parser) - parser.set_defaults(func=cls.execute) - - @classmethod - def add_arguments(cls: type, parser: argparse.ArgumentParser): - super(ShowAction, cls).add_arguments(parser) - parser.add_argument( - "visualizations", - metavar="", - help="Comma separated list of visualizations, possible values: " - "[{}]".format(",".join(sorted(cls.VISUALIZERS.keys()))), - ) - parser.add_argument( - "--output", - metavar="", - default="output.png", - help="File name to save output to", - ) - - @classmethod - def execute_on_entry(cls: type, entry: Dict[str, Any], context: Dict[str, Any]): - import cv2 - import numpy as np - - image_fpath = PathManager.get_local_path(entry["file_name"]) - image = cv2.imread(image_fpath, cv2.IMREAD_GRAYSCALE) - image = np.tile(image[:, :, np.newaxis], [1, 1, 3]) - datas = cls._extract_data_for_visualizers_from_entry(context["vis_specs"], entry) - visualizer = context["visualizer"] - image_vis = visualizer.visualize(image, datas) - entry_idx = context["entry_idx"] + 1 - out_fname = cls._get_out_fname(entry_idx, context["out_fname"]) - cv2.imwrite(out_fname, image_vis) - logger.info(f"Output saved to {out_fname}") - 
context["entry_idx"] += 1 - - @classmethod - def _get_out_fname(cls: type, entry_idx: int, fname_base: str): - base, ext = os.path.splitext(fname_base) - return base + ".{0:04d}".format(entry_idx) + ext - - @classmethod - def create_context(cls: type, args: argparse.Namespace) -> Dict[str, Any]: - vis_specs = args.visualizations.split(",") - visualizers = [] - for vis_spec in vis_specs: - vis = cls.VISUALIZERS[vis_spec] - visualizers.append(vis) - context = { - "vis_specs": vis_specs, - "visualizer": CompoundVisualizer(visualizers), - "out_fname": args.output, - "entry_idx": 0, - } - return context - - @classmethod - def _extract_data_for_visualizers_from_entry( - cls: type, vis_specs: List[str], entry: Dict[str, Any] - ): - dp_list = [] - bbox_list = [] - for annotation in entry["annotations"]: - is_valid, _ = DensePoseDataRelative.validate_annotation(annotation) - if not is_valid: - continue - bbox = torch.as_tensor(annotation["bbox"]) - bbox_list.append(bbox) - dp_data = DensePoseDataRelative(annotation) - dp_list.append(dp_data) - datas = [] - for vis_spec in vis_specs: - datas.append(bbox_list if "bbox" == vis_spec else (bbox_list, dp_list)) - return datas - - -def setup_dataset(dataset_name): - logger.info("Loading dataset {}".format(dataset_name)) - start = timer() - dataset = DatasetCatalog.get(dataset_name) - stop = timer() - logger.info("Loaded dataset {} in {:.3f}s".format(dataset_name, stop - start)) - return dataset - - -def create_argument_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser( - description=DOC, - formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=120), - ) - parser.set_defaults(func=lambda _: parser.print_help(sys.stdout)) - subparsers = parser.add_subparsers(title="Actions") - for _, action in _ACTION_REGISTRY.items(): - action.add_parser(subparsers) - return parser - - -def main(): - parser = create_argument_parser() - args = parser.parse_args() - verbosity = args.verbosity if hasattr(args, "verbosity") else None - global logger - logger = setup_logger(name=LOGGER_NAME) - logger.setLevel(verbosity_to_level(verbosity)) - args.func(args) - - -if __name__ == "__main__": - main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/common.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/common.py deleted file mode 100644 index 13bf0dd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/common.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
- -import os -import torch - -from detectron2.config import get_cfg -from detectron2.engine import default_setup -from detectron2.modeling import build_model - -from densepose import add_dataset_category_config, add_densepose_config - -_BASE_CONFIG_DIR = "configs" -_EVOLUTION_CONFIG_SUB_DIR = "evolution" -_QUICK_SCHEDULES_CONFIG_SUB_DIR = "quick_schedules" -_BASE_CONFIG_FILE_PREFIX = "Base-" -_CONFIG_FILE_EXT = ".yaml" - - -def _get_base_config_dir(): - """ - Return the base directory for configurations - """ - return os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", _BASE_CONFIG_DIR) - - -def _get_evolution_config_dir(): - """ - Return the base directory for evolution configurations - """ - return os.path.join(_get_base_config_dir(), _EVOLUTION_CONFIG_SUB_DIR) - - -def _get_quick_schedules_config_dir(): - """ - Return the base directory for quick schedules configurations - """ - return os.path.join(_get_base_config_dir(), _QUICK_SCHEDULES_CONFIG_SUB_DIR) - - -def _collect_config_files(config_dir): - """ - Collect all configuration files (i.e. densepose_*.yaml) directly in the specified directory - """ - start = _get_base_config_dir() - results = [] - for entry in os.listdir(config_dir): - path = os.path.join(config_dir, entry) - if not os.path.isfile(path): - continue - _, ext = os.path.splitext(entry) - if ext != _CONFIG_FILE_EXT: - continue - if entry.startswith(_BASE_CONFIG_FILE_PREFIX): - continue - config_file = os.path.relpath(path, start) - results.append(config_file) - return results - - -def get_config_files(): - """ - Get all the configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_base_config_dir()) - - -def get_evolution_config_files(): - """ - Get all the evolution configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_evolution_config_dir()) - - -def get_quick_schedules_config_files(): - """ - Get all the quick schedules configuration files (relative to the base configuration directory) - """ - return _collect_config_files(_get_quick_schedules_config_dir()) - - -def _get_model_config(config_file): - """ - Load and return the configuration from the specified file (relative to the base configuration - directory) - """ - cfg = get_cfg() - add_dataset_category_config(cfg) - add_densepose_config(cfg) - path = os.path.join(_get_base_config_dir(), config_file) - cfg.merge_from_file(path) - if not torch.cuda.is_available(): - cfg.MODEL_DEVICE = "cpu" - return cfg - - -def get_model(config_file): - """ - Get the model from the specified file (relative to the base configuration directory) - """ - cfg = _get_model_config(config_file) - return build_model(cfg) - - -def setup(config_file): - """ - Setup the configuration from the specified file (relative to the base configuration directory) - """ - cfg = _get_model_config(config_file) - cfg.freeze() - default_setup(cfg, {}) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py deleted file mode 100644 index eed1310..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_model_e2e.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
- -import unittest -import torch - -from detectron2.structures import BitMasks, Boxes, Instances - -from .common import get_model - - -# TODO(plabatut): Modularize detectron2 tests and re-use -def make_model_inputs(image, instances=None): - if instances is None: - return {"image": image} - - return {"image": image, "instances": instances} - - -def make_empty_instances(h, w): - instances = Instances((h, w)) - instances.gt_boxes = Boxes(torch.rand(0, 4)) - instances.gt_classes = torch.tensor([]).to(dtype=torch.int64) - instances.gt_masks = BitMasks(torch.rand(0, h, w)) - return instances - - -class ModelE2ETest(unittest.TestCase): - CONFIG_PATH = "" - - def setUp(self): - self.model = get_model(self.CONFIG_PATH) - - def _test_eval(self, sizes): - inputs = [make_model_inputs(torch.rand(3, size[0], size[1])) for size in sizes] - self.model.eval() - self.model(inputs) - - -class DensePoseRCNNE2ETest(ModelE2ETest): - CONFIG_PATH = "densepose_rcnn_R_101_FPN_s1x.yaml" - - def test_empty_data(self): - self._test_eval([(200, 250), (200, 249)]) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_setup.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_setup.py deleted file mode 100644 index 96827f1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_setup.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -import unittest - -from .common import ( - get_config_files, - get_evolution_config_files, - get_quick_schedules_config_files, - setup, -) - - -class TestSetup(unittest.TestCase): - def _test_setup(self, config_file): - setup(config_file) - - def test_setup_configs(self): - config_files = get_config_files() - for config_file in config_files: - self._test_setup(config_file) - - def test_setup_evolution_configs(self): - config_files = get_evolution_config_files() - for config_file in config_files: - self._test_setup(config_file) - - def test_setup_quick_schedules_configs(self): - config_files = get_quick_schedules_config_files() - for config_file in config_files: - self._test_setup(config_file) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_structures.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_structures.py deleted file mode 100644 index ad97c23..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/tests/test_structures.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
- -import unittest - -from densepose.data.structures import normalized_coords_transform - - -class TestStructures(unittest.TestCase): - def test_normalized_coords_transform(self): - bbox = (32, 24, 288, 216) - x0, y0, w, h = bbox - xmin, ymin, xmax, ymax = x0, y0, x0 + w, y0 + h - f = normalized_coords_transform(*bbox) - # Top-left - expected_p, actual_p = (-1, -1), f((xmin, ymin)) - self.assertEqual(expected_p, actual_p) - # Top-right - expected_p, actual_p = (1, -1), f((xmax, ymin)) - self.assertEqual(expected_p, actual_p) - # Bottom-left - expected_p, actual_p = (-1, 1), f((xmin, ymax)) - self.assertEqual(expected_p, actual_p) - # Bottom-right - expected_p, actual_p = (1, 1), f((xmax, ymax)) - self.assertEqual(expected_p, actual_p) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/train_net.py deleted file mode 100644 index 9d2e7bd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/DensePose/train_net.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -DensePose Training Script. - -This script is similar to the training script in detectron2/tools. - -It is an example of how a user might use detectron2 for a new project. -""" - -import logging -import os -from collections import OrderedDict -from fvcore.common.file_io import PathManager - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import CfgNode, get_cfg -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch -from detectron2.evaluation import COCOEvaluator, DatasetEvaluators, verify_results -from detectron2.modeling import DatasetMapperTTA -from detectron2.utils.logger import setup_logger - -from densepose import ( - DensePoseCOCOEvaluator, - DensePoseGeneralizedRCNNWithTTA, - add_dataset_category_config, - add_densepose_config, - load_from_cfg, -) -from densepose.data import DatasetMapper, build_detection_test_loader, build_detection_train_loader - - -class Trainer(DefaultTrainer): - @classmethod - def build_evaluator(cls, cfg: CfgNode, dataset_name, output_folder=None): - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluators = [COCOEvaluator(dataset_name, cfg, True, output_folder)] - if cfg.MODEL.DENSEPOSE_ON: - evaluators.append(DensePoseCOCOEvaluator(dataset_name, True, output_folder)) - return DatasetEvaluators(evaluators) - - @classmethod - def build_test_loader(cls, cfg: CfgNode, dataset_name): - return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False)) - - @classmethod - def build_train_loader(cls, cfg: CfgNode): - return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True)) - - @classmethod - def test_with_TTA(cls, cfg: CfgNode, model): - logger = logging.getLogger("detectron2.trainer") - # In the end of training, run an evaluation with TTA - # Only support some R-CNN models. 
- logger.info("Running inference with test-time augmentation ...") - transform_data = load_from_cfg(cfg) - model = DensePoseGeneralizedRCNNWithTTA(cfg, model, transform_data, DatasetMapperTTA(cfg)) - evaluators = [ - cls.build_evaluator( - cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") - ) - for name in cfg.DATASETS.TEST - ] - res = cls.test(cfg, model, evaluators) - res = OrderedDict({k + "_TTA": v for k, v in res.items()}) - return res - - -def setup(args): - cfg = get_cfg() - add_dataset_category_config(cfg) - add_densepose_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - # Setup logger for "densepose" module - setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="densepose") - return cfg - - -def main(args): - cfg = setup(args) - # disable strict kwargs checking: allow one to specify path handle - # hints through kwargs, like timeout in DP evaluation - PathManager.set_strict_kwargs_checking(False) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - if cfg.TEST.AUG.ENABLED: - trainer.register_hooks( - [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] - ) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/README.md deleted file mode 100644 index 443736f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/README.md +++ /dev/null @@ -1,135 +0,0 @@ -# PointRend: Image Segmentation as Rendering - -Alexander Kirillov, Yuxin Wu, Kaiming He, Ross Girshick - -[[`arXiv`](https://arxiv.org/abs/1912.08193)] [[`BibTeX`](#CitingPointRend)] - -
- -

- -In this repository, we release code for PointRend in Detectron2. PointRend can be flexibly applied to both instance and semantic segmentation tasks by building on top of existing state-of-the-art models. - -## Installation -Install Detectron 2 following [INSTALL.md](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md). You are ready to go! - -## Quick start and visualization - -This [Colab Notebook](https://colab.research.google.com/drive/1isGPL5h5_cKoPPhVL9XhMokRtHDvmMVL) tutorial contains examples of PointRend usage and visualizations of its point sampling stages. - -## Training - -To train a model with 8 GPUs run: -```bash -cd /path/to/detectron2/projects/PointRend -python train_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly: -```bash -cd /path/to/detectron2/projects/PointRend -python train_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint -``` - -# Pretrained Models - -## Instance Segmentation -#### COCO - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Mask head | Backbone | lr sched | Output resolution | mask AP | mask AP* | model id | download |
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| PointRend | R50-FPN | 1× | 224×224 | 36.2 | 39.7 | 164254221 | model \| metrics |
-| PointRend | R50-FPN | 3× | 224×224 | 38.3 | 41.6 | 164955410 | model \| metrics |
-
-AP* is COCO mask AP evaluated against the higher-quality LVIS annotations; see the paper for details. Run `python detectron2/datasets/prepare_cocofied_lvis.py` to prepare GT files for AP* evaluation. Since LVIS annotations are not exhaustive, `lvis-api` and not `cocoapi` should be used to evaluate AP*.
-
-#### Cityscapes
-Cityscapes model is trained with ImageNet pretraining.
-
-| Mask head | Backbone | lr sched | Output resolution | mask AP | model id | download |
-| --- | --- | --- | --- | --- | --- | --- |
-| PointRend | R50-FPN | 1× | 224×224 | 35.9 | 164255101 | model \| metrics |
-
-
-## Semantic Segmentation
-
-#### Cityscapes
-Cityscapes model is trained with ImageNet pretraining.
-
-| Method | Backbone | Output resolution | mIoU | model id | download |
-| --- | --- | --- | --- | --- | --- |
-| SemanticFPN + PointRend | R101-FPN | 1024×2048 | 78.6 | 186480235 | model \| metrics |
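For quick experimentation with the pretrained models listed above, a minimal inference sketch is shown below. It assumes detectron2 and this project's `point_rend` package are importable (as in `finetune_net.py` later in this diff); the checkpoint path and input image name are placeholders:

```python
# Sketch: run a pretrained PointRend instance-segmentation model on a single image.
import cv2

from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

from point_rend import add_pointrend_config

cfg = get_cfg()
add_pointrend_config(cfg)
cfg.merge_from_file("configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml")
cfg.MODEL.WEIGHTS = "/path/to/model_checkpoint"  # e.g. a checkpoint from the COCO table
predictor = DefaultPredictor(cfg)

image = cv2.imread("input.jpg")                  # BGR image read with OpenCV
instances = predictor(image)["instances"]
print(instances.pred_masks.shape)                # PointRend-refined instance masks
```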
- -## Citing PointRend - -If you use PointRend, please use the following BibTeX entry. - -```BibTeX -@InProceedings{kirillov2019pointrend, - title={{PointRend}: Image Segmentation as Rendering}, - author={Alexander Kirillov and Yuxin Wu and Kaiming He and Ross Girshick}, - journal={ArXiv:1912.08193}, - year={2019} -} -``` diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml deleted file mode 100644 index d391718..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/Base-PointRend-RCNN-FPN.yaml +++ /dev/null @@ -1,21 +0,0 @@ -_BASE_: "../../../../configs/Base-RCNN-FPN.yaml" -MODEL: - ROI_HEADS: - NAME: "PointRendROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - ROI_BOX_HEAD: - TRAIN_ON_PRED_BOXES: True - ROI_MASK_HEAD: - NAME: "CoarseMaskHead" - FC_DIM: 1024 - NUM_FC: 2 - OUTPUT_SIDE_RESOLUTION: 7 - IN_FEATURES: ["p2"] - POINT_HEAD_ON: True - POINT_HEAD: - FC_DIM: 256 - NUM_FC: 3 - IN_FEATURES: ["p2"] -INPUT: - # PointRend for instance segmenation does not work with "polygon" mask_format. - MASK_FORMAT: "bitmask" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml deleted file mode 100644 index c23dbe1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_cityscapes.yaml +++ /dev/null @@ -1,23 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - MASK_ON: true - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 8 - POINT_HEAD: - NUM_CLASSES: 8 -DATASETS: - TEST: ("cityscapes_fine_instance_seg_val",) - TRAIN: ("cityscapes_fine_instance_seg_train",) -SOLVER: - BASE_LR: 0.01 - IMS_PER_BATCH: 8 - MAX_ITER: 24000 - STEPS: (18000,) -INPUT: - MAX_SIZE_TEST: 2048 - MAX_SIZE_TRAIN: 2048 - MIN_SIZE_TEST: 1024 - MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml deleted file mode 100644 index e9fc573..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_1x_coco.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - MASK_ON: true - RESNETS: - DEPTH: 50 -# To add COCO AP evaluation against the higher-quality LVIS annotations. 
-# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml deleted file mode 100644 index 2f013f3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_coco.yaml +++ /dev/null @@ -1,13 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - MASK_ON: true - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 -# To add COCO AP evaluation against the higher-quality LVIS annotations. -# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml deleted file mode 100644 index a4af81d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml +++ /dev/null @@ -1,20 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - MASK_ON: true - RESNETS: - DEPTH: 50 - ROI_HEADS: - NUM_CLASSES: 1 - POINT_HEAD: - NUM_CLASSES: 1 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 - IMS_PER_BATCH: 1 -# To add COCO AP evaluation against the higher-quality LVIS annotations. -# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") -DATASETS: - TRAIN: ("CIHP_train",) - TEST: ("CIHP_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml deleted file mode 100644 index 8e52d82..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml +++ /dev/null @@ -1,28 +0,0 @@ -_BASE_: Base-PointRend-RCNN-FPN.yaml -MODEL: - WEIGHTS: "./X-101-32x8d.pkl" - PIXEL_STD: [57.375, 57.120, 58.395] - MASK_ON: true - RESNETS: - STRIDE_IN_1X1: False # this is a C2 model - NUM_GROUPS: 32 - WIDTH_PER_GROUP: 8 - DEPTH: 101 - ROI_HEADS: - NUM_CLASSES: 1 - POINT_HEAD: - NUM_CLASSES: 1 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 - IMS_PER_BATCH: 1 -# To add COCO AP evaluation against the higher-quality LVIS annotations. 
-# DATASETS: -# TEST: ("coco_2017_val", "lvis_v0.5_val_cocofied") -INPUT: - MIN_SIZE_TRAIN: (640, 864) - MIN_SIZE_TRAIN_SAMPLING: "range" - MAX_SIZE_TRAIN: 1440 -DATASETS: - TRAIN: ("CIHP_train",) - TEST: ("CIHP_val",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml deleted file mode 100644 index 00562a9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/Base-PointRend-Semantic-FPN.yaml +++ /dev/null @@ -1,19 +0,0 @@ -_BASE_: "../../../../configs/Base-RCNN-FPN.yaml" -MODEL: - META_ARCHITECTURE: "SemanticSegmentor" - BACKBONE: - FREEZE_AT: 0 - SEM_SEG_HEAD: - NAME: "PointRendSemSegHead" - POINT_HEAD: - NUM_CLASSES: 54 - FC_DIM: 256 - NUM_FC: 3 - IN_FEATURES: ["p2"] - TRAIN_NUM_POINTS: 1024 - SUBDIVISION_STEPS: 2 - SUBDIVISION_NUM_POINTS: 8192 - COARSE_SEM_SEG_HEAD_NAME: "SemSegFPNHead" -DATASETS: - TRAIN: ("coco_2017_train_panoptic_stuffonly",) - TEST: ("coco_2017_val_panoptic_stuffonly",) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml deleted file mode 100644 index 4965b06..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_101_FPN_1x_cityscapes.yaml +++ /dev/null @@ -1,33 +0,0 @@ -_BASE_: Base-PointRend-Semantic-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-101.pkl - RESNETS: - DEPTH: 101 - SEM_SEG_HEAD: - NUM_CLASSES: 19 - POINT_HEAD: - NUM_CLASSES: 19 - TRAIN_NUM_POINTS: 2048 - SUBDIVISION_NUM_POINTS: 8192 -DATASETS: - TRAIN: ("cityscapes_fine_sem_seg_train",) - TEST: ("cityscapes_fine_sem_seg_val",) -SOLVER: - BASE_LR: 0.01 - STEPS: (40000, 55000) - MAX_ITER: 65000 - IMS_PER_BATCH: 32 -INPUT: - MIN_SIZE_TRAIN: (512, 768, 1024, 1280, 1536, 1792, 2048) - MIN_SIZE_TRAIN_SAMPLING: "choice" - MIN_SIZE_TEST: 1024 - MAX_SIZE_TRAIN: 4096 - MAX_SIZE_TEST: 2048 - CROP: - ENABLED: True - TYPE: "absolute" - SIZE: (512, 1024) - SINGLE_CATEGORY_MAX_AREA: 0.75 - COLOR_AUG_SSD: True -DATALOADER: - NUM_WORKERS: 16 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_coco.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_coco.yaml deleted file mode 100644 index 7948bd8..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/configs/SemanticSegmentation/pointrend_semantic_R_50_FPN_1x_coco.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: Base-PointRend-Semantic-FPN.yaml -MODEL: - WEIGHTS: detectron2://ImageNetPretrained/MSRA/R-50.pkl - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/finetune_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/finetune_net.py deleted file mode 100644 index b99baf9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/finetune_net.py +++ /dev/null @@ -1,139 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -""" -PointRend Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os -import torch - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog, build_detection_train_loader -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - DatasetEvaluators, - LVISEvaluator, - SemSegEvaluator, - verify_results, -) - -from point_rend import SemSegDatasetMapper, add_pointrend_config - -os.environ['CUDA_VISIBLE_DEVICES'] = '4' -# Register Custom Dataset -from detectron2.data.datasets import register_coco_instances -register_coco_instances("CIHP_train", {}, "/data03/v_xuyunqiu/multi_parsing/data/msrcnn_finetune_annotations/CIHP_train.json", "/data03/v_xuyunqiu/data/instance-level_human_parsing/Training/Images") -register_coco_instances("CIHP_val", {}, "/data03/v_xuyunqiu/multi_parsing/data/msrcnn_finetune_annotations/CIHP_val.json", "/data03/v_xuyunqiu/data/instance-level_human_parsing/Validation/Images") - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains a number pre-defined logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if evaluator_type == "coco": - return COCOEvaluator(dataset_name, cfg, True, output_folder) - if evaluator_type == "sem_seg": - return SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." 
- return CityscapesSemSegEvaluator(dataset_name) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def build_train_loader(cls, cfg): - if "SemanticSegmentor" in cfg.MODEL.META_ARCHITECTURE: - mapper = SemSegDatasetMapper(cfg, True) - else: - mapper = None - return build_detection_train_loader(cfg, mapper=mapper) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_pointrend_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/logs/hadoop.kylin.libdfs.log b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/logs/hadoop.kylin.libdfs.log deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/__init__.py deleted file mode 100644 index 4020fe0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .config import add_pointrend_config -from .coarse_mask_head import CoarseMaskHead -from .roi_heads import PointRendROIHeads -from .dataset_mapper import SemSegDatasetMapper -from .semantic_seg import PointRendSemSegHead diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/coarse_mask_head.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/coarse_mask_head.py deleted file mode 100644 index 3f1cffb..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/coarse_mask_head.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import Conv2d, ShapeSpec -from detectron2.modeling import ROI_MASK_HEAD_REGISTRY - - -@ROI_MASK_HEAD_REGISTRY.register() -class CoarseMaskHead(nn.Module): - """ - A mask head with fully connected layers. Given pooled features it first reduces channels and - spatial dimensions with conv layers and then uses FC layers to predict coarse masks analogously - to the standard box head. 
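Editor's note: the coarse mask head defined below first shrinks the pooled RoI features with a 1x1 conv (channels) and a stride-2 conv (spatial size), then runs FC layers and reshapes the final linear output into a per-class K x K coarse mask. A minimal shape-level sketch of that pattern in plain PyTorch, with made-up dimensions and without the config parsing, optional channel reduction, or weight initialization of the registered head:

```python
import torch
from torch import nn
import torch.nn.functional as F

class TinyCoarseHead(nn.Module):
    """Shape-level sketch: 1x1 conv shrinks channels, a stride-2 conv shrinks space,
    then FC layers emit a KxK coarse mask per class."""

    def __init__(self, in_ch=256, conv_dim=128, fc_dim=1024, num_classes=2, side=14, out_side=7):
        super().__init__()
        self.reduce_ch = nn.Conv2d(in_ch, conv_dim, kernel_size=1)
        self.reduce_sp = nn.Conv2d(conv_dim, conv_dim, kernel_size=2, stride=2)
        self.fc1 = nn.Linear(conv_dim * (side // 2) ** 2, fc_dim)
        self.fc2 = nn.Linear(fc_dim, fc_dim)
        self.predict = nn.Linear(fc_dim, num_classes * out_side * out_side)
        self.num_classes, self.out_side = num_classes, out_side

    def forward(self, x):                       # x: (R, in_ch, side, side) pooled RoI features
        x = F.relu(self.reduce_sp(F.relu(self.reduce_ch(x))))
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc2(F.relu(self.fc1(x))))
        return self.predict(x).view(-1, self.num_classes, self.out_side, self.out_side)

print(TinyCoarseHead()(torch.randn(3, 256, 14, 14)).shape)  # torch.Size([3, 2, 7, 7])
```

The `(3, 2, 7, 7)` output corresponds to 3 RoIs, 2 classes, and a 7 x 7 coarse mask per class, which the point head later refines.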
- """ - - def __init__(self, cfg, input_shape: ShapeSpec): - """ - The following attributes are parsed from config: - conv_dim: the output dimension of the conv layers - fc_dim: the feature dimenstion of the FC layers - num_fc: the number of FC layers - output_side_resolution: side resolution of the output square mask prediction - """ - super(CoarseMaskHead, self).__init__() - - # fmt: off - self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES - conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM - self.fc_dim = cfg.MODEL.ROI_MASK_HEAD.FC_DIM - num_fc = cfg.MODEL.ROI_MASK_HEAD.NUM_FC - self.output_side_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION - self.input_channels = input_shape.channels - self.input_h = input_shape.height - self.input_w = input_shape.width - # fmt: on - - self.conv_layers = [] - if self.input_channels > conv_dim: - self.reduce_channel_dim_conv = Conv2d( - self.input_channels, - conv_dim, - kernel_size=1, - stride=1, - padding=0, - bias=True, - activation=F.relu, - ) - self.conv_layers.append(self.reduce_channel_dim_conv) - - self.reduce_spatial_dim_conv = Conv2d( - conv_dim, conv_dim, kernel_size=2, stride=2, padding=0, bias=True, activation=F.relu - ) - self.conv_layers.append(self.reduce_spatial_dim_conv) - - input_dim = conv_dim * self.input_h * self.input_w - input_dim //= 4 - - self.fcs = [] - for k in range(num_fc): - fc = nn.Linear(input_dim, self.fc_dim) - self.add_module("coarse_mask_fc{}".format(k + 1), fc) - self.fcs.append(fc) - input_dim = self.fc_dim - - output_dim = self.num_classes * self.output_side_resolution * self.output_side_resolution - - self.prediction = nn.Linear(self.fc_dim, output_dim) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.prediction.weight, std=0.001) - nn.init.constant_(self.prediction.bias, 0) - - for layer in self.conv_layers: - weight_init.c2_msra_fill(layer) - for layer in self.fcs: - weight_init.c2_xavier_fill(layer) - - def forward(self, x): - # unlike BaseMaskRCNNHead, this head only outputs intermediate - # features, because the features will be used later by PointHead. - N = x.shape[0] - x = x.view(N, self.input_channels, self.input_h, self.input_w) - for layer in self.conv_layers: - x = layer(x) - x = torch.flatten(x, start_dim=1) - for layer in self.fcs: - x = F.relu(layer(x)) - return self.prediction(x).view( - N, self.num_classes, self.output_side_resolution, self.output_side_resolution - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/color_augmentation.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/color_augmentation.py deleted file mode 100644 index 27344c4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/color_augmentation.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import random -import cv2 -from fvcore.transforms.transform import Transform - - -class ColorAugSSDTransform(Transform): - """ - A color related data augmentation used in Single Shot Multibox Detector (SSD). - - Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, - Scott Reed, Cheng-Yang Fu, Alexander C. Berg. - SSD: Single Shot MultiBox Detector. ECCV 2016. 
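Editor's note: the SSD-style transform implemented below jitters brightness, contrast, saturation, and hue, each with probability 0.5, and every channel-space operation reduces to the same affine remap clipped back to uint8. A tiny illustration of that `convert` step with made-up parameters:

```python
import numpy as np

def convert(img, alpha=1.0, beta=0.0):
    """img * alpha + beta, clipped back into uint8 range."""
    return np.clip(img.astype(np.float32) * alpha + beta, 0, 255).astype(np.uint8)

img = np.full((2, 2, 3), 200, dtype=np.uint8)
print(convert(img, alpha=1.3)[0, 0])   # [255 255 255]: contrast-style scaling saturates
print(convert(img, beta=-32)[0, 0])    # [168 168 168]: brightness shift
```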
- - Implementation based on: - - https://github.com/weiliu89/caffe/blob - /4817bf8b4200b35ada8ed0dc378dceaf38c539e4 - /src/caffe/util/im_transforms.cpp - - https://github.com/chainer/chainercv/blob - /7159616642e0be7c5b3ef380b848e16b7e99355b/chainercv - /links/model/ssd/transforms.py - """ - - def __init__( - self, - img_format, - brightness_delta=32, - contrast_low=0.5, - contrast_high=1.5, - saturation_low=0.5, - saturation_high=1.5, - hue_delta=18, - ): - super().__init__() - assert img_format in ["BGR", "RGB"] - self.is_rgb = img_format == "RGB" - del img_format - self._set_attributes(locals()) - - def apply_coords(self, coords): - return coords - - def apply_segmentation(self, segmentation): - return segmentation - - def apply_image(self, img, interp=None): - if self.is_rgb: - img = img[:, :, [2, 1, 0]] - img = self.brightness(img) - if random.randrange(2): - img = self.contrast(img) - img = self.saturation(img) - img = self.hue(img) - else: - img = self.saturation(img) - img = self.hue(img) - img = self.contrast(img) - if self.is_rgb: - img = img[:, :, [2, 1, 0]] - return img - - def convert(self, img, alpha=1, beta=0): - img = img.astype(np.float32) * alpha + beta - img = np.clip(img, 0, 255) - return img.astype(np.uint8) - - def brightness(self, img): - if random.randrange(2): - return self.convert( - img, beta=random.uniform(-self.brightness_delta, self.brightness_delta) - ) - return img - - def contrast(self, img): - if random.randrange(2): - return self.convert(img, alpha=random.uniform(self.contrast_low, self.contrast_high)) - return img - - def saturation(self, img): - if random.randrange(2): - img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img[:, :, 1] = self.convert( - img[:, :, 1], alpha=random.uniform(self.saturation_low, self.saturation_high) - ) - return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) - return img - - def hue(self, img): - if random.randrange(2): - img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) - img[:, :, 0] = ( - img[:, :, 0].astype(int) + random.randint(-self.hue_delta, self.hue_delta) - ) % 180 - return cv2.cvtColor(img, cv2.COLOR_HSV2BGR) - return img diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/config.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/config.py deleted file mode 100644 index 74f6367..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/config.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from detectron2.config import CfgNode as CN - - -def add_pointrend_config(cfg): - """ - Add config for PointRend. - """ - # We retry random cropping until no single category in semantic segmentation GT occupies more - # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. - cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 - # Color augmentatition from SSD paper for semantic segmentation model during training. - cfg.INPUT.COLOR_AUG_SSD = False - - # Names of the input feature maps to be used by a coarse mask head. - cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES = ("p2",) - cfg.MODEL.ROI_MASK_HEAD.FC_DIM = 1024 - cfg.MODEL.ROI_MASK_HEAD.NUM_FC = 2 - # The side size of a coarse mask head prediction. - cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION = 7 - # True if point head is used. 
- cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON = False - - cfg.MODEL.POINT_HEAD = CN() - cfg.MODEL.POINT_HEAD.NAME = "StandardPointHead" - cfg.MODEL.POINT_HEAD.NUM_CLASSES = 80 - # Names of the input feature maps to be used by a mask point head. - cfg.MODEL.POINT_HEAD.IN_FEATURES = ("p2",) - # Number of points sampled during training for a mask point head. - cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS = 14 * 14 - # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the - # original paper. - cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO = 3 - # Importance sampling parameter for PointRend point sampling during training. Parametr `beta` in - # the original paper. - cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO = 0.75 - # Number of subdivision steps during inference. - cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS = 5 - # Maximum number of points selected at each subdivision step (N). - cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS = 28 * 28 - cfg.MODEL.POINT_HEAD.FC_DIM = 256 - cfg.MODEL.POINT_HEAD.NUM_FC = 3 - cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK = False - # If True, then coarse prediction features are used as inout for each layer in PointRend's MLP. - cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER = True - cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME = "SemSegFPNHead" diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/dataset_mapper.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/dataset_mapper.py deleted file mode 100644 index 76b64ee..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/dataset_mapper.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import numpy as np -import torch -from fvcore.common.file_io import PathManager -from fvcore.transforms.transform import CropTransform -from PIL import Image - -from detectron2.data import detection_utils as utils -from detectron2.data import transforms as T - -from .color_augmentation import ColorAugSSDTransform - -""" -This file contains the mapping that's applied to "dataset dicts" for semantic segmentation models. -Unlike the default DatasetMapper this mapper uses cropping as the last transformation. -""" - -__all__ = ["SemSegDatasetMapper"] - - -class SemSegDatasetMapper: - """ - A callable which takes a dataset dict in Detectron2 Dataset format, - and map it into a format used by semantic segmentation models. - - The callable currently does the following: - - 1. Read the image from "file_name" - 2. Applies geometric transforms to the image and annotation - 3. Find and applies suitable cropping to the image and annotation - 4. 
Prepare image and annotation to Tensors - """ - - def __init__(self, cfg, is_train=True): - if cfg.INPUT.CROP.ENABLED and is_train: - self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE) - logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen)) - else: - self.crop_gen = None - - self.tfm_gens = utils.build_transform_gen(cfg, is_train) - - if cfg.INPUT.COLOR_AUG_SSD: - self.tfm_gens.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) - logging.getLogger(__name__).info( - "Color augmnetation used in training: " + str(self.tfm_gens[-1]) - ) - - # fmt: off - self.img_format = cfg.INPUT.FORMAT - self.single_category_max_area = cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA - self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE - # fmt: on - - self.is_train = is_train - - def __call__(self, dataset_dict): - """ - Args: - dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. - - Returns: - dict: a format that builtin models in detectron2 accept - """ - dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below - image = utils.read_image(dataset_dict["file_name"], format=self.img_format) - utils.check_image_size(dataset_dict, image) - assert "sem_seg_file_name" in dataset_dict - - image, transforms = T.apply_transform_gens(self.tfm_gens, image) - if self.is_train: - with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: - sem_seg_gt = Image.open(f) - sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") - sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) - if self.crop_gen: - image, sem_seg_gt = crop_transform( - image, - sem_seg_gt, - self.crop_gen, - self.single_category_max_area, - self.ignore_value, - ) - dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) - - # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, - # but not efficient on large generic data structures due to the use of pickle & mp.Queue. - # Therefore it's important to use torch.Tensor. - dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) - - if not self.is_train: - dataset_dict.pop("sem_seg_file_name", None) - return dataset_dict - - return dataset_dict - - -def crop_transform(image, sem_seg, crop_gen, single_category_max_area, ignore_value): - """ - Find a cropping window such that no single category occupies more than - `single_category_max_area` in `sem_seg`. The function retries random cropping 10 times max. 
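Editor's note: the `crop_transform` helper whose docstring ends here keeps re-drawing random windows until no single category dominates the crop (`SINGLE_CATEGORY_MAX_AREA`). Restated as a standalone acceptance test, with numpy only and illustrative values:

```python
import numpy as np

def crop_is_balanced(sem_seg_crop, max_area=0.75, ignore_value=255):
    """Accept a candidate crop only if no single category covers more than
    `max_area` of its non-ignored pixels (illustrative threshold)."""
    labels, counts = np.unique(sem_seg_crop, return_counts=True)
    counts = counts[labels != ignore_value]
    return len(counts) > 1 and counts.max() / counts.sum() < max_area

crop = np.zeros((8, 8), dtype=np.uint8)
crop[:3] = 1                                   # category 1 covers ~37% of the crop
print(crop_is_balanced(crop))                  # True  (largest class is 62.5%)
print(crop_is_balanced(np.zeros((8, 8))))      # False (one class fills the whole crop)
```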
- """ - if single_category_max_area >= 1.0: - crop_tfm = crop_gen.get_transform(image) - sem_seg_temp = crop_tfm.apply_segmentation(sem_seg) - else: - h, w = sem_seg.shape - crop_size = crop_gen.get_crop_size((h, w)) - for _ in range(10): - y0 = np.random.randint(h - crop_size[0] + 1) - x0 = np.random.randint(w - crop_size[1] + 1) - sem_seg_temp = sem_seg[y0 : y0 + crop_size[0], x0 : x0 + crop_size[1]] - labels, cnt = np.unique(sem_seg_temp, return_counts=True) - cnt = cnt[labels != ignore_value] - if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < single_category_max_area: - break - crop_tfm = CropTransform(x0, y0, crop_size[1], crop_size[0]) - image = crop_tfm.apply_image(image) - return image, sem_seg_temp diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_features.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_features.py deleted file mode 100644 index 320a33d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_features.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch -from torch.nn import functional as F - -from detectron2.layers import cat -from detectron2.structures import Boxes - - -""" -Shape shorthand in this module: - - N: minibatch dimension size, i.e. the number of RoIs for instance segmenation or the - number of images for semantic segmenation. - R: number of ROIs, combined over all images, in the minibatch - P: number of points -""" - - -def point_sample(input, point_coords, **kwargs): - """ - A wrapper around :function:`torch.nn.functional.grid_sample` to support 3D point_coords tensors. - Unlike :function:`torch.nn.functional.grid_sample` it assumes `point_coords` to lie inside - [0, 1] x [0, 1] square. - - Args: - input (Tensor): A tensor of shape (N, C, H, W) that contains features map on a H x W grid. - point_coords (Tensor): A tensor of shape (N, P, 2) or (N, Hgrid, Wgrid, 2) that contains - [0, 1] x [0, 1] normalized point coordinates. - - Returns: - output (Tensor): A tensor of shape (N, C, P) or (N, C, Hgrid, Wgrid) that contains - features for points in `point_coords`. The features are obtained via bilinear - interplation from `input` the same way as :function:`torch.nn.functional.grid_sample`. - """ - add_dim = False - if point_coords.dim() == 3: - add_dim = True - point_coords = point_coords.unsqueeze(2) - output = F.grid_sample(input, 2.0 * point_coords - 1.0, **kwargs) - if add_dim: - output = output.squeeze(3) - return output - - -def generate_regular_grid_point_coords(R, side_size, device): - """ - Generate regular square grid of points in [0, 1] x [0, 1] coordinate space. - - Args: - R (int): The number of grids to sample, one for each region. - side_size (int): The side size of the regular grid. - device (torch.device): Desired device of returned tensor. - - Returns: - (Tensor): A tensor of shape (R, side_size^2, 2) that contains coordinates - for the regular grids. - """ - aff = torch.tensor([[[0.5, 0, 0.5], [0, 0.5, 0.5]]], device=device) - r = F.affine_grid(aff, torch.Size((1, 1, side_size, side_size)), align_corners=False) - return r.view(1, -1, 2).expand(R, -1, -1) - - -def get_uncertain_point_coords_with_randomness( - coarse_logits, uncertainty_func, num_points, oversample_ratio, importance_sample_ratio -): - """ - Sample points in [0, 1] x [0, 1] coordinate space based on their uncertainty. 
The unceratinties - are calculated for each point using 'uncertainty_func' function that takes point's logit - prediction as input. - See PointRend paper for details. - - Args: - coarse_logits (Tensor): A tensor of shape (N, C, Hmask, Wmask) or (N, 1, Hmask, Wmask) for - class-specific or class-agnostic prediction. - uncertainty_func: A function that takes a Tensor of shape (N, C, P) or (N, 1, P) that - contains logit predictions for P points and returns their uncertainties as a Tensor of - shape (N, 1, P). - num_points (int): The number of points P to sample. - oversample_ratio (int): Oversampling parameter. - importance_sample_ratio (float): Ratio of points that are sampled via importnace sampling. - - Returns: - point_coords (Tensor): A tensor of shape (N, P, 2) that contains the coordinates of P - sampled points. - """ - assert oversample_ratio >= 1 - assert importance_sample_ratio <= 1 and importance_sample_ratio >= 0 - num_boxes = coarse_logits.shape[0] - num_sampled = int(num_points * oversample_ratio) - point_coords = torch.rand(num_boxes, num_sampled, 2, device=coarse_logits.device) - point_logits = point_sample(coarse_logits, point_coords, align_corners=False) - # It is crucial to calculate uncertainty based on the sampled prediction value for the points. - # Calculating uncertainties of the coarse predictions first and sampling them for points leads - # to incorrect results. - # To illustrate this: assume uncertainty_func(logits)=-abs(logits), a sampled point between - # two coarse predictions with -1 and 1 logits has 0 logits, and therefore 0 uncertainty value. - # However, if we calculate uncertainties for the coarse predictions first, - # both will have -1 uncertainty, and the sampled point will get -1 uncertainty. - point_uncertainties = uncertainty_func(point_logits) - num_uncertain_points = int(importance_sample_ratio * num_points) - num_random_points = num_points - num_uncertain_points - idx = torch.topk(point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] - shift = num_sampled * torch.arange(num_boxes, dtype=torch.long, device=coarse_logits.device) - idx += shift[:, None] - point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( - num_boxes, num_uncertain_points, 2 - ) - if num_random_points > 0: - point_coords = cat( - [ - point_coords, - torch.rand(num_boxes, num_random_points, 2, device=coarse_logits.device), - ], - dim=1, - ) - return point_coords - - -def get_uncertain_point_coords_on_grid(uncertainty_map, num_points): - """ - Find `num_points` most uncertain points from `uncertainty_map` grid. - - Args: - uncertainty_map (Tensor): A tensor of shape (N, 1, H, W) that contains uncertainty - values for a set of points on a regular H x W grid. - num_points (int): The number of points P to select. - - Returns: - point_indices (Tensor): A tensor of shape (N, P) that contains indices from - [0, H x W) of the most uncertain points. - point_coords (Tensor): A tensor of shape (N, P, 2) that contains [0, 1] x [0, 1] normalized - coordinates of the most uncertain points from the H x W grid. 
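Editor's note: as described above, training points are drawn in three steps: oversample `k*N` random points, score them with `uncertainty_func` on the logits sampled at those points, keep the `beta*N` most uncertain, and top up with uniformly random points. A minimal standalone sketch of that recipe, where a hypothetical lambda stands in for `point_sample` on the coarse prediction and `-|logit|` is used as in the instance-segmentation case:

```python
import torch

def sample_uncertain_points(logits_at, num_points, k=3, beta=0.75):
    """Toy version of the biased sampling: oversample, keep the most uncertain
    beta-fraction, fill the remainder uniformly at random.

    logits_at: callable taking coords of shape (N, P, 2) in [0, 1]^2 and returning
               per-point logits of shape (N, P); it stands in for point_sample on
               the coarse prediction.
    """
    N = 1                                                  # one image/box for the demo
    coords = torch.rand(N, num_points * k, 2)              # step 1: oversample k*P points
    uncertainty = -logits_at(coords).abs()                 # step 2: -|logit| at the sampled points
    num_uncertain = int(beta * num_points)
    top = uncertainty.topk(num_uncertain, dim=1).indices
    picked = torch.gather(coords, 1, top.unsqueeze(-1).expand(-1, -1, 2))
    random_fill = torch.rand(N, num_points - num_uncertain, 2)  # step 3: uniform remainder
    return torch.cat([picked, random_fill], dim=1)

# A made-up "mask boundary" at x = 0.5: the picked points should cluster around it.
pts = sample_uncertain_points(lambda c: 10.0 * (c[..., 0] - 0.5), num_points=8)
print(pts.shape)    # torch.Size([1, 8, 2])
```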
- """ - R, _, H, W = uncertainty_map.shape - h_step = 1.0 / float(H) - w_step = 1.0 / float(W) - - num_points = min(H * W, num_points) - point_indices = torch.topk(uncertainty_map.view(R, H * W), k=num_points, dim=1)[1] - point_coords = torch.zeros(R, num_points, 2, dtype=torch.float, device=uncertainty_map.device) - point_coords[:, :, 0] = w_step / 2.0 + (point_indices % W).to(torch.float) * w_step - point_coords[:, :, 1] = h_step / 2.0 + (point_indices // W).to(torch.float) * h_step - return point_indices, point_coords - - -def point_sample_fine_grained_features(features_list, feature_scales, boxes, point_coords): - """ - Get features from feature maps in `features_list` that correspond to specific point coordinates - inside each bounding box from `boxes`. - - Args: - features_list (list[Tensor]): A list of feature map tensors to get features from. - feature_scales (list[float]): A list of scales for tensors in `features_list`. - boxes (list[Boxes]): A list of I Boxes objects that contain R_1 + ... + R_I = R boxes all - together. - point_coords (Tensor): A tensor of shape (R, P, 2) that contains - [0, 1] x [0, 1] box-normalized coordinates of the P sampled points. - - Returns: - point_features (Tensor): A tensor of shape (R, C, P) that contains features sampled - from all features maps in feature_list for P sampled points for all R boxes in `boxes`. - point_coords_wrt_image (Tensor): A tensor of shape (R, P, 2) that contains image-level - coordinates of P points. - """ - cat_boxes = Boxes.cat(boxes) - num_boxes = [len(b) for b in boxes] - - point_coords_wrt_image = get_point_coords_wrt_image(cat_boxes.tensor, point_coords) - split_point_coords_wrt_image = torch.split(point_coords_wrt_image, num_boxes) - - point_features = [] - for idx_img, point_coords_wrt_image_per_image in enumerate(split_point_coords_wrt_image): - point_features_per_image = [] - for idx_feature, feature_map in enumerate(features_list): - h, w = feature_map.shape[-2:] - scale = torch.tensor([w, h], device=feature_map.device) / feature_scales[idx_feature] - point_coords_scaled = point_coords_wrt_image_per_image / scale - point_features_per_image.append( - point_sample( - feature_map[idx_img].unsqueeze(0), - point_coords_scaled.unsqueeze(0), - align_corners=False, - ) - .squeeze(0) - .transpose(1, 0) - ) - point_features.append(cat(point_features_per_image, dim=1)) - - return cat(point_features, dim=0), point_coords_wrt_image - - -def get_point_coords_wrt_image(boxes_coords, point_coords): - """ - Convert box-normalized [0, 1] x [0, 1] point cooordinates to image-level coordinates. - - Args: - boxes_coords (Tensor): A tensor of shape (R, 4) that contains bounding boxes. - coordinates. - point_coords (Tensor): A tensor of shape (R, P, 2) that contains - [0, 1] x [0, 1] box-normalized coordinates of the P sampled points. - - Returns: - point_coords_wrt_image (Tensor): A tensor of shape (R, P, 2) that contains - image-normalized coordinates of P sampled points. 
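Editor's note: a quick numeric check of the coordinate convention used throughout this file. `point_sample` takes `[0, 1] x [0, 1]` coordinates and maps them to `grid_sample`'s `[-1, 1]` range via `2 * coords - 1`; with `align_corners=False`, the point `(0.75, 0.25)` lands exactly on the center of the top-right cell of a 2 x 2 feature map:

```python
import torch
import torch.nn.functional as F

# A 1x1x2x2 feature map with a distinct value in each cell.
feat = torch.tensor([[[[1., 2.],
                       [3., 4.]]]])

pts = torch.tensor([[[0.75, 0.25]]])                   # (N, P, 2), (x, y) in [0, 1]
grid = (2.0 * pts - 1.0).unsqueeze(2)                  # (N, P, 1, 2) in [-1, 1]
out = F.grid_sample(feat, grid, align_corners=False)   # (N, C, P, 1)
print(out.squeeze())                                   # tensor(2.), the center of the top-right cell
```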
- """ - with torch.no_grad(): - point_coords_wrt_image = point_coords.clone() - point_coords_wrt_image[:, :, 0] = point_coords_wrt_image[:, :, 0] * ( - boxes_coords[:, None, 2] - boxes_coords[:, None, 0] - ) - point_coords_wrt_image[:, :, 1] = point_coords_wrt_image[:, :, 1] * ( - boxes_coords[:, None, 3] - boxes_coords[:, None, 1] - ) - point_coords_wrt_image[:, :, 0] += boxes_coords[:, None, 0] - point_coords_wrt_image[:, :, 1] += boxes_coords[:, None, 1] - return point_coords_wrt_image diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_head.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_head.py deleted file mode 100644 index 6f35bae..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/point_head.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import fvcore.nn.weight_init as weight_init -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import ShapeSpec, cat -from detectron2.structures import BitMasks -from detectron2.utils.events import get_event_storage -from detectron2.utils.registry import Registry - -from .point_features import point_sample - -POINT_HEAD_REGISTRY = Registry("POINT_HEAD") -POINT_HEAD_REGISTRY.__doc__ = """ -Registry for point heads, which makes prediction for a given set of per-point features. - -The registered object will be called with `obj(cfg, input_shape)`. -""" - - -def roi_mask_point_loss(mask_logits, instances, points_coord): - """ - Compute the point-based loss for instance segmentation mask predictions. - - Args: - mask_logits (Tensor): A tensor of shape (R, C, P) or (R, 1, P) for class-specific or - class-agnostic, where R is the total number of predicted masks in all images, C is the - number of foreground classes, and P is the number of points sampled for each mask. - The values are logits. - instances (list[Instances]): A list of N Instances, where N is the number of images - in the batch. These instances are in 1:1 correspondence with the `mask_logits`. So, i_th - elememt of the list contains R_i objects and R_1 + ... + R_N is equal to R. - The ground-truth labels (class, box, mask, ...) associated with each instance are stored - in fields. - points_coords (Tensor): A tensor of shape (R, P, 2), where R is the total number of - predicted masks and P is the number of points for each mask. The coordinates are in - the image pixel coordinate space, i.e. [0, H] x [0, W]. - Returns: - point_loss (Tensor): A scalar tensor containing the loss. - """ - assert len(instances) == 0 or isinstance( - instances[0].gt_masks, BitMasks - ), "Point head works with GT in 'bitmask' format only. Set INPUT.MASK_FORMAT to 'bitmask'." 
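Editor's note: two coordinate frames appear in the loss whose docstring ends above: points are sampled in box-normalized `[0, 1]^2` coordinates, `get_point_coords_wrt_image` maps them into image pixels, and the loss then rescales by the full image size before sampling the ground-truth bit mask. A small numeric example with made-up box and image sizes:

```python
import torch

# Hypothetical box (x0, y0, x1, y1) and two box-normalized points: a corner and the center.
box = torch.tensor([100.0, 50.0, 300.0, 250.0])
pts = torch.tensor([[0.0, 0.0], [0.5, 0.5]])

w, h = (box[2] - box[0]).item(), (box[3] - box[1]).item()
pts_img = pts * torch.tensor([w, h]) + box[:2]         # image pixel coordinates
print(pts_img)                                         # tensor([[100.,  50.], [200., 150.]])

# Before sampling a (1, 1, H, W) ground-truth bit mask, the loss divides by the
# full image size so the points are back in [0, 1]^2 (x, y) for point_sample:
H, W = 480, 640
pts_unit = pts_img / torch.tensor([W, H], dtype=torch.float)
```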
- with torch.no_grad(): - cls_agnostic_mask = mask_logits.size(1) == 1 - total_num_masks = mask_logits.size(0) - - gt_classes = [] - gt_mask_logits = [] - idx = 0 - for instances_per_image in instances: - if not cls_agnostic_mask: - gt_classes_per_image = instances_per_image.gt_classes.to(dtype=torch.int64) - gt_classes.append(gt_classes_per_image) - - gt_bit_masks = instances_per_image.gt_masks.tensor - h, w = instances_per_image.gt_masks.image_size - scale = torch.tensor([w, h], dtype=torch.float, device=gt_bit_masks.device) - points_coord_grid_sample_format = ( - points_coord[idx : idx + len(instances_per_image)] / scale - ) - idx += len(instances_per_image) - gt_mask_logits.append( - point_sample( - gt_bit_masks.to(torch.float32).unsqueeze(1), - points_coord_grid_sample_format, - align_corners=False, - ).squeeze(1) - ) - gt_mask_logits = cat(gt_mask_logits) - - # torch.mean (in binary_cross_entropy_with_logits) doesn't - # accept empty tensors, so handle it separately - if gt_mask_logits.numel() == 0: - return mask_logits.sum() * 0 - - if cls_agnostic_mask: - mask_logits = mask_logits[:, 0] - else: - indices = torch.arange(total_num_masks) - gt_classes = cat(gt_classes, dim=0) - mask_logits = mask_logits[indices, gt_classes] - - # Log the training accuracy (using gt classes and 0.0 threshold for the logits) - mask_accurate = (mask_logits > 0.0) == gt_mask_logits.to(dtype=torch.uint8) - mask_accuracy = mask_accurate.nonzero().size(0) / mask_accurate.numel() - get_event_storage().put_scalar("point_rend/accuracy", mask_accuracy) - - point_loss = F.binary_cross_entropy_with_logits( - mask_logits, gt_mask_logits.to(dtype=torch.float32), reduction="mean" - ) - return point_loss - - -@POINT_HEAD_REGISTRY.register() -class StandardPointHead(nn.Module): - """ - A point head multi-layer perceptron which we model with conv1d layers with kernel 1. The head - takes both fine-grained and coarse prediction features as its input. 
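Editor's note: the point head implemented below is an MLP over per-point feature vectors, written with `nn.Conv1d(kernel_size=1)` so a whole `(N, C, P)` batch of points is processed at once. A 1x1 Conv1d is exactly a linear layer applied independently to every point, as this small check shows:

```python
import torch
from torch import nn

torch.manual_seed(0)
conv = nn.Conv1d(8, 4, kernel_size=1)
lin = nn.Linear(8, 4)
lin.weight.data.copy_(conv.weight.data.squeeze(-1))    # share parameters for the comparison
lin.bias.data.copy_(conv.bias.data)

x = torch.randn(2, 8, 100)                             # (N, C, P) per-point features
y_conv = conv(x)                                       # (N, 4, P)
y_lin = lin(x.transpose(1, 2)).transpose(1, 2)         # same thing, point by point
print(torch.allclose(y_conv, y_lin, atol=1e-6))        # True
```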
- """ - - def __init__(self, cfg, input_shape: ShapeSpec): - """ - The following attributes are parsed from config: - fc_dim: the output dimension of each FC layers - num_fc: the number of FC layers - coarse_pred_each_layer: if True, coarse prediction features are concatenated to each - layer's input - """ - super(StandardPointHead, self).__init__() - # fmt: off - num_classes = cfg.MODEL.POINT_HEAD.NUM_CLASSES - fc_dim = cfg.MODEL.POINT_HEAD.FC_DIM - num_fc = cfg.MODEL.POINT_HEAD.NUM_FC - cls_agnostic_mask = cfg.MODEL.POINT_HEAD.CLS_AGNOSTIC_MASK - self.coarse_pred_each_layer = cfg.MODEL.POINT_HEAD.COARSE_PRED_EACH_LAYER - input_channels = input_shape.channels - # fmt: on - - fc_dim_in = input_channels + num_classes - self.fc_layers = [] - for k in range(num_fc): - fc = nn.Conv1d(fc_dim_in, fc_dim, kernel_size=1, stride=1, padding=0, bias=True) - self.add_module("fc{}".format(k + 1), fc) - self.fc_layers.append(fc) - fc_dim_in = fc_dim - fc_dim_in += num_classes if self.coarse_pred_each_layer else 0 - - num_mask_classes = 1 if cls_agnostic_mask else num_classes - self.predictor = nn.Conv1d(fc_dim_in, num_mask_classes, kernel_size=1, stride=1, padding=0) - - for layer in self.fc_layers: - weight_init.c2_msra_fill(layer) - # use normal distribution initialization for mask prediction layer - nn.init.normal_(self.predictor.weight, std=0.001) - if self.predictor.bias is not None: - nn.init.constant_(self.predictor.bias, 0) - - def forward(self, fine_grained_features, coarse_features): - x = torch.cat((fine_grained_features, coarse_features), dim=1) - for layer in self.fc_layers: - x = F.relu(layer(x)) - if self.coarse_pred_each_layer: - x = cat((x, coarse_features), dim=1) - return self.predictor(x) - - -def build_point_head(cfg, input_channels): - """ - Build a point head defined by `cfg.MODEL.POINT_HEAD.NAME`. - """ - head_name = cfg.MODEL.POINT_HEAD.NAME - return POINT_HEAD_REGISTRY.get(head_name)(cfg, input_channels) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/roi_heads.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/roi_heads.py deleted file mode 100644 index 4f7225b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/roi_heads.py +++ /dev/null @@ -1,227 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import torch - -from detectron2.layers import ShapeSpec, cat, interpolate -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads -from detectron2.modeling.roi_heads.mask_head import ( - build_mask_head, - mask_rcnn_inference, - mask_rcnn_loss, -) -from detectron2.modeling.roi_heads.roi_heads import select_foreground_proposals - -from .point_features import ( - generate_regular_grid_point_coords, - get_uncertain_point_coords_on_grid, - get_uncertain_point_coords_with_randomness, - point_sample, - point_sample_fine_grained_features, -) -from .point_head import build_point_head, roi_mask_point_loss - - -def calculate_uncertainty(logits, classes): - """ - We estimate uncerainty as L1 distance between 0.0 and the logit prediction in 'logits' for the - foreground class in `classes`. - - Args: - logits (Tensor): A tensor of shape (R, C, ...) or (R, 1, ...) for class-specific or - class-agnostic, where R is the total number of predicted masks in all images and C is - the number of foreground classes. The values are logits. 
- classes (list): A list of length R that contains either predicted of ground truth class - for eash predicted mask. - - Returns: - scores (Tensor): A tensor of shape (R, 1, ...) that contains uncertainty scores with - the most uncertain locations having the highest uncertainty score. - """ - if logits.shape[1] == 1: - gt_class_logits = logits.clone() - else: - gt_class_logits = logits[ - torch.arange(logits.shape[0], device=logits.device), classes - ].unsqueeze(1) - return -(torch.abs(gt_class_logits)) - - -@ROI_HEADS_REGISTRY.register() -class PointRendROIHeads(StandardROIHeads): - """ - The RoI heads class for PointRend instance segmentation models. - - In this class we redefine the mask head of `StandardROIHeads` leaving all other heads intact. - To avoid namespace conflict with other heads we use names starting from `mask_` for all - variables that correspond to the mask head in the class's namespace. - """ - - def __init__(self, cfg, input_shape): - # TODO use explicit args style - super().__init__(cfg, input_shape) - self._init_mask_head(cfg, input_shape) - - def _init_mask_head(self, cfg, input_shape): - # fmt: off - self.mask_on = cfg.MODEL.MASK_ON - if not self.mask_on: - return - self.mask_coarse_in_features = cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES - self.mask_coarse_side_size = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION - self._feature_scales = {k: 1.0 / v.stride for k, v in input_shape.items()} - # fmt: on - - in_channels = np.sum([input_shape[f].channels for f in self.mask_coarse_in_features]) - self.mask_coarse_head = build_mask_head( - cfg, - ShapeSpec( - channels=in_channels, - width=self.mask_coarse_side_size, - height=self.mask_coarse_side_size, - ), - ) - self._init_point_head(cfg, input_shape) - - def _init_point_head(self, cfg, input_shape): - # fmt: off - self.mask_point_on = cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON - if not self.mask_point_on: - return - assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES - self.mask_point_in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES - self.mask_point_train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS - self.mask_point_oversample_ratio = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO - self.mask_point_importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO - # next two parameters are use in the adaptive subdivions inference procedure - self.mask_point_subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS - self.mask_point_subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS - # fmt: on - - in_channels = np.sum([input_shape[f].channels for f in self.mask_point_in_features]) - self.mask_point_head = build_point_head( - cfg, ShapeSpec(channels=in_channels, width=1, height=1) - ) - - def _forward_mask(self, features, instances): - """ - Forward logic of the mask prediction branch. - - Args: - features (dict[str, Tensor]): #level input features for mask prediction - instances (list[Instances]): the per-image instances to train/predict masks. - In training, they can be the proposals. - In inference, they can be the predicted boxes. - - Returns: - In training, a dict of losses. - In inference, update `instances` with new fields "pred_masks" and return it. 
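Editor's note: for intuition on the `-|logit|` score defined above, logits near zero correspond to probabilities near 0.5, i.e. locations the coarse mask is least sure about, so they receive the highest (least negative) uncertainty and are refined first. A three-point toy example:

```python
import torch

logits = torch.tensor([-4.0, 0.2, 3.0])        # foreground-class logits at three points
uncertainty = -logits.abs()
print(uncertainty)                             # tensor([-4.0000, -0.2000, -3.0000])
print(int(uncertainty.argmax()))               # 1: the point with p ~ 0.55 is refined first
```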
- """ - if not self.mask_on: - return {} if self.training else instances - - if self.training: - proposals, _ = select_foreground_proposals(instances, self.num_classes) - proposal_boxes = [x.proposal_boxes for x in proposals] - mask_coarse_logits = self._forward_mask_coarse(features, proposal_boxes) - - losses = {"loss_mask": mask_rcnn_loss(mask_coarse_logits, proposals)} - losses.update(self._forward_mask_point(features, mask_coarse_logits, proposals)) - return losses - else: - pred_boxes = [x.pred_boxes for x in instances] - mask_coarse_logits = self._forward_mask_coarse(features, pred_boxes) - - mask_logits = self._forward_mask_point(features, mask_coarse_logits, instances) - mask_rcnn_inference(mask_logits, instances) - return instances - - def _forward_mask_coarse(self, features, boxes): - """ - Forward logic of the coarse mask head. - """ - point_coords = generate_regular_grid_point_coords( - np.sum(len(x) for x in boxes), self.mask_coarse_side_size, boxes[0].device - ) - mask_coarse_features_list = [features[k] for k in self.mask_coarse_in_features] - features_scales = [self._feature_scales[k] for k in self.mask_coarse_in_features] - # For regular grids of points, this function is equivalent to `len(features_list)' calls - # of `ROIAlign` (with `SAMPLING_RATIO=2`), and concat the results. - mask_features, _ = point_sample_fine_grained_features( - mask_coarse_features_list, features_scales, boxes, point_coords - ) - return self.mask_coarse_head(mask_features) - - def _forward_mask_point(self, features, mask_coarse_logits, instances): - """ - Forward logic of the mask point head. - """ - if not self.mask_point_on: - return {} if self.training else mask_coarse_logits - - mask_features_list = [features[k] for k in self.mask_point_in_features] - features_scales = [self._feature_scales[k] for k in self.mask_point_in_features] - - if self.training: - proposal_boxes = [x.proposal_boxes for x in instances] - gt_classes = cat([x.gt_classes for x in instances]) - with torch.no_grad(): - point_coords = get_uncertain_point_coords_with_randomness( - mask_coarse_logits, - lambda logits: calculate_uncertainty(logits, gt_classes), - self.mask_point_train_num_points, - self.mask_point_oversample_ratio, - self.mask_point_importance_sample_ratio, - ) - - fine_grained_features, point_coords_wrt_image = point_sample_fine_grained_features( - mask_features_list, features_scales, proposal_boxes, point_coords - ) - coarse_features = point_sample(mask_coarse_logits, point_coords, align_corners=False) - point_logits = self.mask_point_head(fine_grained_features, coarse_features) - return { - "loss_mask_point": roi_mask_point_loss( - point_logits, instances, point_coords_wrt_image - ) - } - else: - pred_boxes = [x.pred_boxes for x in instances] - pred_classes = cat([x.pred_classes for x in instances]) - # The subdivision code will fail with the empty list of boxes - if len(pred_classes) == 0: - return mask_coarse_logits - - mask_logits = mask_coarse_logits.clone() - for subdivions_step in range(self.mask_point_subdivision_steps): - mask_logits = interpolate( - mask_logits, scale_factor=2, mode="bilinear", align_corners=False - ) - # If `mask_point_subdivision_num_points` is larger or equal to the - # resolution of the next step, then we can skip this step - H, W = mask_logits.shape[-2:] - if ( - self.mask_point_subdivision_num_points >= 4 * H * W - and subdivions_step < self.mask_point_subdivision_steps - 1 - ): - continue - uncertainty_map = calculate_uncertainty(mask_logits, pred_classes) - point_indices, 
point_coords = get_uncertain_point_coords_on_grid( - uncertainty_map, self.mask_point_subdivision_num_points - ) - fine_grained_features, _ = point_sample_fine_grained_features( - mask_features_list, features_scales, pred_boxes, point_coords - ) - coarse_features = point_sample( - mask_coarse_logits, point_coords, align_corners=False - ) - point_logits = self.mask_point_head(fine_grained_features, coarse_features) - - # put mask point predictions to the right places on the upsampled grid. - R, C, H, W = mask_logits.shape - point_indices = point_indices.unsqueeze(1).expand(-1, C, -1) - mask_logits = ( - mask_logits.reshape(R, C, H * W) - .scatter_(2, point_indices, point_logits) - .view(R, C, H, W) - ) - return mask_logits diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/semantic_seg.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/semantic_seg.py deleted file mode 100644 index 670a0ea..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/point_rend/semantic_seg.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -from typing import Dict -import torch -from torch import nn -from torch.nn import functional as F - -from detectron2.layers import ShapeSpec, cat -from detectron2.modeling import SEM_SEG_HEADS_REGISTRY - -from .point_features import ( - get_uncertain_point_coords_on_grid, - get_uncertain_point_coords_with_randomness, - point_sample, -) -from .point_head import build_point_head - - -def calculate_uncertainty(sem_seg_logits): - """ - For each location of the prediction `sem_seg_logits` we estimate uncerainty as the - difference between top first and top second predicted logits. - - Args: - mask_logits (Tensor): A tensor of shape (N, C, ...), where N is the minibatch size and - C is the number of foreground classes. The values are logits. - - Returns: - scores (Tensor): A tensor of shape (N, 1, ...) that contains uncertainty scores with - the most uncertain locations having the highest uncertainty score. - """ - top2_scores = torch.topk(sem_seg_logits, k=2, dim=1)[0] - return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1) - - -@SEM_SEG_HEADS_REGISTRY.register() -class PointRendSemSegHead(nn.Module): - """ - A semantic segmentation head that combines a head set in `POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME` - and a point head set in `MODEL.POINT_HEAD.NAME`. 
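Editor's note: the inference branch that just ended repeats one pattern at growing resolution: bilinearly upsample the mask logits, rank locations by uncertainty, re-predict only the top-N points with the point head, and scatter those refined logits back into the flattened grid. A toy, class-agnostic version of that loop, where the `point_fn` argument stands in for sampling fine-grained features and running the point head:

```python
import torch
import torch.nn.functional as F

def subdivision_refine(coarse_logits, point_fn, steps=2, num_points=16):
    """Toy PointRend-style inference for a single-channel (class-agnostic) mask.

    coarse_logits: (N, 1, H, W) low-resolution logits.
    point_fn: maps (logits, flat_indices) -> refined logits of shape (N, 1, P).
    """
    logits = coarse_logits
    for _ in range(steps):
        logits = F.interpolate(logits, scale_factor=2, mode="bilinear", align_corners=False)
        N, C, H, W = logits.shape
        uncertainty = -logits.abs().view(N, H * W)                 # near-zero logits first
        idx = uncertainty.topk(min(num_points, H * W), dim=1).indices
        refined = point_fn(logits, idx)                            # (N, 1, P)
        logits = (logits.view(N, C, H * W)
                        .scatter_(2, idx.unsqueeze(1), refined)
                        .view(N, C, H, W))
    return logits

# Dummy "point head": just sharpen whatever logit is already there.
sharpen = lambda lg, idx: 4.0 * torch.gather(lg.flatten(2), 2, idx.unsqueeze(1))
out = subdivision_refine(torch.randn(1, 1, 7, 7), sharpen)
print(out.shape)   # torch.Size([1, 1, 28, 28])
```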
- """ - - def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): - super().__init__() - - self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE - - self.coarse_sem_seg_head = SEM_SEG_HEADS_REGISTRY.get( - cfg.MODEL.POINT_HEAD.COARSE_SEM_SEG_HEAD_NAME - )(cfg, input_shape) - self._init_point_head(cfg, input_shape) - - def _init_point_head(self, cfg, input_shape: Dict[str, ShapeSpec]): - # fmt: off - assert cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES - feature_channels = {k: v.channels for k, v in input_shape.items()} - self.in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES - self.train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS - self.oversample_ratio = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO - self.importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO - self.subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS - self.subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS - # fmt: on - - in_channels = np.sum([feature_channels[f] for f in self.in_features]) - self.point_head = build_point_head(cfg, ShapeSpec(channels=in_channels, width=1, height=1)) - - def forward(self, features, targets=None): - coarse_sem_seg_logits = self.coarse_sem_seg_head.layers(features) - - if self.training: - losses = self.coarse_sem_seg_head.losses(coarse_sem_seg_logits, targets) - - with torch.no_grad(): - point_coords = get_uncertain_point_coords_with_randomness( - coarse_sem_seg_logits, - calculate_uncertainty, - self.train_num_points, - self.oversample_ratio, - self.importance_sample_ratio, - ) - coarse_features = point_sample(coarse_sem_seg_logits, point_coords, align_corners=False) - - fine_grained_features = cat( - [ - point_sample(features[in_feature], point_coords, align_corners=False) - for in_feature in self.in_features - ] - ) - point_logits = self.point_head(fine_grained_features, coarse_features) - point_targets = ( - point_sample( - targets.unsqueeze(1).to(torch.float), - point_coords, - mode="nearest", - align_corners=False, - ) - .squeeze(1) - .to(torch.long) - ) - losses["loss_sem_seg_point"] = F.cross_entropy( - point_logits, point_targets, reduction="mean", ignore_index=self.ignore_value - ) - return None, losses - else: - sem_seg_logits = coarse_sem_seg_logits.clone() - for _ in range(self.subdivision_steps): - sem_seg_logits = F.interpolate( - sem_seg_logits, scale_factor=2, mode="bilinear", align_corners=False - ) - uncertainty_map = calculate_uncertainty(sem_seg_logits) - point_indices, point_coords = get_uncertain_point_coords_on_grid( - uncertainty_map, self.subdivision_num_points - ) - fine_grained_features = cat( - [ - point_sample(features[in_feature], point_coords, align_corners=False) - for in_feature in self.in_features - ] - ) - coarse_features = point_sample( - coarse_sem_seg_logits, point_coords, align_corners=False - ) - point_logits = self.point_head(fine_grained_features, coarse_features) - - # put sem seg point predictions to the right places on the upsampled grid. 
- N, C, H, W = sem_seg_logits.shape - point_indices = point_indices.unsqueeze(1).expand(-1, C, -1) - sem_seg_logits = ( - sem_seg_logits.reshape(N, C, H * W) - .scatter_(2, point_indices, point_logits) - .view(N, C, H, W) - ) - return sem_seg_logits, {} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/run.sh b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/run.sh deleted file mode 100644 index 4ee1614..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/run.sh +++ /dev/null @@ -1,2 +0,0 @@ -python finetune_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_parsing.yaml --num-gpus 1 -#python finetune_net.py --config-file configs/InstanceSegmentation/pointrend_rcnn_R_50_FPN_3x_parsing.yaml --num-gpus 1 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/train_net.py deleted file mode 100644 index 7832867..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/PointRend/train_net.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -PointRend Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os -import torch - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog, build_detection_train_loader -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - DatasetEvaluators, - LVISEvaluator, - SemSegEvaluator, - verify_results, -) - -from point_rend import SemSegDatasetMapper, add_pointrend_config - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains a number pre-defined logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if evaluator_type == "coco": - return COCOEvaluator(dataset_name, cfg, True, output_folder) - if evaluator_type == "sem_seg": - return SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." 
- return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesSemSegEvaluator(dataset_name) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def build_train_loader(cls, cfg): - if "SemanticSegmentor" in cfg.MODEL.META_ARCHITECTURE: - mapper = SemSegDatasetMapper(cfg, True) - else: - mapper = None - return build_detection_train_loader(cfg, mapper=mapper) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_pointrend_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/README.md deleted file mode 100644 index 36263bd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/README.md +++ /dev/null @@ -1,31 +0,0 @@ - -Here are a few projects that are built on detectron2. -They are examples of how to use detectron2 as a library, to make your projects more -maintainable. - -## Projects by Facebook - -Note that these are research projects, and therefore may not have the same level -of support or stability of detectron2. - -+ [DensePose: Dense Human Pose Estimation In The Wild](DensePose) -+ [Scale-Aware Trident Networks for Object Detection](TridentNet) -+ [TensorMask: A Foundation for Dense Object Segmentation](TensorMask) -+ [Mesh R-CNN](https://github.com/facebookresearch/meshrcnn) -+ [PointRend: Image Segmentation as Rendering](PointRend) -+ [Momentum Contrast for Unsupervised Visual Representation Learning](https://github.com/facebookresearch/moco/tree/master/detection) - - -## External Projects - -External projects in the community that use detectron2: - - - -+ [VoVNet backbones](https://github.com/youngwanLEE/vovnet-detectron2). -+ [AdelaiDet](https://github.com/aim-uofa/adet), a detection toolbox from the Universtiy of Adelaide. 
-+ [CenterMask : Real-Time Anchor-Free Instance Segmentation](https://github.com/youngwanLEE/centermask2) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/README.md deleted file mode 100644 index 6831508..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/README.md +++ /dev/null @@ -1,64 +0,0 @@ - -# TensorMask in Detectron2 -**A Foundation for Dense Object Segmentation** - -Xinlei Chen, Ross Girshick, Kaiming He, Piotr Dollár - -[[`arXiv`](https://arxiv.org/abs/1903.12174)] [[`BibTeX`](#CitingTensorMask)] - -
- -
- -In this repository, we release code for TensorMask in Detectron2. -TensorMask is a dense sliding-window instance segmentation framework that, for the first time, achieves results close to the well-developed Mask R-CNN framework -- both qualitatively and quantitatively. It establishes a conceptually complementary direction for object instance segmentation research. - -## Installation -First install Detectron2 following the [documentation](https://detectron2.readthedocs.io/tutorials/install.html) and -[setup the dataset](../../datasets). Then compile the TensorMask-specific op (`swap_align2nat`): -```bash -cd /path/to/detectron2/projects/TensorMask -python setup.py build develop -``` - -## Training - -To train a model, run: -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file -``` - -For example, to launch TensorMask BiPyramid training (1x schedule) with ResNet-50 backbone on 8 GPUs, -one should execute: -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file configs/tensormask_R_50_FPN_1x.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly (6x schedule with scale augmentation): -```bash -python /path/to/detectron2/projects/TensorMask/train_net.py --config-file configs/tensormask_R_50_FPN_6x.yaml --eval-only MODEL.WEIGHTS /path/to/model_checkpoint -``` - -# Pretrained Models - -| Backbone | lr sched | AP box | AP mask | download | -| -------- | -------- | -- | --- | -------- | -| R50 | 1x | 37.6 | 32.4 | model \|  metrics | -| R50 | 6x | 41.4 | 35.8 | model \|  metrics | - - -## Citing TensorMask - -If you use TensorMask, please use the following BibTeX entry. - -``` -@InProceedings{chen2019tensormask, - title={Tensormask: A Foundation for Dense Object Segmentation}, - author={Chen, Xinlei and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr}, - journal={The International Conference on Computer Vision (ICCV)}, - year={2019} -} -``` - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml deleted file mode 100644 index a724534..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/Base-TensorMask.yaml +++ /dev/null @@ -1,25 +0,0 @@ -MODEL: - META_ARCHITECTURE: "TensorMask" - MASK_ON: True - BACKBONE: - NAME: "build_retinanet_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[44, 60], [88, 120], [176, 240], [352, 480], [704, 960], [1408, 1920]] - ASPECT_RATIOS: [[1.0]] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - FUSE_TYPE: "avg" - TENSOR_MASK: - ALIGNED_ON: True - BIPYRAMID_ON: True -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml deleted file mode 100644 index 5d5eee1..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_1x.yaml +++ /dev/null @@ -1,5 +0,0 @@ -_BASE_: "Base-TensorMask.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 diff --git 
a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml deleted file mode 100644 index 366a965..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/configs/tensormask_R_50_FPN_6x.yaml +++ /dev/null @@ -1,11 +0,0 @@ -_BASE_: "Base-TensorMask.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (480000, 520000) - MAX_ITER: 540000 -INPUT: - MIN_SIZE_TRAIN_SAMPLING: "range" - MIN_SIZE_TRAIN: (640, 800) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/setup.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/setup.py deleted file mode 100644 index 0194e76..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/setup.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import glob -import os -from setuptools import find_packages, setup -import torch -from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension - - -def get_extensions(): - this_dir = os.path.dirname(os.path.abspath(__file__)) - extensions_dir = os.path.join(this_dir, "tensormask", "layers", "csrc") - - main_source = os.path.join(extensions_dir, "vision.cpp") - sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) - source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( - os.path.join(extensions_dir, "*.cu") - ) - - sources = [main_source] + sources - - extension = CppExtension - - extra_compile_args = {"cxx": []} - define_macros = [] - - if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": - extension = CUDAExtension - sources += source_cuda - define_macros += [("WITH_CUDA", None)] - extra_compile_args["nvcc"] = [ - "-DCUDA_HAS_FP16=1", - "-D__CUDA_NO_HALF_OPERATORS__", - "-D__CUDA_NO_HALF_CONVERSIONS__", - "-D__CUDA_NO_HALF2_OPERATORS__", - ] - - # It's better if pytorch can do this by default .. - CC = os.environ.get("CC", None) - if CC is not None: - extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) - - sources = [os.path.join(extensions_dir, s) for s in sources] - - include_dirs = [extensions_dir] - - ext_modules = [ - extension( - "tensormask._C", - sources, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -setup( - name="tensormask", - version="0.1", - author="FAIR", - packages=find_packages(exclude=("configs", "tests")), - python_requires=">=3.6", - ext_modules=get_extensions(), - cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, -) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/__init__.py deleted file mode 100644 index e3b642a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from .config import add_tensormask_config -from .arch import TensorMask diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/arch.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/arch.py deleted file mode 100644 index a3e89c6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/arch.py +++ /dev/null @@ -1,904 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import copy -import logging -import math -from typing import List -import torch -import torch.nn.functional as F -from fvcore.nn import sigmoid_focal_loss_star_jit, smooth_l1_loss -from torch import nn - -from detectron2.layers import ShapeSpec, batched_nms, cat, paste_masks_in_image -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator -from detectron2.modeling.backbone import build_backbone -from detectron2.modeling.box_regression import Box2BoxTransform -from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY -from detectron2.modeling.meta_arch.retinanet import ( - permute_all_cls_and_box_to_N_HWA_K_and_concat, - permute_to_N_HWA_K, -) -from detectron2.structures import Boxes, ImageList, Instances -from detectron2.utils.logger import log_first_n - -from tensormask.layers import SwapAlign2Nat - -__all__ = ["TensorMask"] - - -def _assignment_rule( - gt_boxes, - anchor_boxes, - unit_lengths, - min_anchor_size, - scale_thresh=2.0, - spatial_thresh=1.0, - uniqueness_on=True, -): - """ - Given two lists of boxes of N ground truth boxes and M anchor boxes, - compute the assignment between the two, following the assignment rules in - https://arxiv.org/abs/1903.12174. - The box order must be (xmin, ymin, xmax, ymax), so please make sure to convert - to BoxMode.XYXY_ABS before calling this function. - - Args: - gt_boxes, anchor_boxes (Boxes): two Boxes. Contains N & M boxes/anchors, respectively. - unit_lengths (Tensor): Contains the unit lengths of M anchor boxes. - min_anchor_size (float): Minimum size of the anchor, in pixels - scale_thresh (float): The `scale` threshold: the maximum size of the anchor - should not be greater than scale_thresh x max(h, w) of - the ground truth box. - spatial_thresh (float): The `spatial` threshold: the l2 distance between the - center of the anchor and the ground truth box should not - be greater than spatial_thresh x u where u is the unit length. 
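As an illustrative aside (not part of the file above): the three thresholds in the docstring combine as a logical AND. An anchor is a candidate match only if it fully contains the ground-truth box (containment), its size minus one sampling stride does not exceed scale_thresh x max(h, w) of the ground truth (scale centrality), and the two centers lie within spatial_thresh unit lengths of each other (spatial centrality). A minimal PyTorch sketch of those checks with made-up names and toy boxes, assuming the (xmin, ymin, xmax, ymax) box order used above:

```python
import torch

def toy_assignment_checks(gt, anchors, unit_lengths, min_anchor_size,
                          scale_thresh=2.0, spatial_thresh=1.0):
    # Containment: the anchor must fully enclose the ground-truth box.
    contain = (
        (anchors[:, 0] <= gt[0]) & (anchors[:, 1] <= gt[1])
        & (anchors[:, 2] >= gt[2]) & (anchors[:, 3] >= gt[3])
    )
    # Scale centrality: anchor size (minus one sampling stride) must not exceed
    # scale_thresh * max(h, w) of the ground truth, with a floor for small objects.
    gt_size_upper = torch.clamp(torch.max(gt[2:] - gt[:2]) * scale_thresh, min=min_anchor_size)
    anchor_size = torch.max(anchors[:, 2:] - anchors[:, :2], dim=1).values - unit_lengths
    scale_ok = anchor_size <= gt_size_upper
    # Spatial centrality: centers within spatial_thresh unit lengths of each other.
    offset = ((gt[:2] + gt[2:]) / 2 - (anchors[:, :2] + anchors[:, 2:]) / 2) / unit_lengths[:, None]
    spatial_ok = offset.pow(2).sum(dim=1) <= spatial_thresh ** 2
    return contain & scale_ok & spatial_ok

gt = torch.tensor([10.0, 10.0, 50.0, 50.0])            # one ground-truth box
anchors = torch.tensor([[0.0, 0.0, 60.0, 60.0],        # contains gt, center nearby
                        [30.0, 30.0, 90.0, 90.0]])     # does not contain gt
print(toy_assignment_checks(gt, anchors, torch.tensor([8.0, 8.0]), min_anchor_size=44.0))
# expected: tensor([ True, False])
```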
- - Returns: - matches (Tensor[int64]): a vector of length M, where matches[i] is a matched - ground-truth index in [0, N) - match_labels (Tensor[int8]): a vector of length M, where pred_labels[i] indicates - whether a prediction is a true or false positive or ignored - """ - gt_boxes, anchor_boxes = gt_boxes.tensor, anchor_boxes.tensor - N = gt_boxes.shape[0] - M = anchor_boxes.shape[0] - if N == 0 or M == 0: - return ( - gt_boxes.new_full((N,), 0, dtype=torch.int64), - gt_boxes.new_full((N,), -1, dtype=torch.int8), - ) - - # Containment rule - lt = torch.min(gt_boxes[:, None, :2], anchor_boxes[:, :2]) # [N,M,2] - rb = torch.max(gt_boxes[:, None, 2:], anchor_boxes[:, 2:]) # [N,M,2] - union = cat([lt, rb], dim=2) # [N,M,4] - - dummy_gt_boxes = torch.zeros_like(gt_boxes) - anchor = dummy_gt_boxes[:, None, :] + anchor_boxes[:, :] # [N,M,4] - - contain_matrix = torch.all(union == anchor, dim=2) # [N,M] - - # Centrality rule, scale - gt_size_lower = torch.max(gt_boxes[:, 2:] - gt_boxes[:, :2], dim=1)[0] # [N] - gt_size_upper = gt_size_lower * scale_thresh # [N] - # Fall back for small objects - gt_size_upper[gt_size_upper < min_anchor_size] = min_anchor_size - # Due to sampling of locations, the anchor sizes are deducted with sampling strides - anchor_size = ( - torch.max(anchor_boxes[:, 2:] - anchor_boxes[:, :2], dim=1)[0] - unit_lengths - ) # [M] - - size_diff_upper = gt_size_upper[:, None] - anchor_size # [N,M] - scale_matrix = size_diff_upper >= 0 # [N,M] - - # Centrality rule, spatial - gt_center = (gt_boxes[:, 2:] + gt_boxes[:, :2]) / 2 # [N,2] - anchor_center = (anchor_boxes[:, 2:] + anchor_boxes[:, :2]) / 2 # [M,2] - offset_center = gt_center[:, None, :] - anchor_center[:, :] # [N,M,2] - offset_center /= unit_lengths[:, None] # [N,M,2] - spatial_square = spatial_thresh * spatial_thresh - spatial_matrix = torch.sum(offset_center * offset_center, dim=2) <= spatial_square - - assign_matrix = (contain_matrix & scale_matrix & spatial_matrix).int() - - # assign_matrix is N (gt) x M (predicted) - # Max over gt elements (dim 0) to find best gt candidate for each prediction - matched_vals, matches = assign_matrix.max(dim=0) - match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8) - - match_labels[matched_vals == 0] = 0 - match_labels[matched_vals == 1] = 1 - - # find all the elements that match to ground truths multiple times - not_unique_idxs = assign_matrix.sum(dim=0) > 1 - if uniqueness_on: - match_labels[not_unique_idxs] = 0 - else: - match_labels[not_unique_idxs] = -1 - - return matches, match_labels - - -# TODO make the paste_mask function in d2 core support mask list -def _paste_mask_lists_in_image(masks, boxes, image_shape, threshold=0.5): - """ - Paste a list of masks that are of various resolutions (e.g., 28 x 28) into an image. - The location, height, and width for pasting each mask is determined by their - corresponding bounding boxes in boxes. - - Args: - masks (list(Tensor)): A list of Tensor of shape (1, Hmask_i, Wmask_i). - Values are in [0, 1]. The list length, Bimg, is the - number of detected object instances in the image. - boxes (Boxes): A Boxes of length Bimg. boxes.tensor[i] and masks[i] correspond - to the same object instance. - image_shape (tuple): height, width - threshold (float): A threshold in [0, 1] for converting the (soft) masks to - binary masks. - - Returns: - img_masks (Tensor): A tensor of shape (Bimg, Himage, Wimage), where Bimg is the - number of detected object instances and Himage, Wimage are the image width - and height. 
img_masks[i] is a binary mask for object instance i. - """ - if len(masks) == 0: - return torch.empty((0, 1) + image_shape, dtype=torch.uint8) - - # Loop over masks groups. Each group has the same mask prediction size. - img_masks = [] - ind_masks = [] - mask_sizes = torch.tensor([m.shape[-1] for m in masks]) - unique_sizes = torch.unique(mask_sizes) - for msize in unique_sizes.tolist(): - cur_ind = torch.where(mask_sizes == msize)[0] - ind_masks.append(cur_ind) - - cur_masks = cat([masks[i] for i in cur_ind]) - cur_boxes = boxes[cur_ind] - img_masks.append(paste_masks_in_image(cur_masks, cur_boxes, image_shape, threshold)) - - img_masks = cat(img_masks) - ind_masks = cat(ind_masks) - - img_masks_out = torch.empty_like(img_masks) - img_masks_out[ind_masks, :, :] = img_masks - - return img_masks_out - - -def _postprocess(results, result_mask_info, output_height, output_width, mask_threshold=0.5): - """ - Post-process the output boxes for TensorMask. - The input images are often resized when entering an object detector. - As a result, we often need the outputs of the detector in a different - resolution from its inputs. - - This function will postprocess the raw outputs of TensorMask - to produce outputs according to the desired output resolution. - - Args: - results (Instances): the raw outputs from the detector. - `results.image_size` contains the input image resolution the detector sees. - This object might be modified in-place. Note that it does not contain the field - `pred_masks`, which is provided by another input `result_masks`. - result_mask_info (list[Tensor], Boxes): a pair of two items for mask related results. - The first item is a list of #detection tensors, each is the predicted masks. - The second item is the anchors corresponding to the predicted masks. - output_height, output_width: the desired output resolution. - - Returns: - Instances: the postprocessed output from the model, based on the output resolution - """ - scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) - results = Instances((output_height, output_width), **results.get_fields()) - - output_boxes = results.pred_boxes - output_boxes.tensor[:, 0::2] *= scale_x - output_boxes.tensor[:, 1::2] *= scale_y - output_boxes.clip(results.image_size) - - inds_nonempty = output_boxes.nonempty() - results = results[inds_nonempty] - result_masks, result_anchors = result_mask_info - if result_masks: - result_anchors.tensor[:, 0::2] *= scale_x - result_anchors.tensor[:, 1::2] *= scale_y - result_masks = [x for (i, x) in zip(inds_nonempty.tolist(), result_masks) if i] - results.pred_masks = _paste_mask_lists_in_image( - result_masks, - result_anchors[inds_nonempty], - results.image_size, - threshold=mask_threshold, - ) - return results - - -class TensorMaskAnchorGenerator(DefaultAnchorGenerator): - """ - For a set of image sizes and feature maps, computes a set of anchors for TensorMask. - It also computes the unit lengths and indexes for each anchor box. 
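As a rough illustrative aside (the numbers here are assumed for illustration, not taken from the configs), each feature level of the anchor generator described here contributes one anchor per cell anchor per spatial location, each tagged with the level's stride as its unit length and a 5-tuple index:

```python
# Back-of-the-envelope sketch for one feature level (assumed sizes).
level, stride, grid_h, grid_w, num_cell_anchors = 0, 8, 100, 152, 2
num_anchors = grid_h * grid_w * num_cell_anchors   # anchors produced on this level
unit_length = float(stride)                        # same unit length for all of them
example_index = (level, 0, 17, 23, 1)              # (L, I, H, W, A): level, image, y, x, cell anchor
print(num_anchors, unit_length, example_index)     # 30400 8.0 (0, 0, 17, 23, 1)
```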
- """ - - def grid_anchors_with_unit_lengths_and_indexes(self, grid_sizes): - anchors = [] - unit_lengths = [] - indexes = [] - for lvl, (size, stride, base_anchors) in enumerate( - zip(grid_sizes, self.strides, self.cell_anchors) - ): - grid_height, grid_width = size - device = base_anchors.device - shifts_x = torch.arange( - 0, grid_width * stride, step=stride, dtype=torch.float32, device=device - ) - shifts_y = torch.arange( - 0, grid_height * stride, step=stride, dtype=torch.float32, device=device - ) - shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) - shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=2) - # Stack anchors in shapes of (HWA, 4) - cur_anchor = (shifts[:, :, None, :] + base_anchors.view(1, 1, -1, 4)).view(-1, 4) - anchors.append(cur_anchor) - unit_lengths.append( - torch.full((cur_anchor.shape[0],), stride, dtype=torch.float32, device=device) - ) - # create mask indexes using mesh grid - shifts_l = torch.full((1,), lvl, dtype=torch.int64, device=device) - shifts_i = torch.zeros((1,), dtype=torch.int64, device=device) - shifts_h = torch.arange(0, grid_height, dtype=torch.int64, device=device) - shifts_w = torch.arange(0, grid_width, dtype=torch.int64, device=device) - shifts_a = torch.arange(0, base_anchors.shape[0], dtype=torch.int64, device=device) - grids = torch.meshgrid(shifts_l, shifts_i, shifts_h, shifts_w, shifts_a) - - indexes.append(torch.stack(grids, dim=5).view(-1, 5)) - - return anchors, unit_lengths, indexes - - def forward(self, features): - """ - Returns: - list[list[Boxes]]: a list of #image elements. Each is a list of #feature level Boxes. - The Boxes contains anchors of this image on the specific feature level. - list[list[Tensor]]: a list of #image elements. Each is a list of #feature level tensors. - The tensor contains strides, or unit lengths for the anchors. - list[list[Tensor]]: a list of #image elements. Each is a list of #feature level tensors. - The Tensor contains indexes for the anchors, with the last dimension meaning - (L, N, H, W, A), where L is level, I is image (not set yet), H is height, - W is width, and A is anchor. - """ - num_images = len(features[0]) - grid_sizes = [feature_map.shape[-2:] for feature_map in features] - anchors_list, lengths_list, indexes_list = self.grid_anchors_with_unit_lengths_and_indexes( - grid_sizes - ) - - # Convert anchors from Tensor to Boxes - anchors_per_im = [Boxes(x) for x in anchors_list] - - # TODO it can be simplified to not return duplicated information for - # each image, just like detectron2's own AnchorGenerator - anchors = [copy.deepcopy(anchors_per_im) for _ in range(num_images)] - unit_lengths = [copy.deepcopy(lengths_list) for _ in range(num_images)] - indexes = [copy.deepcopy(indexes_list) for _ in range(num_images)] - - return anchors, unit_lengths, indexes - - -@META_ARCH_REGISTRY.register() -class TensorMask(nn.Module): - """ - TensorMask model. Creates FPN backbone, anchors and a head for classification - and box regression. Calculates and applies proper losses to class, box, and - masks. 
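Because the class above is registered in detectron2's META_ARCH_REGISTRY, it can be constructed through the standard config path once the TensorMask keys have been added. A minimal usage sketch, assuming the project's `swap_align2nat` extension has been built as described in the README and that the command is run from the project directory so the config path below resolves:

```python
# Minimal sketch: build the registered TensorMask meta-architecture from a config.
from detectron2.config import get_cfg
from detectron2.modeling import build_model

from tensormask import add_tensormask_config  # adds the MODEL.TENSOR_MASK.* keys used above

cfg = get_cfg()
add_tensormask_config(cfg)
cfg.merge_from_file("configs/tensormask_R_50_FPN_1x.yaml")  # path relative to the project dir
cfg.MODEL.DEVICE = "cpu"        # keep the sketch CPU-only; drop this line to use the default device
model = build_model(cfg)        # looks up "TensorMask" in META_ARCH_REGISTRY
print(type(model).__name__)     # "TensorMask"
```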
- """ - - def __init__(self, cfg): - super().__init__() - - # fmt: off - self.num_classes = cfg.MODEL.TENSOR_MASK.NUM_CLASSES - self.in_features = cfg.MODEL.TENSOR_MASK.IN_FEATURES - self.anchor_sizes = cfg.MODEL.ANCHOR_GENERATOR.SIZES - self.num_levels = len(cfg.MODEL.ANCHOR_GENERATOR.SIZES) - # Loss parameters: - self.focal_loss_alpha = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_ALPHA - self.focal_loss_gamma = cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_GAMMA - # Inference parameters: - self.score_threshold = cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST - self.topk_candidates = cfg.MODEL.TENSOR_MASK.TOPK_CANDIDATES_TEST - self.nms_threshold = cfg.MODEL.TENSOR_MASK.NMS_THRESH_TEST - self.detections_im = cfg.TEST.DETECTIONS_PER_IMAGE - # Mask parameters: - self.mask_on = cfg.MODEL.MASK_ON - self.mask_loss_weight = cfg.MODEL.TENSOR_MASK.MASK_LOSS_WEIGHT - self.mask_pos_weight = torch.tensor(cfg.MODEL.TENSOR_MASK.POSITIVE_WEIGHT, - dtype=torch.float32) - self.bipyramid_on = cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON - # fmt: on - - # build the backbone - self.backbone = build_backbone(cfg) - - backbone_shape = self.backbone.output_shape() - feature_shapes = [backbone_shape[f] for f in self.in_features] - feature_strides = [x.stride for x in feature_shapes] - # build anchors - self.anchor_generator = TensorMaskAnchorGenerator(cfg, feature_shapes) - self.num_anchors = self.anchor_generator.num_cell_anchors[0] - anchors_min_level = cfg.MODEL.ANCHOR_GENERATOR.SIZES[0] - self.mask_sizes = [size // feature_strides[0] for size in anchors_min_level] - self.min_anchor_size = min(anchors_min_level) - feature_strides[0] - - # head of the TensorMask - self.head = TensorMaskHead( - cfg, self.num_levels, self.num_anchors, self.mask_sizes, feature_shapes - ) - # box transform - self.box2box_transform = Box2BoxTransform(weights=cfg.MODEL.TENSOR_MASK.BBOX_REG_WEIGHTS) - self.register_buffer("pixel_mean", torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(-1, 1, 1)) - self.register_buffer("pixel_std", torch.Tensor(cfg.MODEL.PIXEL_STD).view(-1, 1, 1)) - - @property - def device(self): - return self.pixel_mean.device - - def forward(self, batched_inputs): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DetectionTransform` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - image: Tensor, image in (C, H, W) format. - instances: Instances - Other information that's included in the original dicts, such as: - "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - Returns: - losses (dict[str: Tensor]): mapping from a named loss to a tensor - storing the loss. Used during training only. - """ - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - elif "targets" in batched_inputs[0]: - log_first_n( - logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 - ) - gt_instances = [x["targets"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - features = self.backbone(images.tensor) - features = [features[f] for f in self.in_features] - # apply the TensorMask head - pred_logits, pred_deltas, pred_masks = self.head(features) - # generate anchors based on features, is it image specific? 
- anchors, unit_lengths, indexes = self.anchor_generator(features) - - if self.training: - # get ground truths for class labels and box targets, it will label each anchor - gt_class_info, gt_delta_info, gt_mask_info, num_fg = self.get_ground_truth( - anchors, unit_lengths, indexes, gt_instances - ) - # compute the loss - return self.losses( - gt_class_info, - gt_delta_info, - gt_mask_info, - num_fg, - pred_logits, - pred_deltas, - pred_masks, - ) - else: - # do inference to get the output - results = self.inference(pred_logits, pred_deltas, pred_masks, anchors, indexes, images) - processed_results = [] - for results_im, input_im, image_size in zip( - results, batched_inputs, images.image_sizes - ): - height = input_im.get("height", image_size[0]) - width = input_im.get("width", image_size[1]) - # this is to do post-processing with the image size - result_box, result_mask = results_im - r = _postprocess(result_box, result_mask, height, width) - processed_results.append({"instances": r}) - return processed_results - - def losses( - self, - gt_class_info, - gt_delta_info, - gt_mask_info, - num_fg, - pred_logits, - pred_deltas, - pred_masks, - ): - """ - Args: - For `gt_class_info`, `gt_delta_info`, `gt_mask_info` and `num_fg` parameters, see - :meth:`TensorMask.get_ground_truth`. - For `pred_logits`, `pred_deltas` and `pred_masks`, see - :meth:`TensorMaskHead.forward`. - - Returns: - losses (dict[str: Tensor]): mapping from a named loss to a scalar tensor - storing the loss. Used during training only. The potential dict keys are: - "loss_cls", "loss_box_reg" and "loss_mask". - """ - gt_classes_target, gt_valid_inds = gt_class_info - gt_deltas, gt_fg_inds = gt_delta_info - gt_masks, gt_mask_inds = gt_mask_info - loss_normalizer = torch.tensor(max(1, num_fg), dtype=torch.float32, device=self.device) - - # classification and regression - pred_logits, pred_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat( - pred_logits, pred_deltas, self.num_classes - ) - loss_cls = ( - sigmoid_focal_loss_star_jit( - pred_logits[gt_valid_inds], - gt_classes_target[gt_valid_inds], - alpha=self.focal_loss_alpha, - gamma=self.focal_loss_gamma, - reduction="sum", - ) - / loss_normalizer - ) - - if num_fg == 0: - loss_box_reg = pred_deltas.sum() * 0 - else: - loss_box_reg = ( - smooth_l1_loss(pred_deltas[gt_fg_inds], gt_deltas, beta=0.0, reduction="sum") - / loss_normalizer - ) - losses = {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg} - - # mask prediction - if self.mask_on: - loss_mask = 0 - for lvl in range(self.num_levels): - cur_level_factor = 2 ** lvl if self.bipyramid_on else 1 - for anc in range(self.num_anchors): - cur_gt_mask_inds = gt_mask_inds[lvl][anc] - if cur_gt_mask_inds is None: - loss_mask += pred_masks[lvl][anc][0, 0, 0, 0] * 0 - else: - cur_mask_size = self.mask_sizes[anc] * cur_level_factor - # TODO maybe there are numerical issues when mask sizes are large - cur_size_divider = torch.tensor( - self.mask_loss_weight / (cur_mask_size ** 2), - dtype=torch.float32, - device=self.device, - ) - - cur_pred_masks = pred_masks[lvl][anc][ - cur_gt_mask_inds[:, 0], # N - :, # V x U - cur_gt_mask_inds[:, 1], # H - cur_gt_mask_inds[:, 2], # W - ] - - loss_mask += F.binary_cross_entropy_with_logits( - cur_pred_masks.view(-1, cur_mask_size, cur_mask_size), # V, U - gt_masks[lvl][anc].to(dtype=torch.float32), - reduction="sum", - weight=cur_size_divider, - pos_weight=self.mask_pos_weight, - ) - losses["loss_mask"] = loss_mask / loss_normalizer - return losses - - @torch.no_grad() - def 
get_ground_truth(self, anchors, unit_lengths, indexes, targets): - """ - Args: - anchors (list[list[Boxes]]): a list of N=#image elements. Each is a - list of #feature level Boxes. The Boxes contains anchors of - this image on the specific feature level. - unit_lengths (list[list[Tensor]]): a list of N=#image elements. Each is a - list of #feature level Tensor. The tensor contains unit lengths for anchors of - this image on the specific feature level. - indexes (list[list[Tensor]]): a list of N=#image elements. Each is a - list of #feature level Tensor. The tensor contains the 5D index of - each anchor, the second dimension means (L, N, H, W, A), where L - is level, I is image, H is height, W is width, and A is anchor. - targets (list[Instances]): a list of N `Instances`s. The i-th - `Instances` contains the ground-truth per-instance annotations - for the i-th input image. Specify `targets` during training only. - - Returns: - gt_class_info (Tensor, Tensor): A pair of two tensors for classification. - The first one is an integer tensor of shape (R, #classes) storing ground-truth - labels for each anchor. R is the total number of anchors in the batch. - The second one is an integer tensor of shape (R,), to indicate which - anchors are valid for loss computation, which anchors are not. - gt_delta_info (Tensor, Tensor): A pair of two tensors for boxes. - The first one, of shape (F, 4). F=#foreground anchors. - The last dimension represents ground-truth box2box transform - targets (dx, dy, dw, dh) that map each anchor to its matched ground-truth box. - Only foreground anchors have values in this tensor. Could be `None` if F=0. - The second one, of shape (R,), is an integer tensor indicating which anchors - are foreground ones used for box regression. Could be `None` if F=0. - gt_mask_info (list[list[Tensor]], list[list[Tensor]]): A pair of two lists for masks. - The first one is a list of P=#feature level elements. Each is a - list of A=#anchor tensors. Each tensor contains the ground truth - masks of the same size and for the same feature level. Could be `None`. - The second one is a list of P=#feature level elements. Each is a - list of A=#anchor tensors. Each tensor contains the location of the ground truth - masks of the same size and for the same feature level. The second dimension means - (N, H, W), where N is image, H is height, and W is width. Could be `None`. - num_fg (int): F=#foreground anchors, used later for loss normalization. 
- """ - gt_classes = [] - gt_deltas = [] - gt_masks = [[[] for _ in range(self.num_anchors)] for _ in range(self.num_levels)] - gt_mask_inds = [[[] for _ in range(self.num_anchors)] for _ in range(self.num_levels)] - - anchors = [Boxes.cat(anchors_i) for anchors_i in anchors] - unit_lengths = [cat(unit_lengths_i) for unit_lengths_i in unit_lengths] - indexes = [cat(indexes_i) for indexes_i in indexes] - - num_fg = 0 - for i, (anchors_im, unit_lengths_im, indexes_im, targets_im) in enumerate( - zip(anchors, unit_lengths, indexes, targets) - ): - # Initialize all - gt_classes_i = torch.full_like( - unit_lengths_im, self.num_classes, dtype=torch.int64, device=self.device - ) - # Ground truth classes - has_gt = len(targets_im) > 0 - if has_gt: - # Compute the pairwise matrix - gt_matched_inds, anchor_labels = _assignment_rule( - targets_im.gt_boxes, anchors_im, unit_lengths_im, self.min_anchor_size - ) - # Find the foreground instances - fg_inds = anchor_labels == 1 - fg_anchors = anchors_im[fg_inds] - num_fg += len(fg_anchors) - # Find the ground truths for foreground instances - gt_fg_matched_inds = gt_matched_inds[fg_inds] - # Assign labels for foreground instances - gt_classes_i[fg_inds] = targets_im.gt_classes[gt_fg_matched_inds] - # Anchors with label -1 are ignored, others are left as negative - gt_classes_i[anchor_labels == -1] = -1 - - # Boxes - # Ground truth box regression, only for foregrounds - matched_gt_boxes = targets_im[gt_fg_matched_inds].gt_boxes - # Compute box regression offsets for foregrounds only - gt_deltas_i = self.box2box_transform.get_deltas( - fg_anchors.tensor, matched_gt_boxes.tensor - ) - gt_deltas.append(gt_deltas_i) - - # Masks - if self.mask_on: - # Compute masks for each level and each anchor - matched_indexes = indexes_im[fg_inds, :] - for lvl in range(self.num_levels): - ids_lvl = matched_indexes[:, 0] == lvl - if torch.any(ids_lvl): - cur_level_factor = 2 ** lvl if self.bipyramid_on else 1 - for anc in range(self.num_anchors): - ids_lvl_anchor = ids_lvl & (matched_indexes[:, 4] == anc) - if torch.any(ids_lvl_anchor): - gt_masks[lvl][anc].append( - targets_im[ - gt_fg_matched_inds[ids_lvl_anchor] - ].gt_masks.crop_and_resize( - fg_anchors[ids_lvl_anchor].tensor, - self.mask_sizes[anc] * cur_level_factor, - ) - ) - # Select (N, H, W) dimensions - gt_mask_inds_lvl_anc = matched_indexes[ids_lvl_anchor, 1:4] - # Set the image index to the current image - gt_mask_inds_lvl_anc[:, 0] = i - gt_mask_inds[lvl][anc].append(gt_mask_inds_lvl_anc) - gt_classes.append(gt_classes_i) - - # Classes and boxes - gt_classes = cat(gt_classes) - gt_valid_inds = gt_classes >= 0 - gt_fg_inds = gt_valid_inds & (gt_classes < self.num_classes) - gt_classes_target = torch.zeros( - (gt_classes.shape[0], self.num_classes), dtype=torch.float32, device=self.device - ) - gt_classes_target[gt_fg_inds, gt_classes[gt_fg_inds]] = 1 - gt_deltas = cat(gt_deltas) if gt_deltas else None - - # Masks - gt_masks = [[cat(mla) if mla else None for mla in ml] for ml in gt_masks] - gt_mask_inds = [[cat(ila) if ila else None for ila in il] for il in gt_mask_inds] - return ( - (gt_classes_target, gt_valid_inds), - (gt_deltas, gt_fg_inds), - (gt_masks, gt_mask_inds), - num_fg, - ) - - def inference(self, pred_logits, pred_deltas, pred_masks, anchors, indexes, images): - """ - Arguments: - pred_logits, pred_deltas, pred_masks: Same as the output of: - meth:`TensorMaskHead.forward` - anchors, indexes: Same as the input of meth:`TensorMask.get_ground_truth` - images (ImageList): the input images - - Returns: - 
results (List[Instances]): a list of #images elements. - """ - assert len(anchors) == len(images) - results = [] - - pred_logits = [permute_to_N_HWA_K(x, self.num_classes) for x in pred_logits] - pred_deltas = [permute_to_N_HWA_K(x, 4) for x in pred_deltas] - - pred_logits = cat(pred_logits, dim=1) - pred_deltas = cat(pred_deltas, dim=1) - - for img_idx, (anchors_im, indexes_im) in enumerate(zip(anchors, indexes)): - # Get the size of the current image - image_size = images.image_sizes[img_idx] - - logits_im = pred_logits[img_idx] - deltas_im = pred_deltas[img_idx] - - if self.mask_on: - masks_im = [[mla[img_idx] for mla in ml] for ml in pred_masks] - else: - masks_im = [None] * self.num_levels - results_im = self.inference_single_image( - logits_im, - deltas_im, - masks_im, - Boxes.cat(anchors_im), - cat(indexes_im), - tuple(image_size), - ) - results.append(results_im) - return results - - def inference_single_image( - self, pred_logits, pred_deltas, pred_masks, anchors, indexes, image_size - ): - """ - Single-image inference. Return bounding-box detection results by thresholding - on scores and applying non-maximum suppression (NMS). - - Arguments: - pred_logits (list[Tensor]): list of #feature levels. Each entry contains - tensor of size (AxHxW, K) - pred_deltas (list[Tensor]): Same shape as 'pred_logits' except that K becomes 4. - pred_masks (list[list[Tensor]]): List of #feature levels, each is a list of #anchors. - Each entry contains tensor of size (M_i*M_i, H, W). `None` if mask_on=False. - anchors (list[Boxes]): list of #feature levels. Each entry contains - a Boxes object, which contains all the anchors for that - image in that feature level. - image_size (tuple(H, W)): a tuple of the image height and width. - - Returns: - Same as `inference`, but for only one image. - """ - pred_logits = pred_logits.flatten().sigmoid_() - # We get top locations across all levels to accelerate the inference speed, - # which does not seem to affect the accuracy. 
- # First select values above the threshold - logits_top_idxs = torch.where(pred_logits > self.score_threshold)[0] - # Then get the top values - num_topk = min(self.topk_candidates, logits_top_idxs.shape[0]) - pred_prob, topk_idxs = pred_logits[logits_top_idxs].sort(descending=True) - # Keep top k scoring values - pred_prob = pred_prob[:num_topk] - # Keep top k values - top_idxs = logits_top_idxs[topk_idxs[:num_topk]] - - # class index - cls_idxs = top_idxs % self.num_classes - # HWA index - top_idxs //= self.num_classes - # predict boxes - pred_boxes = self.box2box_transform.apply_deltas( - pred_deltas[top_idxs], anchors[top_idxs].tensor - ) - # apply nms - keep = batched_nms(pred_boxes, pred_prob, cls_idxs, self.nms_threshold) - # pick the top ones - keep = keep[: self.detections_im] - - results = Instances(image_size) - results.pred_boxes = Boxes(pred_boxes[keep]) - results.scores = pred_prob[keep] - results.pred_classes = cls_idxs[keep] - - # deal with masks - result_masks, result_anchors = [], None - if self.mask_on: - # index and anchors, useful for masks - top_indexes = indexes[top_idxs] - top_anchors = anchors[top_idxs] - result_indexes = top_indexes[keep] - result_anchors = top_anchors[keep] - # Get masks and do sigmoid - for lvl, _, h, w, anc in result_indexes.tolist(): - cur_size = self.mask_sizes[anc] * (2 ** lvl if self.bipyramid_on else 1) - result_masks.append( - torch.sigmoid(pred_masks[lvl][anc][:, h, w].view(1, cur_size, cur_size)) - ) - - return results, (result_masks, result_anchors) - - def preprocess_image(self, batched_inputs): - """ - Normalize, pad and batch the input images. - """ - images = [x["image"].to(self.device) for x in batched_inputs] - images = [(x - self.pixel_mean) / self.pixel_std for x in images] - images = ImageList.from_tensors(images, self.backbone.size_divisibility) - return images - - -class TensorMaskHead(nn.Module): - def __init__(self, cfg, num_levels, num_anchors, mask_sizes, input_shape: List[ShapeSpec]): - """ - TensorMask head. 
- """ - super().__init__() - # fmt: off - self.in_features = cfg.MODEL.TENSOR_MASK.IN_FEATURES - in_channels = input_shape[0].channels - num_classes = cfg.MODEL.TENSOR_MASK.NUM_CLASSES - cls_channels = cfg.MODEL.TENSOR_MASK.CLS_CHANNELS - num_convs = cfg.MODEL.TENSOR_MASK.NUM_CONVS - # box parameters - bbox_channels = cfg.MODEL.TENSOR_MASK.BBOX_CHANNELS - # mask parameters - self.mask_on = cfg.MODEL.MASK_ON - self.mask_sizes = mask_sizes - mask_channels = cfg.MODEL.TENSOR_MASK.MASK_CHANNELS - self.align_on = cfg.MODEL.TENSOR_MASK.ALIGNED_ON - self.bipyramid_on = cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON - # fmt: on - - # class subnet - cls_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - cls_subnet.append( - nn.Conv2d(cur_channels, cls_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = cls_channels - cls_subnet.append(nn.ReLU()) - - self.cls_subnet = nn.Sequential(*cls_subnet) - self.cls_score = nn.Conv2d( - cur_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1 - ) - modules_list = [self.cls_subnet, self.cls_score] - - # box subnet - bbox_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - bbox_subnet.append( - nn.Conv2d(cur_channels, bbox_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = bbox_channels - bbox_subnet.append(nn.ReLU()) - - self.bbox_subnet = nn.Sequential(*bbox_subnet) - self.bbox_pred = nn.Conv2d( - cur_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1 - ) - modules_list.extend([self.bbox_subnet, self.bbox_pred]) - - # mask subnet - if self.mask_on: - mask_subnet = [] - cur_channels = in_channels - for _ in range(num_convs): - mask_subnet.append( - nn.Conv2d(cur_channels, mask_channels, kernel_size=3, stride=1, padding=1) - ) - cur_channels = mask_channels - mask_subnet.append(nn.ReLU()) - - self.mask_subnet = nn.Sequential(*mask_subnet) - modules_list.append(self.mask_subnet) - for mask_size in self.mask_sizes: - cur_mask_module = "mask_pred_%02d" % mask_size - self.add_module( - cur_mask_module, - nn.Conv2d( - cur_channels, mask_size * mask_size, kernel_size=1, stride=1, padding=0 - ), - ) - modules_list.append(getattr(self, cur_mask_module)) - if self.align_on: - if self.bipyramid_on: - for lvl in range(num_levels): - cur_mask_module = "align2nat_%02d" % lvl - lambda_val = 2 ** lvl - setattr(self, cur_mask_module, SwapAlign2Nat(lambda_val)) - # Also the fusing layer, stay at the same channel size - mask_fuse = [ - nn.Conv2d(cur_channels, cur_channels, kernel_size=3, stride=1, padding=1), - nn.ReLU(), - ] - self.mask_fuse = nn.Sequential(*mask_fuse) - modules_list.append(self.mask_fuse) - else: - self.align2nat = SwapAlign2Nat(1) - - # Initialization - for modules in modules_list: - for layer in modules.modules(): - if isinstance(layer, nn.Conv2d): - torch.nn.init.normal_(layer.weight, mean=0, std=0.01) - torch.nn.init.constant_(layer.bias, 0) - - # Use prior in model initialization to improve stability - bias_value = -(math.log((1 - 0.01) / 0.01)) - torch.nn.init.constant_(self.cls_score.bias, bias_value) - - def forward(self, features): - """ - Arguments: - features (list[Tensor]): FPN feature map tensors in high to low resolution. - Each tensor in the list correspond to different feature levels. - - Returns: - pred_logits (list[Tensor]): #lvl tensors, each has shape (N, AxK, Hi, Wi). - The tensor predicts the classification probability - at each spatial position for each of the A anchors and K object - classes. 
- pred_deltas (list[Tensor]): #lvl tensors, each has shape (N, Ax4, Hi, Wi). - The tensor predicts 4-vector (dx,dy,dw,dh) box - regression values for every anchor. These values are the - relative offset between the anchor and the ground truth box. - pred_masks (list(list[Tensor])): #lvl list of tensors, each is a list of - A tensors of shape (N, M_{i,a}, Hi, Wi). - The tensor predicts a dense set of M_ixM_i masks at every location. - """ - pred_logits = [self.cls_score(self.cls_subnet(x)) for x in features] - pred_deltas = [self.bbox_pred(self.bbox_subnet(x)) for x in features] - - pred_masks = None - if self.mask_on: - mask_feats = [self.mask_subnet(x) for x in features] - - if self.bipyramid_on: - mask_feat_high_res = mask_feats[0] - H, W = mask_feat_high_res.shape[-2:] - mask_feats_up = [] - for lvl, mask_feat in enumerate(mask_feats): - lambda_val = 2.0 ** lvl - mask_feat_up = mask_feat - if lvl > 0: - mask_feat_up = F.interpolate( - mask_feat, scale_factor=lambda_val, mode="bilinear", align_corners=False - ) - mask_feats_up.append( - self.mask_fuse(mask_feat_up[:, :, :H, :W] + mask_feat_high_res) - ) - mask_feats = mask_feats_up - - pred_masks = [] - for lvl, mask_feat in enumerate(mask_feats): - cur_masks = [] - for mask_size in self.mask_sizes: - cur_mask_module = getattr(self, "mask_pred_%02d" % mask_size) - cur_mask = cur_mask_module(mask_feat) - if self.align_on: - if self.bipyramid_on: - cur_mask_module = getattr(self, "align2nat_%02d" % lvl) - cur_mask = cur_mask_module(cur_mask) - else: - cur_mask = self.align2nat(cur_mask) - cur_masks.append(cur_mask) - pred_masks.append(cur_masks) - return pred_logits, pred_deltas, pred_masks diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/config.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/config.py deleted file mode 100644 index 44479f2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/config.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from detectron2.config import CfgNode as CN - - -def add_tensormask_config(cfg): - """ - Add config for TensorMask. - """ - cfg.MODEL.TENSOR_MASK = CN() - - # Anchor parameters - cfg.MODEL.TENSOR_MASK.IN_FEATURES = ["p2", "p3", "p4", "p5", "p6", "p7"] - - # Convolutions to use in the towers - cfg.MODEL.TENSOR_MASK.NUM_CONVS = 4 - - # Number of foreground classes. 
- cfg.MODEL.TENSOR_MASK.NUM_CLASSES = 80 - # Channel size for the classification tower - cfg.MODEL.TENSOR_MASK.CLS_CHANNELS = 256 - - cfg.MODEL.TENSOR_MASK.SCORE_THRESH_TEST = 0.05 - # Only the top (1000 * #levels) candidate boxes across all levels are - # considered jointly during test (to improve speed) - cfg.MODEL.TENSOR_MASK.TOPK_CANDIDATES_TEST = 6000 - cfg.MODEL.TENSOR_MASK.NMS_THRESH_TEST = 0.5 - - # Box parameters - # Channel size for the box tower - cfg.MODEL.TENSOR_MASK.BBOX_CHANNELS = 128 - # Weights on (dx, dy, dw, dh) - cfg.MODEL.TENSOR_MASK.BBOX_REG_WEIGHTS = (1.5, 1.5, 0.75, 0.75) - - # Loss parameters - cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_GAMMA = 3.0 - cfg.MODEL.TENSOR_MASK.FOCAL_LOSS_ALPHA = 0.3 - - # Mask parameters - # Channel size for the mask tower - cfg.MODEL.TENSOR_MASK.MASK_CHANNELS = 128 - # Mask loss weight - cfg.MODEL.TENSOR_MASK.MASK_LOSS_WEIGHT = 2.0 - # weight on positive pixels within the mask - cfg.MODEL.TENSOR_MASK.POSITIVE_WEIGHT = 1.5 - # Whether to predict in the aligned representation - cfg.MODEL.TENSOR_MASK.ALIGNED_ON = False - # Whether to use the bipyramid architecture - cfg.MODEL.TENSOR_MASK.BIPYRAMID_ON = False diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/__init__.py deleted file mode 100644 index cbbac42..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .swap_align2nat import SwapAlign2Nat, swap_align2nat - -__all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h deleted file mode 100644 index 2ec0373..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#pragma once -#include - -namespace tensormask { - -#ifdef WITH_CUDA -at::Tensor SwapAlign2Nat_forward_cuda( - const at::Tensor& X, - const int lambda_val, - const float pad_val); - -at::Tensor SwapAlign2Nat_backward_cuda( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width); -#endif - -inline at::Tensor SwapAlign2Nat_forward( - const at::Tensor& X, - const int lambda_val, - const float pad_val) { - if (X.type().is_cuda()) { -#ifdef WITH_CUDA - return SwapAlign2Nat_forward_cuda(X, lambda_val, pad_val); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -inline at::Tensor SwapAlign2Nat_backward( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width) { - if (gY.type().is_cuda()) { -#ifdef WITH_CUDA - return SwapAlign2Nat_backward_cuda( - gY, lambda_val, batch_size, channel, height, width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - AT_ERROR("Not implemented on the CPU"); -} - -} // namespace tensormask diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu deleted file mode 100644 index 06de4a4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/SwapAlign2Nat/SwapAlign2Nat_cuda.cu +++ /dev/null @@ -1,526 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -#include -#include -#include -#include - -// TODO make it in a common file -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - -template -__device__ inline T get_pixel_val( - const T* tensor, - const int idx, - const int H, - const int W, - const int y, - const int x, - const int V, - const int U, - const int v, - const int u, - const T pad_val) { - if ((y < 0) || (y >= H) || (x < 0) || (x >= W) || (v < 0) || (v >= V) || - (u < 0) || (u >= U)) { - return pad_val; - } else { - return tensor[(((idx * V + v) * U + u) * H + y) * W + x]; - } -} - -template -__device__ inline void add_pixel_val( - T* tensor, - const T val, - const int idx, - const int H, - const int W, - const int y, - const int x, - const int V, - const int U, - const int v, - const int u) { - if ((val == 0.) || (y < 0) || (y >= H) || (x < 0) || (x >= W) || (v < 0) || - (v >= V) || (u < 0) || (u >= U)) { - return; - } else { - atomicAdd(tensor + ((((idx * V + v) * U + u) * H + y) * W + x), val); - } -} - -template -__global__ void SwapAlign2NatForwardFeat( - const int nthreads, - const T* bottom_data, - const int Vout, - const int Uout, - const float hVout, - const float hUout, - const int Vin, - const int Uin, - const float lambda, - const int Hin, - const int Win, - const int Hout, - const int Wout, - const T pad_val, - T* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int idx = index; - const int x = idx % Wout; - idx /= Wout; - const int y = idx % Hout; - idx /= Hout; - const int u = idx % Uout; - idx /= Uout; - const int v = idx % Vout; - idx /= Vout; - - const float ox = x * lambda + u - hUout + 0.5; - const int xf = static_cast(floor(ox)); - const int xc = static_cast(ceil(ox)); - const float xwc = ox - xf; - const float xwf = 1. 
- xwc; - - const float oy = y * lambda + v - hVout + 0.5; - const int yf = static_cast(floor(oy)); - const int yc = static_cast(ceil(oy)); - const float ywc = oy - yf; - const float ywf = 1. - ywc; - - const float ou = (u + 0.5) / lambda - 0.5; - const int uf = static_cast(floor(ou)); - const int uc = static_cast(ceil(ou)); - const float uwc = ou - uf; - const float uwf = 1. - uwc; - - const float ov = (v + 0.5) / lambda - 0.5; - const int vf = static_cast(floor(ov)); - const int vc = static_cast(ceil(ov)); - const float vwc = ov - vf; - const float vwf = 1. - vwc; - - T val = ywf * xwf * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vf, uf, pad_val) + - ywf * xwf * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vf, uc, pad_val) + - ywf * xwf * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vc, uf, pad_val) + - ywf * xwf * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xf, Vin, Uin, vc, uc, pad_val) + - ywf * xwc * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vf, uf, pad_val) + - ywf * xwc * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vf, uc, pad_val) + - ywf * xwc * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vc, uf, pad_val) + - ywf * xwc * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yf, xc, Vin, Uin, vc, uc, pad_val) + - ywc * xwf * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vf, uf, pad_val) + - ywc * xwf * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vf, uc, pad_val) + - ywc * xwf * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vc, uf, pad_val) + - ywc * xwf * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xf, Vin, Uin, vc, uc, pad_val) + - ywc * xwc * vwf * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vf, uf, pad_val) + - ywc * xwc * vwf * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vf, uc, pad_val) + - ywc * xwc * vwc * uwf * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vc, uf, pad_val) + - ywc * xwc * vwc * uwc * - get_pixel_val( - bottom_data, idx, Hin, Win, yc, xc, Vin, Uin, vc, uc, pad_val); - - top_data[index] = val; - } -} - -template -__global__ void SwapAlign2NatBackwardFeat( - const int nthreads, - const T* top_diff, - const int Vout, - const int Uout, - const float hVout, - const float hUout, - const int Vin, - const int Uin, - const float lambda, - const int Hin, - const int Win, - const int Hout, - const int Wout, - T* bottom_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int idx = index; - const int x = idx % Wout; - idx /= Wout; - const int y = idx % Hout; - idx /= Hout; - const int u = idx % Uout; - idx /= Uout; - const int v = idx % Vout; - idx /= Vout; - - const float ox = x * lambda + u - hUout + 0.5; - const int xf = static_cast(floor(ox)); - const int xc = static_cast(ceil(ox)); - const float xwc = ox - xf; - const float xwf = 1. - xwc; - - const float oy = y * lambda + v - hVout + 0.5; - const int yf = static_cast(floor(oy)); - const int yc = static_cast(ceil(oy)); - const float ywc = oy - yf; - const float ywf = 1. - ywc; - - const float ou = (u + 0.5) / lambda - 0.5; - const int uf = static_cast(floor(ou)); - const int uc = static_cast(ceil(ou)); - const float uwc = ou - uf; - const float uwf = 1. 
- uwc; - - const float ov = (v + 0.5) / lambda - 0.5; - const int vf = static_cast(floor(ov)); - const int vc = static_cast(ceil(ov)); - const float vwc = ov - vf; - const float vwf = 1. - vwc; - - const T grad = top_diff[index]; - - add_pixel_val( - bottom_diff, - ywf * xwf * vwf * uwf * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywf * xwf * vwf * uwc * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywf * xwf * vwc * uwf * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywf * xwf * vwc * uwc * grad, - idx, - Hin, - Win, - yf, - xf, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywf * xwc * vwf * uwf * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywf * xwc * vwf * uwc * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywf * xwc * vwc * uwf * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywf * xwc * vwc * uwc * grad, - idx, - Hin, - Win, - yf, - xc, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywc * xwf * vwf * uwf * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywc * xwf * vwf * uwc * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywc * xwf * vwc * uwf * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywc * xwf * vwc * uwc * grad, - idx, - Hin, - Win, - yc, - xf, - Vin, - Uin, - vc, - uc); - add_pixel_val( - bottom_diff, - ywc * xwc * vwf * uwf * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vf, - uf); - add_pixel_val( - bottom_diff, - ywc * xwc * vwf * uwc * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vf, - uc); - add_pixel_val( - bottom_diff, - ywc * xwc * vwc * uwf * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vc, - uf); - add_pixel_val( - bottom_diff, - ywc * xwc * vwc * uwc * grad, - idx, - Hin, - Win, - yc, - xc, - Vin, - Uin, - vc, - uc); - } -} - -namespace tensormask { - -at::Tensor SwapAlign2Nat_forward_cuda( - const at::Tensor& X, - const int lambda_val, - const float pad_val) { - AT_ASSERTM(X.device().is_cuda(), "input must be a CUDA tensor"); - AT_ASSERTM(X.ndimension() == 4, "input must be a 4D tensor"); - AT_ASSERTM(lambda_val >= 1, "lambda should be greater or equal to 1"); - const int N = X.size(0); - const int C = X.size(1); - const int Vin = static_cast(sqrt(static_cast(C))); - const int Uin = C / Vin; - AT_ASSERTM( - C == Vin * Uin && Vin == Uin, "#channels should be a square number"); - const int Vout = lambda_val * Vin; - const int Uout = lambda_val * Uin; - const int Hin = X.size(2); - const int Win = X.size(3); - const float lambda = static_cast(lambda_val); - const int Hout = static_cast(ceil(Hin / lambda)); - const int Wout = static_cast(ceil(Win / lambda)); - const float hVout = Vout / 2.; - const float hUout = Uout / 2.; - - at::cuda::CUDAGuard device_guard(X.device()); - - at::Tensor Y = at::empty({N, Vout * Uout, Hout, Wout}, X.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(Y.numel(), 512L), 4096L)); - dim3 block(512); - - if (Y.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return Y; - } - - auto X_ = X.contiguous(); - 
AT_DISPATCH_FLOATING_TYPES(X.scalar_type(), "SwapAlign2Nat_forward", [&] { - SwapAlign2NatForwardFeat<<>>( - Y.numel(), - X_.data_ptr(), - Vout, - Uout, - hVout, - hUout, - Vin, - Uin, - lambda, - Hin, - Win, - Hout, - Wout, - pad_val, - Y.data_ptr()); - }); - cudaDeviceSynchronize(); - AT_CUDA_CHECK(cudaGetLastError()); - return Y; -} - -at::Tensor SwapAlign2Nat_backward_cuda( - const at::Tensor& gY, - const int lambda_val, - const int batch_size, - const int channel, - const int height, - const int width) { - AT_ASSERTM(gY.device().is_cuda(), "input gradient must be a CUDA tensor"); - AT_ASSERTM(gY.ndimension() == 4, "input gradient must be a 4D tensor"); - AT_ASSERTM(lambda_val >= 1, "lambda should be greater or equal to 1"); - const int Vin = static_cast(sqrt(static_cast(channel))); - const int Uin = channel / Vin; - const int Vout = lambda_val * Vin; - const int Uout = lambda_val * Uin; - const float hVout = Vout / 2.; - const float hUout = Uout / 2.; - const int Hout = gY.size(2); - const int Wout = gY.size(3); - - at::cuda::CUDAGuard device_guard(gY.device()); - - at::Tensor gX = at::zeros({batch_size, channel, height, width}, gY.options()); - - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - - dim3 grid(std::min(at::cuda::ATenCeilDiv(gY.numel(), 512L), 4096L)); - dim3 block(512); - - // handle possibly empty gradients - if (gY.numel() == 0) { - AT_CUDA_CHECK(cudaGetLastError()); - return gX; - } - - auto gY_ = gY.contiguous(); - AT_DISPATCH_FLOATING_TYPES(gY.scalar_type(), "SwapAlign2Nat_backward", [&] { - SwapAlign2NatBackwardFeat<<>>( - gY.numel(), - gY_.data_ptr(), - Vout, - Uout, - hVout, - hUout, - Vin, - Uin, - static_cast(lambda_val), - height, - width, - Hout, - Wout, - gX.data_ptr()); - }); - AT_CUDA_CHECK(cudaGetLastError()); - return gX; -} - -} // namespace tensormask diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp deleted file mode 100644 index ad8e472..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/csrc/vision.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -#include -#include "SwapAlign2Nat/SwapAlign2Nat.h" - -namespace tensormask { - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def( - "swap_align2nat_forward", - &SwapAlign2Nat_forward, - "SwapAlign2Nat_forward"); - m.def( - "swap_align2nat_backward", - &SwapAlign2Nat_backward, - "SwapAlign2Nat_backward"); -} - -} // namespace tensormask diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py deleted file mode 100644 index a72c98a..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tensormask/layers/swap_align2nat.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from torch import nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from tensormask import _C - - -class _SwapAlign2Nat(Function): - @staticmethod - def forward(ctx, X, lambda_val, pad_val): - ctx.lambda_val = lambda_val - ctx.input_shape = X.size() - - Y = _C.swap_align2nat_forward(X, lambda_val, pad_val) - return Y - - @staticmethod - @once_differentiable - def backward(ctx, gY): - lambda_val = ctx.lambda_val - bs, ch, h, w = ctx.input_shape - - gX = _C.swap_align2nat_backward(gY, lambda_val, bs, ch, h, w) - - return gX, None, None - - -swap_align2nat = _SwapAlign2Nat.apply - - -class SwapAlign2Nat(nn.Module): - """ - The op `SwapAlign2Nat` described in https://arxiv.org/abs/1903.12174. - Given an input tensor that predicts masks of shape (N, C=VxU, H, W), - apply the op, it will return masks of shape (N, V'xU', H', W') where - the unit lengths of (V, U) and (H, W) are swapped, and the mask representation - is transformed from aligned to natural. - Args: - lambda_val (int): the relative unit length ratio between (V, U) and (H, W), - as we always have larger unit lengths for (V, U) than (H, W), - lambda_val is always >= 1. - pad_val (float): padding value for the values falling outside of the input - tensor, default set to -6 as sigmoid(-6) is ~0, indicating - that is no masks outside of the tensor. - """ - - def __init__(self, lambda_val, pad_val=-6.0): - super(SwapAlign2Nat, self).__init__() - self.lambda_val = lambda_val - self.pad_val = pad_val - - def forward(self, X): - return swap_align2nat(X, self.lambda_val, self.pad_val) - - def __repr__(self): - tmpstr = self.__class__.__name__ + "(" - tmpstr += "lambda_val=" + str(self.lambda_val) - tmpstr += ", pad_val=" + str(self.pad_val) - tmpstr += ")" - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/__init__.py deleted file mode 100644 index 168f997..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/test_swap_align2nat.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/test_swap_align2nat.py deleted file mode 100644 index b3d018c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/tests/test_swap_align2nat.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import unittest -import torch -from torch.autograd import gradcheck - -from tensormask.layers.swap_align2nat import SwapAlign2Nat - - -class SwapAlign2NatTest(unittest.TestCase): - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_swap_align2nat_gradcheck_cuda(self): - dtype = torch.float64 - device = torch.device("cuda") - m = SwapAlign2Nat(2).to(dtype=dtype, device=device) - x = torch.rand(2, 4, 10, 10, dtype=dtype, device=device, requires_grad=True) - - self.assertTrue(gradcheck(m, x), "gradcheck failed for SwapAlign2Nat CUDA") - - def _swap_align2nat(self, tensor, lambda_val): - """ - The basic setup for testing Swap_Align - """ - op = SwapAlign2Nat(lambda_val, pad_val=0.0) - input = torch.from_numpy(tensor[None, :, :, :].astype("float32")) - output = op.forward(input.cuda()).cpu().numpy() - return output[0] - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/train_net.py deleted file mode 100644 index b898fc7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TensorMask/train_net.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -""" -TensorMask Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import COCOEvaluator, verify_results - -from tensormask import add_tensormask_config - - -class Trainer(DefaultTrainer): - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - return COCOEvaluator(dataset_name, cfg, True, output_folder) - - -def setup(args): - """ - Create configs and perform basic setups. 
- """ - cfg = get_cfg() - add_tensormask_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/README.md b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/README.md deleted file mode 100644 index 4b7a901..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/README.md +++ /dev/null @@ -1,60 +0,0 @@ - -# TridentNet in Detectron2 -**Scale-Aware Trident Networks for Object Detection** - -Yanghao Li\*, Yuntao Chen\*, Naiyan Wang, Zhaoxiang Zhang - -[[`TridentNet`](https://github.com/TuSimple/simpledet/tree/master/models/tridentnet)] [[`arXiv`](https://arxiv.org/abs/1901.01892)] [[`BibTeX`](#CitingTridentNet)] - -
- -
- -In this repository, we implement TridentNet-Fast in Detectron2. -Trident Network (TridentNet) aims to generate scale-specific feature maps with a uniform representational power. We construct a parallel multi-branch architecture in which each branch shares the same transformation parameters but with different receptive fields. TridentNet-Fast is a fast approximation version of TridentNet that could achieve significant improvements without any additional parameters and computational cost. - -## Training - -To train a model, run -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file -``` - -For example, to launch end-to-end TridentNet training with ResNet-50 backbone on 8 GPUs, -one should execute: -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml --num-gpus 8 -``` - -## Evaluation - -Model evaluation can be done similarly: -```bash -python /path/to/detectron2/projects/TridentNet/train_net.py --config-file configs/tridentnet_fast_R_50_C4_1x.yaml --eval-only MODEL.WEIGHTS model.pth -``` - -## Results on MS-COCO in Detectron2 - -|Model|Backbone|Head|lr sched|AP|AP50|AP75|APs|APm|APl|download| -|-----|--------|----|--------|--|----|----|---|---|---|--------| -|Faster|R50-C4|C5-512ROI|1X|35.7|56.1|38.0|19.2|40.9|48.7|model \| metrics| -|TridentFast|R50-C4|C5-128ROI|1X|38.0|58.1|40.8|19.5|42.2|54.6|model \| metrics| -|Faster|R50-C4|C5-512ROI|3X|38.4|58.7|41.3|20.7|42.7|53.1|model \| metrics| -|TridentFast|R50-C4|C5-128ROI|3X|40.6|60.8|43.6|23.4|44.7|57.1|model \| metrics| -|Faster|R101-C4|C5-512ROI|3X|41.1|61.4|44.0|22.2|45.5|55.9|model \| metrics| -|TridentFast|R101-C4|C5-128ROI|3X|43.6|63.4|47.0|24.3|47.8|60.0|model \| metrics| - - -## Citing TridentNet - -If you use TridentNet, please use the following BibTeX entry. 
- -``` -@InProceedings{li2019scale, - title={Scale-Aware Trident Networks for Object Detection}, - author={Li, Yanghao and Chen, Yuntao and Wang, Naiyan and Zhang, Zhaoxiang}, - journal={The International Conference on Computer Vision (ICCV)}, - year={2019} -} -``` - diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml deleted file mode 100644 index 8c3d807..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/Base-TridentNet-Fast-C4.yaml +++ /dev/null @@ -1,29 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_trident_resnet_backbone" - ROI_HEADS: - NAME: "TridentRes5ROIHeads" - POSITIVE_FRACTION: 0.5 - BATCH_SIZE_PER_IMAGE: 128 - PROPOSAL_APPEND_GT: False - PROPOSAL_GENERATOR: - NAME: "TridentRPN" - RPN: - POST_NMS_TOPK_TRAIN: 500 - TRIDENT: - NUM_BRANCH: 3 - BRANCH_DILATIONS: [1, 2, 3] - TEST_BRANCH_IDX: 1 - TRIDENT_STAGE: "res4" -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml deleted file mode 100644 index bc83c2f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_101_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" - MASK_ON: False - RESNETS: - DEPTH: 101 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml deleted file mode 100644 index fda2cb6..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_1x.yaml +++ /dev/null @@ -1,6 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml deleted file mode 100644 index ebf89d0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/configs/tridentnet_fast_R_50_C4_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "Base-TridentNet-Fast-C4.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: False - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/train_net.py deleted file mode 100644 index eac2ec5..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/train_net.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved - -""" -TridentNet Training Script. - -This script is a simplified version of the training script in detectron2/tools. -""" - -import os - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch -from detectron2.evaluation import COCOEvaluator - -from tridentnet import add_tridentnet_config - - -class Trainer(DefaultTrainer): - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - return COCOEvaluator(dataset_name, cfg, True, output_folder) - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - add_tridentnet_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - return res - - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/__init__.py deleted file mode 100644 index 2fcdeb4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .config import add_tridentnet_config -from .trident_backbone import ( - TridentBottleneckBlock, - build_trident_resnet_backbone, - make_trident_stage, -) -from .trident_rpn import TridentRPN -from .trident_rcnn import TridentRes5ROIHeads, TridentStandardROIHeads diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/config.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/config.py deleted file mode 100644 index f33f473..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/config.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from detectron2.config import CfgNode as CN - - -def add_tridentnet_config(cfg): - """ - Add config for tridentnet. - """ - _C = cfg - - _C.MODEL.TRIDENT = CN() - - # Number of branches for TridentNet. - _C.MODEL.TRIDENT.NUM_BRANCH = 3 - # Specify the dilations for each branch. - _C.MODEL.TRIDENT.BRANCH_DILATIONS = [1, 2, 3] - # Specify the stage for applying trident blocks. Default stage is Res4 according to the - # TridentNet paper. - _C.MODEL.TRIDENT.TRIDENT_STAGE = "res4" - # Specify the test branch index TridentNet Fast inference: - # - use -1 to aggregate results of all branches during inference. - # - otherwise, only using specified branch for fast inference. Recommended setting is - # to use the middle branch. 
- _C.MODEL.TRIDENT.TEST_BRANCH_IDX = 1 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_backbone.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_backbone.py deleted file mode 100644 index 232dfaf..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_backbone.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import fvcore.nn.weight_init as weight_init -import torch -import torch.nn.functional as F - -from detectron2.layers import Conv2d, FrozenBatchNorm2d, get_norm -from detectron2.modeling import BACKBONE_REGISTRY, ResNet, ResNetBlockBase, make_stage -from detectron2.modeling.backbone.resnet import BasicStem, BottleneckBlock, DeformBottleneckBlock - -from .trident_conv import TridentConv - -__all__ = ["TridentBottleneckBlock", "make_trident_stage", "build_trident_resnet_backbone"] - - -class TridentBottleneckBlock(ResNetBlockBase): - def __init__( - self, - in_channels, - out_channels, - *, - bottleneck_channels, - stride=1, - num_groups=1, - norm="BN", - stride_in_1x1=False, - num_branch=3, - dilations=(1, 2, 3), - concat_output=False, - test_branch_idx=-1, - ): - """ - Args: - num_branch (int): the number of branches in TridentNet. - dilations (tuple): the dilations of multiple branches in TridentNet. - concat_output (bool): if concatenate outputs of multiple branches in TridentNet. - Use 'True' for the last trident block. - """ - super().__init__(in_channels, out_channels, stride) - - assert num_branch == len(dilations) - - self.num_branch = num_branch - self.concat_output = concat_output - self.test_branch_idx = test_branch_idx - - if in_channels != out_channels: - self.shortcut = Conv2d( - in_channels, - out_channels, - kernel_size=1, - stride=stride, - bias=False, - norm=get_norm(norm, out_channels), - ) - else: - self.shortcut = None - - stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) - - self.conv1 = Conv2d( - in_channels, - bottleneck_channels, - kernel_size=1, - stride=stride_1x1, - bias=False, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv2 = TridentConv( - bottleneck_channels, - bottleneck_channels, - kernel_size=3, - stride=stride_3x3, - paddings=dilations, - bias=False, - groups=num_groups, - dilations=dilations, - num_branch=num_branch, - test_branch_idx=test_branch_idx, - norm=get_norm(norm, bottleneck_channels), - ) - - self.conv3 = Conv2d( - bottleneck_channels, - out_channels, - kernel_size=1, - bias=False, - norm=get_norm(norm, out_channels), - ) - - for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: - if layer is not None: # shortcut can be None - weight_init.c2_msra_fill(layer) - - def forward(self, x): - num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1 - if not isinstance(x, list): - x = [x] * num_branch - out = [self.conv1(b) for b in x] - out = [F.relu_(b) for b in out] - - out = self.conv2(out) - out = [F.relu_(b) for b in out] - - out = [self.conv3(b) for b in out] - - if self.shortcut is not None: - shortcut = [self.shortcut(b) for b in x] - else: - shortcut = x - - out = [out_b + shortcut_b for out_b, shortcut_b in zip(out, shortcut)] - out = [F.relu_(b) for b in out] - if self.concat_output: - out = torch.cat(out) - return out - - -def make_trident_stage(block_class, num_blocks, first_stride, **kwargs): - """ - Create a resnet stage by creating many blocks for TridentNet. 
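The `TridentBottleneckBlock` above routes its 3x3 convolution through `TridentConv`, so every branch reuses the same kernel but runs it at a different dilation (the `BRANCH_DILATIONS` default of `[1, 2, 3]` from the config shown earlier). A standalone sketch of that weight-sharing idea in plain PyTorch, independent of the `TridentConv` API:

```python
import torch
import torch.nn.functional as F

# One shared 3x3 kernel applied at several dilation rates: identical
# parameters, different receptive fields. padding=d keeps the spatial size
# unchanged for a 3x3 kernel with dilation d.
weight = torch.randn(8, 8, 3, 3)
x = torch.randn(1, 8, 32, 32)
branches = [F.conv2d(x, weight, padding=d, dilation=d) for d in (1, 2, 3)]
print([tuple(b.shape) for b in branches])  # three times (1, 8, 32, 32)
```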
- """ - blocks = [] - for i in range(num_blocks - 1): - blocks.append(block_class(stride=first_stride if i == 0 else 1, **kwargs)) - kwargs["in_channels"] = kwargs["out_channels"] - blocks.append(block_class(stride=1, concat_output=True, **kwargs)) - return blocks - - -@BACKBONE_REGISTRY.register() -def build_trident_resnet_backbone(cfg, input_shape): - """ - Create a ResNet instance from config for TridentNet. - - Returns: - ResNet: a :class:`ResNet` instance. - """ - # need registration of new blocks/stems? - norm = cfg.MODEL.RESNETS.NORM - stem = BasicStem( - in_channels=input_shape.channels, - out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, - norm=norm, - ) - freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT - - if freeze_at >= 1: - for p in stem.parameters(): - p.requires_grad = False - stem = FrozenBatchNorm2d.convert_frozen_batchnorm(stem) - - # fmt: off - out_features = cfg.MODEL.RESNETS.OUT_FEATURES - depth = cfg.MODEL.RESNETS.DEPTH - num_groups = cfg.MODEL.RESNETS.NUM_GROUPS - width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP - bottleneck_channels = num_groups * width_per_group - in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS - out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS - stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 - res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION - deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE - deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED - deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS - num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - branch_dilations = cfg.MODEL.TRIDENT.BRANCH_DILATIONS - trident_stage = cfg.MODEL.TRIDENT.TRIDENT_STAGE - test_branch_idx = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX - # fmt: on - assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) - - num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] - - stages = [] - - res_stage_idx = {"res2": 2, "res3": 3, "res4": 4, "res5": 5} - out_stage_idx = [res_stage_idx[f] for f in out_features] - trident_stage_idx = res_stage_idx[trident_stage] - max_stage_idx = max(out_stage_idx) - for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): - dilation = res5_dilation if stage_idx == 5 else 1 - first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 - stage_kargs = { - "num_blocks": num_blocks_per_stage[idx], - "first_stride": first_stride, - "in_channels": in_channels, - "bottleneck_channels": bottleneck_channels, - "out_channels": out_channels, - "num_groups": num_groups, - "norm": norm, - "stride_in_1x1": stride_in_1x1, - "dilation": dilation, - } - if stage_idx == trident_stage_idx: - assert not deform_on_per_stage[ - idx - ], "Not support deformable conv in Trident blocks yet." 
- stage_kargs["block_class"] = TridentBottleneckBlock - stage_kargs["num_branch"] = num_branch - stage_kargs["dilations"] = branch_dilations - stage_kargs["test_branch_idx"] = test_branch_idx - stage_kargs.pop("dilation") - elif deform_on_per_stage[idx]: - stage_kargs["block_class"] = DeformBottleneckBlock - stage_kargs["deform_modulated"] = deform_modulated - stage_kargs["deform_num_groups"] = deform_num_groups - else: - stage_kargs["block_class"] = BottleneckBlock - blocks = ( - make_trident_stage(**stage_kargs) - if stage_idx == trident_stage_idx - else make_stage(**stage_kargs) - ) - in_channels = out_channels - out_channels *= 2 - bottleneck_channels *= 2 - - if freeze_at >= stage_idx: - for block in blocks: - block.freeze() - stages.append(blocks) - return ResNet(stem, stages, out_features=out_features) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_conv.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_conv.py deleted file mode 100644 index 7e2d525..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_conv.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch -from torch import nn -from torch.nn import functional as F -from torch.nn.modules.utils import _pair - -from detectron2.layers.wrappers import _NewEmptyTensorOp - - -class TridentConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - paddings=0, - dilations=1, - groups=1, - num_branch=1, - test_branch_idx=-1, - bias=False, - norm=None, - activation=None, - ): - super(TridentConv, self).__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = _pair(kernel_size) - self.num_branch = num_branch - self.stride = _pair(stride) - self.groups = groups - self.with_bias = bias - if isinstance(paddings, int): - paddings = [paddings] * self.num_branch - if isinstance(dilations, int): - dilations = [dilations] * self.num_branch - self.paddings = [_pair(padding) for padding in paddings] - self.dilations = [_pair(dilation) for dilation in dilations] - self.test_branch_idx = test_branch_idx - self.norm = norm - self.activation = activation - - assert len({self.num_branch, len(self.paddings), len(self.dilations)}) == 1 - - self.weight = nn.Parameter( - torch.Tensor(out_channels, in_channels // groups, *self.kernel_size) - ) - if bias: - self.bias = nn.Parameter(torch.Tensor(out_channels)) - else: - self.bias = None - - nn.init.kaiming_uniform_(self.weight, nonlinearity="relu") - if self.bias is not None: - nn.init.constant_(self.bias, 0) - - def forward(self, inputs): - num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1 - assert len(inputs) == num_branch - - if inputs[0].numel() == 0: - output_shape = [ - (i + 2 * p - (di * (k - 1) + 1)) // s + 1 - for i, p, di, k, s in zip( - inputs[0].shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride - ) - ] - output_shape = [input[0].shape[0], self.weight.shape[0]] + output_shape - return [_NewEmptyTensorOp.apply(input, output_shape) for input in inputs] - - if self.training or self.test_branch_idx == -1: - outputs = [ - F.conv2d(input, self.weight, self.bias, self.stride, padding, dilation, self.groups) - for input, dilation, padding in zip(inputs, self.dilations, self.paddings) - ] - else: - outputs = [ - F.conv2d( - inputs[0], - self.weight, - self.bias, - 
self.stride, - self.paddings[self.test_branch_idx], - self.dilations[self.test_branch_idx], - self.groups, - ) - ] - - if self.norm is not None: - outputs = [self.norm(x) for x in outputs] - if self.activation is not None: - outputs = [self.activation(x) for x in outputs] - return outputs - - def extra_repr(self): - tmpstr = "in_channels=" + str(self.in_channels) - tmpstr += ", out_channels=" + str(self.out_channels) - tmpstr += ", kernel_size=" + str(self.kernel_size) - tmpstr += ", num_branch=" + str(self.num_branch) - tmpstr += ", test_branch_idx=" + str(self.test_branch_idx) - tmpstr += ", stride=" + str(self.stride) - tmpstr += ", paddings=" + str(self.paddings) - tmpstr += ", dilations=" + str(self.dilations) - tmpstr += ", groups=" + str(self.groups) - tmpstr += ", bias=" + str(self.with_bias) - return tmpstr diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py deleted file mode 100644 index 65deb90..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rcnn.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from detectron2.layers import batched_nms -from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads -from detectron2.modeling.roi_heads.roi_heads import Res5ROIHeads -from detectron2.structures import Instances - - -def merge_branch_instances(instances, num_branch, nms_thresh, topk_per_image): - """ - Merge detection results from different branches of TridentNet. - Return detection results by applying non-maximum suppression (NMS) on bounding boxes - and keep the unsuppressed boxes and other instances (e.g mask) if any. - - Args: - instances (list[Instances]): A list of N * num_branch instances that store detection - results. Contain N images and each image has num_branch instances. - num_branch (int): Number of branches used for merging detection results for each image. - nms_thresh (float): The threshold to use for box non-maximum suppression. Value in [0, 1]. - topk_per_image (int): The number of top scoring detections to return. Set < 0 to return - all detections. - - Returns: - results: (list[Instances]): A list of N instances, one for each image in the batch, - that stores the topk most confidence detections after merging results from multiple - branches. - """ - if num_branch == 1: - return instances - - batch_size = len(instances) // num_branch - results = [] - for i in range(batch_size): - instance = Instances.cat([instances[i + batch_size * j] for j in range(num_branch)]) - - # Apply per-class NMS - keep = batched_nms( - instance.pred_boxes.tensor, instance.scores, instance.pred_classes, nms_thresh - ) - keep = keep[:topk_per_image] - result = instance[keep] - - results.append(result) - - return results - - -@ROI_HEADS_REGISTRY.register() -class TridentRes5ROIHeads(Res5ROIHeads): - """ - The TridentNet ROIHeads in a typical "C4" R-CNN model. - See :class:`Res5ROIHeads`. - """ - - def __init__(self, cfg, input_shape): - super().__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, proposals, targets=None): - """ - See :class:`Res5ROIHeads.forward`. 
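`merge_branch_instances` above concatenates the per-branch detections of each image and prunes them with class-aware NMS. A simplified sketch of the same merge on raw tensors, using `torchvision.ops.batched_nms` instead of detectron2's `Instances` and `batched_nms` wrappers (the function and argument names here are illustrative only):

```python
import torch
from torchvision.ops import batched_nms

def merge_branches(boxes_list, scores_list, classes_list, nms_thresh=0.5, topk=100):
    # Concatenate detections from all branches of one image, run per-class
    # NMS, then keep at most `topk` survivors (indices come back sorted by
    # decreasing score).
    boxes = torch.cat(boxes_list)      # (sum_i N_i, 4) in xyxy format
    scores = torch.cat(scores_list)
    classes = torch.cat(classes_list)
    keep = batched_nms(boxes, scores, classes, nms_thresh)[:topk]
    return boxes[keep], scores[keep], classes[keep]
```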
- """ - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - all_targets = targets * num_branch if targets is not None else None - pred_instances, losses = super().forward(images, features, proposals, all_targets) - del images, all_targets, targets - - if self.training: - return pred_instances, losses - else: - pred_instances = merge_branch_instances( - pred_instances, - num_branch, - self.box_predictor.test_nms_thresh, - self.box_predictor.test_topk_per_image, - ) - - return pred_instances, {} - - -@ROI_HEADS_REGISTRY.register() -class TridentStandardROIHeads(StandardROIHeads): - """ - The `StandardROIHeads` for TridentNet. - See :class:`StandardROIHeads`. - """ - - def __init__(self, cfg, input_shape): - super(TridentStandardROIHeads, self).__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, proposals, targets=None): - """ - See :class:`Res5ROIHeads.forward`. - """ - # Use 1 branch if using trident_fast during inference. - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - # Duplicate targets for all branches in TridentNet. - all_targets = targets * num_branch if targets is not None else None - pred_instances, losses = super().forward(images, features, proposals, all_targets) - del images, all_targets, targets - - if self.training: - return pred_instances, losses - else: - pred_instances = merge_branch_instances( - pred_instances, - num_branch, - self.box_predictor.test_nms_thresh, - self.box_predictor.test_topk_per_image, - ) - - return pred_instances, {} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rpn.py b/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rpn.py deleted file mode 100644 index c30137f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/projects/TridentNet/tridentnet/trident_rpn.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import torch - -from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY -from detectron2.modeling.proposal_generator.rpn import RPN -from detectron2.structures import ImageList - - -@PROPOSAL_GENERATOR_REGISTRY.register() -class TridentRPN(RPN): - """ - Trident RPN subnetwork. - """ - - def __init__(self, cfg, input_shape): - super(TridentRPN, self).__init__(cfg, input_shape) - - self.num_branch = cfg.MODEL.TRIDENT.NUM_BRANCH - self.trident_fast = cfg.MODEL.TRIDENT.TEST_BRANCH_IDX != -1 - - def forward(self, images, features, gt_instances=None): - """ - See :class:`RPN.forward`. - """ - num_branch = self.num_branch if self.training or not self.trident_fast else 1 - # Duplicate images and gt_instances for all branches in TridentNet. 
- all_images = ImageList( - torch.cat([images.tensor] * num_branch), images.image_sizes * num_branch - ) - all_gt_instances = gt_instances * num_branch if gt_instances is not None else None - - return super(TridentRPN, self).forward(all_images, features, all_gt_instances) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/setup.cfg b/preprocess/humanparsing/mhp_extension/detectron2/setup.cfg deleted file mode 100644 index b09bba9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/setup.cfg +++ /dev/null @@ -1,26 +0,0 @@ -[isort] -line_length=100 -multi_line_output=3 -include_trailing_comma=True -known_standard_library=numpy,setuptools,mock -skip=./datasets,docs -skip_glob=*/__init__.py -known_myself=detectron2 -known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil,pkg_resources,caffe2,onnx -no_lines_before=STDLIB,THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER -default_section=FIRSTPARTY - -[mypy] -python_version=3.6 -ignore_missing_imports = True -warn_unused_configs = True -disallow_untyped_defs = True -check_untyped_defs = True -warn_unused_ignores = True -warn_redundant_casts = True -show_column_numbers = True -follow_imports = silent -allow_redefinition = True -; Require all functions to be annotated -disallow_incomplete_defs = True diff --git a/preprocess/humanparsing/mhp_extension/detectron2/setup.py b/preprocess/humanparsing/mhp_extension/detectron2/setup.py deleted file mode 100644 index a863fab..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/setup.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import glob -import os -import shutil -from os import path -from setuptools import find_packages, setup -from typing import List -import torch -from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension - -torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] -assert torch_ver >= [1, 4], "Requires PyTorch >= 1.4" - - -def get_version(): - init_py_path = path.join(path.abspath(path.dirname(__file__)), "detectron2", "__init__.py") - init_py = open(init_py_path, "r").readlines() - version_line = [l.strip() for l in init_py if l.startswith("__version__")][0] - version = version_line.split("=")[-1].strip().strip("'\"") - - # The following is used to build release packages. - # Users should never use it. 
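`get_version()` above recovers the package version by scanning `detectron2/__init__.py` for the `__version__` line and stripping the quotes; the suffix and nightly handling continue in the file below. A tiny standalone version of just that parsing step, assuming the conventional `__version__ = "x.y.z"` form:

```python
def parse_version(init_py_text: str) -> str:
    # Find the __version__ assignment and strip whitespace and quote marks,
    # mirroring the split/strip chain used by get_version() above.
    line = next(l.strip() for l in init_py_text.splitlines()
                if l.startswith("__version__"))
    return line.split("=")[-1].strip().strip("'\"")

print(parse_version('__version__ = "0.1.3"\n'))  # -> 0.1.3
```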
- suffix = os.getenv("D2_VERSION_SUFFIX", "") - version = version + suffix - if os.getenv("BUILD_NIGHTLY", "0") == "1": - from datetime import datetime - - date_str = datetime.today().strftime("%y%m%d") - version = version + ".dev" + date_str - - new_init_py = [l for l in init_py if not l.startswith("__version__")] - new_init_py.append('__version__ = "{}"\n'.format(version)) - with open(init_py_path, "w") as f: - f.write("".join(new_init_py)) - return version - - -def get_extensions(): - this_dir = path.dirname(path.abspath(__file__)) - extensions_dir = path.join(this_dir, "detectron2", "layers", "csrc") - - main_source = path.join(extensions_dir, "vision.cpp") - sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) - source_cuda = glob.glob(path.join(extensions_dir, "**", "*.cu")) + glob.glob( - path.join(extensions_dir, "*.cu") - ) - - sources = [main_source] + sources - extension = CppExtension - - extra_compile_args = {"cxx": []} - define_macros = [] - - if ( - torch.cuda.is_available() and CUDA_HOME is not None and os.path.isdir(CUDA_HOME) - ) or os.getenv("FORCE_CUDA", "0") == "1": - extension = CUDAExtension - sources += source_cuda - define_macros += [("WITH_CUDA", None)] - extra_compile_args["nvcc"] = [ - "-DCUDA_HAS_FP16=1", - "-D__CUDA_NO_HALF_OPERATORS__", - "-D__CUDA_NO_HALF_CONVERSIONS__", - "-D__CUDA_NO_HALF2_OPERATORS__", - ] - - # It's better if pytorch can do this by default .. - CC = os.environ.get("CC", None) - if CC is not None: - extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) - - include_dirs = [extensions_dir] - - ext_modules = [ - extension( - "detectron2._C", - sources, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - ) - ] - - return ext_modules - - -def get_model_zoo_configs() -> List[str]: - """ - Return a list of configs to include in package for model zoo. Copy over these configs inside - detectron2/model_zoo. - """ - - # Use absolute paths while symlinking. - source_configs_dir = path.join(path.dirname(path.realpath(__file__)), "configs") - destination = path.join( - path.dirname(path.realpath(__file__)), "detectron2", "model_zoo", "configs" - ) - # Symlink the config directory inside package to have a cleaner pip install. - - # Remove stale symlink/directory from a previous build. - if path.exists(source_configs_dir): - if path.islink(destination): - os.unlink(destination) - elif path.isdir(destination): - shutil.rmtree(destination) - - if not path.exists(destination): - try: - os.symlink(source_configs_dir, destination) - except OSError: - # Fall back to copying if symlink fails: ex. on Windows. 
- shutil.copytree(source_configs_dir, destination) - - config_paths = glob.glob("configs/**/*.yaml", recursive=True) - return config_paths - - -setup( - name="detectron2", - version=get_version(), - author="FAIR", - url="https://github.com/facebookresearch/detectron2", - description="Detectron2 is FAIR's next-generation research " - "platform for object detection and segmentation.", - packages=find_packages(exclude=("configs", "tests*")), - package_data={"detectron2.model_zoo": get_model_zoo_configs()}, - python_requires=">=3.6", - install_requires=[ - "termcolor>=1.1", - "Pillow", # you can also use pillow-simd for better performance - "yacs>=0.1.6", - "tabulate", - "cloudpickle", - "matplotlib", - "mock", - "tqdm>4.29.0", - "tensorboard", - "fvcore>=0.1.1", - "future", # used by caffe2 - "pydot", # used to save caffe2 SVGs - ], - extras_require={ - "all": ["shapely", "psutil"], - "dev": [ - "flake8==3.7.9", - "isort", - "black @ git+https://github.com/psf/black@673327449f86fce558adde153bb6cbe54bfebad2", - "flake8-bugbear", - "flake8-comprehensions", - ], - }, - ext_modules=get_extensions(), - cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, -) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/README.md b/preprocess/humanparsing/mhp_extension/detectron2/tests/README.md deleted file mode 100644 index f560384..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/README.md +++ /dev/null @@ -1,9 +0,0 @@ -## Unit Tests - -To run the unittests, do: -``` -cd detectron2 -python -m unittest discover -v -s ./tests -``` - -There are also end-to-end inference & training tests, in [dev/run_*_tests.sh](../dev). diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/__init__.py deleted file mode 100644 index 168f997..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_coco.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_coco.py deleted file mode 100644 index 2cd807d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_coco.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import json -import numpy as np -import os -import tempfile -import unittest -import pycocotools - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.data.datasets.coco import convert_to_coco_dict, load_coco_json -from detectron2.structures import BoxMode - - -def make_mask(): - """ - Makes a donut shaped binary mask. - """ - H = 100 - W = 100 - mask = np.zeros([H, W], dtype=np.uint8) - for x in range(W): - for y in range(H): - d = np.linalg.norm(np.array([W, H]) / 2 - np.array([x, y])) - if d > 10 and d < 20: - mask[y, x] = 1 - return mask - - -def make_dataset_dicts(mask): - """ - Returns a list of dicts that represents a single COCO data point for - object detection. The single instance given by `mask` is represented by - RLE. 
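The test above encodes its donut-shaped mask as COCO run-length encoding (RLE) before registering the dummy dataset. A short round-trip sketch with `pycocotools` showing what that representation preserves (the mask contents here are arbitrary):

```python
import numpy as np
import pycocotools.mask as mask_util

# encode() expects a Fortran-ordered uint8 mask; decode() inverts it exactly,
# so the binary mask survives the JSON-friendly RLE representation unchanged.
mask = np.zeros((100, 100), dtype=np.uint8)
mask[20:40, 30:70] = 1
rle = mask_util.encode(np.asfortranarray(mask))
assert np.array_equal(mask_util.decode(rle), mask)
print(mask_util.area(rle))  # -> 800 (20 x 40 foreground pixels)
```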
- """ - record = {} - record["file_name"] = "test" - record["image_id"] = 0 - record["height"] = mask.shape[0] - record["width"] = mask.shape[1] - - y, x = np.nonzero(mask) - segmentation = pycocotools.mask.encode(np.asarray(mask, order="F")) - min_x = np.min(x) - max_x = np.max(x) - min_y = np.min(y) - max_y = np.max(y) - obj = { - "bbox": [min_x, min_y, max_x, max_y], - "bbox_mode": BoxMode.XYXY_ABS, - "category_id": 0, - "iscrowd": 0, - "segmentation": segmentation, - } - record["annotations"] = [obj] - return [record] - - -class TestRLEToJson(unittest.TestCase): - def test(self): - # Make a dummy dataset. - mask = make_mask() - DatasetCatalog.register("test_dataset", lambda: make_dataset_dicts(mask)) - MetadataCatalog.get("test_dataset").set(thing_classes=["test_label"]) - - # Dump to json. - json_dict = convert_to_coco_dict("test_dataset") - with tempfile.TemporaryDirectory() as tmpdir: - json_file_name = os.path.join(tmpdir, "test.json") - with open(json_file_name, "w") as f: - json.dump(json_dict, f) - # Load from json. - dicts = load_coco_json(json_file_name, "") - - # Check the loaded mask matches the original. - anno = dicts[0]["annotations"][0] - loaded_mask = pycocotools.mask.decode(anno["segmentation"]) - self.assertTrue(np.array_equal(loaded_mask, mask)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_detection_utils.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_detection_utils.py deleted file mode 100644 index bdd94dd..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_detection_utils.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -import copy -import numpy as np -import unittest -import pycocotools.mask as mask_util - -from detectron2.data import detection_utils -from detectron2.data import transforms as T -from detectron2.structures import BitMasks, BoxMode - - -class TestTransformAnnotations(unittest.TestCase): - def test_transform_simple_annotation(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "category_id": 3, - "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]], - } - - output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400)) - self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300])) - self.assertEqual(len(output["segmentation"]), len(anno["segmentation"])) - self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10])) - - detection_utils.annotations_to_instances([output, output], (400, 400)) - - def test_flip_keypoints(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "keypoints": np.random.rand(17, 3) * 50 + 15, - } - - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), - transforms, - (400, 400), - keypoint_hflip_indices=detection_utils.create_keypoint_hflip_indices( - ["keypoints_coco_2017_train"] - ), - ) - # The first keypoint is nose - self.assertTrue(np.allclose(output["keypoints"][0, 0], 400 - anno["keypoints"][0, 0])) - # The last 16 keypoints are 8 left-right pairs - self.assertTrue( - np.allclose( - output["keypoints"][1:, 0].reshape(-1, 2)[:, ::-1], - 400 - anno["keypoints"][1:, 0].reshape(-1, 2), - ) - ) - self.assertTrue( - np.allclose( - output["keypoints"][1:, 1:].reshape(-1, 2, 2)[:, ::-1, 
:], - anno["keypoints"][1:, 1:].reshape(-1, 2, 2), - ) - ) - - def test_transform_RLE(self): - transforms = T.TransformList([T.HFlipTransform(400)]) - mask = np.zeros((300, 400), order="F").astype("uint8") - mask[:, :200] = 1 - - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "segmentation": mask_util.encode(mask[:, :, None])[0], - "category_id": 3, - } - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), transforms, (300, 400) - ) - mask = output["segmentation"] - self.assertTrue((mask[:, 200:] == 1).all()) - self.assertTrue((mask[:, :200] == 0).all()) - - inst = detection_utils.annotations_to_instances( - [output, output], (400, 400), mask_format="bitmask" - ) - self.assertTrue(isinstance(inst.gt_masks, BitMasks)) - - def test_transform_RLE_resize(self): - transforms = T.TransformList( - [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")] - ) - mask = np.zeros((300, 400), order="F").astype("uint8") - mask[:, :200] = 1 - - anno = { - "bbox": np.asarray([10, 10, 200, 300]), - "bbox_mode": BoxMode.XYXY_ABS, - "segmentation": mask_util.encode(mask[:, :, None])[0], - "category_id": 3, - } - output = detection_utils.transform_instance_annotations( - copy.deepcopy(anno), transforms, (400, 400) - ) - - inst = detection_utils.annotations_to_instances( - [output, output], (400, 400), mask_format="bitmask" - ) - self.assertTrue(isinstance(inst.gt_masks, BitMasks)) - - def test_gen_crop(self): - instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS} - t = detection_utils.gen_crop_transform_with_instance((10, 10), (150, 150), instance) - # the box center must fall into the cropped region - self.assertTrue(t.x0 <= 55 <= t.x0 + t.w) - - def test_gen_crop_outside_boxes(self): - instance = {"bbox": [10, 10, 100, 100], "bbox_mode": BoxMode.XYXY_ABS} - with self.assertRaises(AssertionError): - detection_utils.gen_crop_transform_with_instance((10, 10), (15, 15), instance) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_rotation_transform.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_rotation_transform.py deleted file mode 100644 index 45faf7e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_rotation_transform.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import numpy as np -import unittest - -from detectron2.data.transforms.transform import RotationTransform - - -class TestRotationTransform(unittest.TestCase): - def assertEqualsArrays(self, a1, a2): - self.assertTrue(np.allclose(a1, a2)) - - def randomData(self, h=5, w=5): - image = np.random.rand(h, w) - coords = np.array([[i, j] for j in range(h + 1) for i in range(w + 1)], dtype=float) - return image, coords, h, w - - def test180(self): - image, coords, h, w = self.randomData(6, 6) - rot = RotationTransform(h, w, 180, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image[::-1, ::-1]) - rotated_coords = [[w - c[0], h - c[1]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test45_coords(self): - _, coords, h, w = self.randomData(4, 6) - rot = RotationTransform(h, w, 45, expand=False, center=None) - rotated_coords = [ - [(x + y - (h + w) / 2) / np.sqrt(2) + w / 2, h / 2 + (y + (w - h) / 2 - x) / np.sqrt(2)] - for (x, y) in coords - ] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90(self): - image, coords, h, w = self.randomData() - rot = RotationTransform(h, w, 90, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90_expand(self): # non-square image - image, coords, h, w = self.randomData(h=5, w=8) - rot = RotationTransform(h, w, 90, expand=True, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test_center_expand(self): - # center has no effect if expand=True because it only affects shifting - image, coords, h, w = self.randomData(h=5, w=8) - angle = np.random.randint(360) - rot1 = RotationTransform(h, w, angle, expand=True, center=None) - rot2 = RotationTransform(h, w, angle, expand=True, center=(0, 0)) - rot3 = RotationTransform(h, w, angle, expand=True, center=(h, w)) - rot4 = RotationTransform(h, w, angle, expand=True, center=(2, 5)) - for r1 in [rot1, rot2, rot3, rot4]: - for r2 in [rot1, rot2, rot3, rot4]: - self.assertEqualsArrays(r1.apply_image(image), r2.apply_image(image)) - self.assertEqualsArrays(r1.apply_coords(coords), r2.apply_coords(coords)) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_sampler.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_sampler.py deleted file mode 100644 index 1256a87..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_sampler.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
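The expectations hard-coded in the rotation tests above come from the closed-form effect of axis-aligned rotations: rotating an `h x w` image by 90 degrees sends a coordinate `(x, y)` to `(y, w - x)` and turns the image into `image.T[::-1]`. A quick numeric check of that relationship (shapes and points chosen arbitrarily):

```python
import numpy as np

h, w = 5, 8
image = np.random.rand(h, w)
coords = np.array([[1.0, 2.0], [7.0, 4.0]])        # (x, y) pairs

rotated_image = image.T[::-1]                      # 90-degree rotation
rotated_coords = np.stack([coords[:, 1], w - coords[:, 0]], axis=1)

print(rotated_image.shape)   # (8, 5): height and width swap
print(rotated_coords)        # [[2. 7.] [4. 1.]]
```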
-import unittest -from torch.utils.data.sampler import SequentialSampler - -from detectron2.data.samplers import GroupedBatchSampler - - -class TestGroupedBatchSampler(unittest.TestCase): - def test_missing_group_id(self): - sampler = SequentialSampler(list(range(100))) - group_ids = [1] * 100 - samples = GroupedBatchSampler(sampler, group_ids, 2) - - for mini_batch in samples: - self.assertEqual(len(mini_batch), 2) - - def test_groups(self): - sampler = SequentialSampler(list(range(100))) - group_ids = [1, 0] * 50 - samples = GroupedBatchSampler(sampler, group_ids, 2) - - for mini_batch in samples: - self.assertEqual((mini_batch[0] + mini_batch[1]) % 2, 0) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_transforms.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_transforms.py deleted file mode 100644 index 6d85518..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/data/test_transforms.py +++ /dev/null @@ -1,134 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import logging -import numpy as np -import unittest -from unittest import mock - -from detectron2.config import get_cfg -from detectron2.data import detection_utils -from detectron2.data import transforms as T -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger(__name__) - - -class TestTransforms(unittest.TestCase): - def setUp(self): - setup_logger() - - def test_apply_rotated_boxes(self): - np.random.seed(125) - cfg = get_cfg() - is_train = True - transform_gen = detection_utils.build_transform_gen(cfg, is_train) - image = np.random.rand(200, 300) - image, transforms = T.apply_transform_gens(transform_gen, image) - image_shape = image.shape[:2] # h, w - assert image_shape == (800, 1200) - annotation = {"bbox": [179, 97, 62, 40, -56]} - - boxes = np.array([annotation["bbox"]], dtype=np.float64) # boxes.shape = (1, 5) - transformed_bbox = transforms.apply_rotated_box(boxes)[0] - - expected_bbox = np.array([484, 388, 248, 160, 56], dtype=np.float64) - err_msg = "transformed_bbox = {}, expected {}".format(transformed_bbox, expected_bbox) - assert np.allclose(transformed_bbox, expected_bbox), err_msg - - def test_apply_rotated_boxes_unequal_scaling_factor(self): - np.random.seed(125) - h, w = 400, 200 - newh, neww = 800, 800 - image = np.random.rand(h, w) - transform_gen = [] - transform_gen.append(T.Resize(shape=(newh, neww))) - image, transforms = T.apply_transform_gens(transform_gen, image) - image_shape = image.shape[:2] # h, w - assert image_shape == (newh, neww) - - boxes = np.array( - [ - [150, 100, 40, 20, 0], - [150, 100, 40, 20, 30], - [150, 100, 40, 20, 90], - [150, 100, 40, 20, -90], - ], - dtype=np.float64, - ) - transformed_boxes = transforms.apply_rotated_box(boxes) - - expected_bboxes = np.array( - [ - [600, 200, 160, 40, 0], - [600, 200, 144.22205102, 52.91502622, 49.10660535], - [600, 200, 80, 80, 90], - [600, 200, 80, 80, -90], - ], - dtype=np.float64, - ) - err_msg = "transformed_boxes = {}, expected {}".format(transformed_boxes, expected_bboxes) - assert np.allclose(transformed_boxes, expected_bboxes), err_msg - - def test_print_transform_gen(self): - t = T.RandomCrop("relative", (100, 100)) - self.assertTrue(str(t) == "RandomCrop(crop_type='relative', crop_size=(100, 100))") - - t = T.RandomFlip(prob=0.5) - self.assertTrue(str(t) == "RandomFlip(prob=0.5)") - - t = T.RandomFlip() - self.assertTrue(str(t) == "RandomFlip()") - - def 
test_random_apply_prob_out_of_range_check(self): - # GIVEN - test_probabilities = {0.0: True, 0.5: True, 1.0: True, -0.01: False, 1.01: False} - - # WHEN - for given_probability, is_valid in test_probabilities.items(): - # THEN - if not is_valid: - self.assertRaises(AssertionError, T.RandomApply, None, prob=given_probability) - else: - T.RandomApply(T.NoOpTransform(), prob=given_probability) - - def test_random_apply_wrapping_transform_gen_probability_occured_evaluation(self): - # GIVEN - transform_mock = mock.MagicMock(name="MockTransform", spec=T.TransformGen) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - # WHEN - with mock.patch.object(random_apply, "_rand_range", return_value=0.0001): - transform = random_apply.get_transform(image_mock) - - # THEN - transform_mock.get_transform.assert_called_once_with(image_mock) - self.assertIsNot(transform, transform_mock) - - def test_random_apply_wrapping_std_transform_probability_occured_evaluation(self): - # GIVEN - transform_mock = mock.MagicMock(name="MockTransform", spec=T.Transform) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - # WHEN - with mock.patch.object(random_apply, "_rand_range", return_value=0.0001): - transform = random_apply.get_transform(image_mock) - - # THEN - self.assertIs(transform, transform_mock) - - def test_random_apply_probability_not_occured_evaluation(self): - # GIVEN - transform_mock = mock.MagicMock(name="MockTransform", spec=T.TransformGen) - image_mock = mock.MagicMock(name="MockImage") - random_apply = T.RandomApply(transform_mock, prob=0.001) - - # WHEN - with mock.patch.object(random_apply, "_rand_range", return_value=0.9): - transform = random_apply.get_transform(image_mock) - - # THEN - transform_mock.get_transform.assert_not_called() - self.assertIsInstance(transform, T.NoOpTransform) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_mask_ops.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_mask_ops.py deleted file mode 100644 index d180627..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_mask_ops.py +++ /dev/null @@ -1,190 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import contextlib -import io -import numpy as np -import unittest -from collections import defaultdict -import torch -import tqdm -from fvcore.common.benchmark import benchmark -from fvcore.common.file_io import PathManager -from pycocotools.coco import COCO -from tabulate import tabulate -from torch.nn import functional as F - -from detectron2.data import MetadataCatalog -from detectron2.layers.mask_ops import ( - pad_masks, - paste_mask_in_image_old, - paste_masks_in_image, - scale_boxes, -) -from detectron2.structures import BitMasks, Boxes, BoxMode, PolygonMasks -from detectron2.structures.masks import polygons_to_bitmask - - -def iou_between_full_image_bit_masks(a, b): - intersect = (a & b).sum() - union = (a | b).sum() - return intersect / union - - -def rasterize_polygons_with_grid_sample(full_image_bit_mask, box, mask_size, threshold=0.5): - x0, y0, x1, y1 = box[0], box[1], box[2], box[3] - - img_h, img_w = full_image_bit_mask.shape - - mask_y = np.arange(0.0, mask_size) + 0.5 # mask y sample coords in [0.5, mask_size - 0.5] - mask_x = np.arange(0.0, mask_size) + 0.5 # mask x sample coords in [0.5, mask_size - 0.5] - mask_y = mask_y / mask_size * (y1 - y0) + y0 - mask_x = mask_x / mask_size * (x1 - x0) + x0 - - mask_x = (mask_x - 0.5) / (img_w - 1) * 2 + -1 - mask_y = (mask_y - 0.5) / (img_h - 1) * 2 + -1 - gy, gx = torch.meshgrid(torch.from_numpy(mask_y), torch.from_numpy(mask_x)) - ind = torch.stack([gx, gy], dim=-1).to(dtype=torch.float32) - - full_image_bit_mask = torch.from_numpy(full_image_bit_mask) - mask = F.grid_sample( - full_image_bit_mask[None, None, :, :].to(dtype=torch.float32), - ind[None, :, :, :], - align_corners=True, - ) - - return mask[0, 0] >= threshold - - -class TestMaskCropPaste(unittest.TestCase): - def setUp(self): - json_file = MetadataCatalog.get("coco_2017_val_100").json_file - if not PathManager.isfile(json_file): - raise unittest.SkipTest("{} not found".format(json_file)) - with contextlib.redirect_stdout(io.StringIO()): - json_file = PathManager.get_local_path(json_file) - self.coco = COCO(json_file) - - def test_crop_paste_consistency(self): - """ - rasterize_polygons_within_box (used in training) - and - paste_masks_in_image (used in inference) - should be inverse operations to each other. - - This function runs several implementation of the above two operations and prints - the reconstruction error. 
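`iou_between_full_image_bit_masks` above reduces mask agreement to a single intersection-over-union number, which the crop/paste consistency test then averages over annotations. The computation itself is plain boolean arithmetic; a tiny worked example (mask placement chosen arbitrarily):

```python
import numpy as np

a = np.zeros((10, 10), dtype=bool); a[2:6, 2:6] = True   # 16 pixels
b = np.zeros((10, 10), dtype=bool); b[4:8, 4:8] = True   # 16 pixels

# The overlap is the 2x2 block at rows/cols 4-5, so IoU = 4 / (16 + 16 - 4).
iou = (a & b).sum() / (a | b).sum()
print(iou)  # 0.142857...
```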
- """ - - anns = self.coco.loadAnns(self.coco.getAnnIds(iscrowd=False)) # avoid crowd annotations - - selected_anns = anns[:100] - - ious = [] - for ann in tqdm.tqdm(selected_anns): - results = self.process_annotation(ann) - ious.append([k[2] for k in results]) - - ious = np.array(ious) - mean_ious = ious.mean(axis=0) - table = [] - res_dic = defaultdict(dict) - for row, iou in zip(results, mean_ious): - table.append((row[0], row[1], iou)) - res_dic[row[0]][row[1]] = iou - print(tabulate(table, headers=["rasterize", "paste", "iou"], tablefmt="simple")) - # assert that the reconstruction is good: - self.assertTrue(res_dic["polygon"]["aligned"] > 0.94) - self.assertTrue(res_dic["roialign"]["aligned"] > 0.95) - - def process_annotation(self, ann, mask_side_len=28): - # Parse annotation data - img_info = self.coco.loadImgs(ids=[ann["image_id"]])[0] - height, width = img_info["height"], img_info["width"] - gt_polygons = [np.array(p, dtype=np.float64) for p in ann["segmentation"]] - gt_bbox = BoxMode.convert(ann["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - gt_bit_mask = polygons_to_bitmask(gt_polygons, height, width) - - # Run rasterize .. - torch_gt_bbox = torch.tensor(gt_bbox).to(dtype=torch.float32).reshape(-1, 4) - box_bitmasks = { - "polygon": PolygonMasks([gt_polygons]).crop_and_resize(torch_gt_bbox, mask_side_len)[0], - "gridsample": rasterize_polygons_with_grid_sample(gt_bit_mask, gt_bbox, mask_side_len), - "roialign": BitMasks(torch.from_numpy(gt_bit_mask[None, :, :])).crop_and_resize( - torch_gt_bbox, mask_side_len - )[0], - } - - # Run paste .. - results = defaultdict(dict) - for k, box_bitmask in box_bitmasks.items(): - padded_bitmask, scale = pad_masks(box_bitmask[None, :, :], 1) - scaled_boxes = scale_boxes(torch_gt_bbox, scale) - - r = results[k] - r["old"] = paste_mask_in_image_old( - padded_bitmask[0], scaled_boxes[0], height, width, threshold=0.5 - ) - r["aligned"] = paste_masks_in_image( - box_bitmask[None, :, :], Boxes(torch_gt_bbox), (height, width) - )[0] - - table = [] - for rasterize_method, r in results.items(): - for paste_method, mask in r.items(): - mask = np.asarray(mask) - iou = iou_between_full_image_bit_masks(gt_bit_mask.astype("uint8"), mask) - table.append((rasterize_method, paste_method, iou)) - return table - - def test_polygon_area(self): - # Draw polygon boxes - for d in [5.0, 10.0, 1000.0]: - polygon = PolygonMasks([[[0, 0, 0, d, d, d, d, 0]]]) - area = polygon.area()[0] - target = d ** 2 - self.assertEqual(area, target) - - # Draw polygon triangles - for d in [5.0, 10.0, 1000.0]: - polygon = PolygonMasks([[[0, 0, 0, d, d, d]]]) - area = polygon.area()[0] - target = d ** 2 / 2 - self.assertEqual(area, target) - - -def benchmark_paste(): - S = 800 - H, W = image_shape = (S, S) - N = 64 - torch.manual_seed(42) - masks = torch.rand(N, 28, 28) - - center = torch.rand(N, 2) * 600 + 100 - wh = torch.clamp(torch.randn(N, 2) * 40 + 200, min=50) - x0y0 = torch.clamp(center - wh * 0.5, min=0.0) - x1y1 = torch.clamp(center + wh * 0.5, max=S) - boxes = Boxes(torch.cat([x0y0, x1y1], axis=1)) - - def func(device, n=3): - m = masks.to(device=device) - b = boxes.to(device=device) - - def bench(): - for _ in range(n): - paste_masks_in_image(m, b, image_shape) - if device.type == "cuda": - torch.cuda.synchronize() - - return bench - - specs = [{"device": torch.device("cpu"), "n": 3}] - if torch.cuda.is_available(): - specs.append({"device": torch.device("cuda"), "n": 3}) - - benchmark(func, "paste_masks", specs, num_iters=10, warmup_iters=2) - - -if __name__ == "__main__": 
- benchmark_paste() - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_nms_rotated.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_nms_rotated.py deleted file mode 100644 index 94b346c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_nms_rotated.py +++ /dev/null @@ -1,188 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from __future__ import absolute_import, division, print_function, unicode_literals -import numpy as np -import unittest -import torch -from torchvision import ops - -from detectron2.layers import batched_nms, batched_nms_rotated, nms_rotated - - -def nms_edit_distance(keep1, keep2): - """ - Compare the "keep" result of two nms call. - They are allowed to be different in terms of edit distance - due to floating point precision issues, e.g., - if a box happen to have an IoU of 0.5 with another box, - one implentation may choose to keep it while another may discard it. - """ - if torch.equal(keep1, keep2): - # they should be equal most of the time - return 0 - keep1, keep2 = tuple(keep1.cpu()), tuple(keep2.cpu()) - m, n = len(keep1), len(keep2) - - # edit distance with DP - f = [np.arange(n + 1), np.arange(n + 1)] - for i in range(m): - cur_row = i % 2 - other_row = (i + 1) % 2 - f[other_row][0] = i + 1 - for j in range(n): - f[other_row][j + 1] = ( - f[cur_row][j] - if keep1[i] == keep2[j] - else min(min(f[cur_row][j], f[cur_row][j + 1]), f[other_row][j]) + 1 - ) - return f[m % 2][n] - - -class TestNMSRotated(unittest.TestCase): - def reference_horizontal_nms(self, boxes, scores, iou_threshold): - """ - Args: - box_scores (N, 5): boxes in corner-form and probabilities. - (Note here 5 == 4 + 1, i.e., 4-dim horizontal box + 1-dim prob) - iou_threshold: intersection over union threshold. 
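`nms_edit_distance` above tolerates off-by-one differences between NMS implementations by measuring the edit distance between their kept-index sequences. For reference, a compact standalone Levenshtein distance (this is the generic algorithm, not the exact two-row numpy variant used in the test):

```python
def edit_distance(a, b):
    # Classic dynamic-programming Levenshtein distance over two sequences.
    prev = list(range(len(b) + 1))
    for i, x in enumerate(a, 1):
        cur = [i] + [0] * len(b)
        for j, y in enumerate(b, 1):
            cur[j] = prev[j - 1] if x == y else 1 + min(prev[j - 1], prev[j], cur[j - 1])
        prev = cur
    return prev[-1]

print(edit_distance([3, 1, 4, 1, 5], [3, 4, 1, 5]))  # 1 (one kept index dropped)
```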
- Returns: - picked: a list of indexes of the kept boxes - """ - picked = [] - _, indexes = scores.sort(descending=True) - while len(indexes) > 0: - current = indexes[0] - picked.append(current.item()) - if len(indexes) == 1: - break - current_box = boxes[current, :] - indexes = indexes[1:] - rest_boxes = boxes[indexes, :] - iou = ops.box_iou(rest_boxes, current_box.unsqueeze(0)).squeeze(1) - indexes = indexes[iou <= iou_threshold] - - return torch.as_tensor(picked) - - def _create_tensors(self, N): - boxes = torch.rand(N, 4) * 100 - # Note: the implementation of this function in torchvision is: - # boxes[:, 2:] += torch.rand(N, 2) * 100 - # but it does not guarantee non-negative widths/heights constraints: - # boxes[:, 2] >= boxes[:, 0] and boxes[:, 3] >= boxes[:, 1]: - boxes[:, 2:] += boxes[:, :2] - scores = torch.rand(N) - return boxes, scores - - def test_batched_nms_rotated_0_degree_cpu(self): - N = 2000 - num_classes = 50 - boxes, scores = self._create_tensors(N) - idxs = torch.randint(0, num_classes, (N,)) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}" - for iou in [0.2, 0.5, 0.8]: - backup = boxes.clone() - keep_ref = batched_nms(boxes, scores, idxs, iou) - assert torch.allclose(boxes, backup), "boxes modified by batched_nms" - backup = rotated_boxes.clone() - keep = batched_nms_rotated(rotated_boxes, scores, idxs, iou) - assert torch.allclose( - rotated_boxes, backup - ), "rotated_boxes modified by batched_nms_rotated" - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_batched_nms_rotated_0_degree_cuda(self): - N = 2000 - num_classes = 50 - boxes, scores = self._create_tensors(N) - idxs = torch.randint(0, num_classes, (N,)) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS with 0 degree is incompatible with horizontal NMS for IoU={}" - for iou in [0.2, 0.5, 0.8]: - backup = boxes.clone() - keep_ref = batched_nms(boxes.cuda(), scores.cuda(), idxs, iou) - self.assertTrue(torch.allclose(boxes, backup), "boxes modified by batched_nms") - backup = rotated_boxes.clone() - keep = batched_nms_rotated(rotated_boxes.cuda(), scores.cuda(), idxs, iou) - self.assertTrue( - torch.allclose(rotated_boxes, backup), - "rotated_boxes modified by batched_nms_rotated", - ) - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, err_msg.format(iou)) - - def test_nms_rotated_0_degree_cpu(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" - for iou in [0.5]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - self.assertLessEqual(nms_edit_distance(keep, keep_ref), 1, 
err_msg.format(iou)) - - def test_nms_rotated_90_degrees_cpu(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - # Note for rotated_boxes[:, 2] and rotated_boxes[:, 3]: - # widths and heights are intentionally swapped here for 90 degrees case - # so that the reference horizontal nms could be used - rotated_boxes[:, 2] = boxes[:, 3] - boxes[:, 1] - rotated_boxes[:, 3] = boxes[:, 2] - boxes[:, 0] - - rotated_boxes[:, 4] = torch.ones(N) * 90 - err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" - for iou in [0.2, 0.5, 0.8]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - assert torch.equal(keep, keep_ref), err_msg.format(iou) - - def test_nms_rotated_180_degrees_cpu(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - rotated_boxes[:, 4] = torch.ones(N) * 180 - err_msg = "Rotated NMS incompatible between CPU and reference implementation for IoU={}" - for iou in [0.2, 0.5, 0.8]: - keep_ref = self.reference_horizontal_nms(boxes, scores, iou) - keep = nms_rotated(rotated_boxes, scores, iou) - assert torch.equal(keep, keep_ref), err_msg.format(iou) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_nms_rotated_0_degree_cuda(self): - N = 1000 - boxes, scores = self._create_tensors(N) - rotated_boxes = torch.zeros(N, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - err_msg = "Rotated NMS incompatible between CPU and CUDA for IoU={}" - - for iou in [0.2, 0.5, 0.8]: - r_cpu = nms_rotated(rotated_boxes, scores, iou) - r_cuda = nms_rotated(rotated_boxes.cuda(), scores.cuda(), iou) - - assert torch.equal(r_cpu, r_cuda.cpu()), err_msg.format(iou) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align.py deleted file mode 100644 index 633d7c2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import numpy as np -import unittest -import cv2 -import torch -from fvcore.common.benchmark import benchmark - -from detectron2.layers.roi_align import ROIAlign - - -class ROIAlignTest(unittest.TestCase): - def test_forward_output(self): - input = np.arange(25).reshape(5, 5).astype("float32") - """ - 0 1 2 3 4 - 5 6 7 8 9 - 10 11 12 13 14 - 15 16 17 18 19 - 20 21 22 23 24 - """ - - output = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=False) - output_correct = self._simple_roialign(input, [1, 1, 3, 3], (4, 4), aligned=True) - - # without correction: - old_results = [ - [7.5, 8, 8.5, 9], - [10, 10.5, 11, 11.5], - [12.5, 13, 13.5, 14], - [15, 15.5, 16, 16.5], - ] - - # with 0.5 correction: - correct_results = [ - [4.5, 5.0, 5.5, 6.0], - [7.0, 7.5, 8.0, 8.5], - [9.5, 10.0, 10.5, 11.0], - [12.0, 12.5, 13.0, 13.5], - ] - # This is an upsampled version of [[6, 7], [11, 12]] - - self.assertTrue(np.allclose(output.flatten(), np.asarray(old_results).flatten())) - self.assertTrue( - np.allclose(output_correct.flatten(), np.asarray(correct_results).flatten()) - ) - - # Also see similar issues in tensorflow at - # https://github.com/tensorflow/tensorflow/issues/26278 - - def test_resize(self): - H, W = 30, 30 - input = np.random.rand(H, W).astype("float32") * 100 - box = [10, 10, 20, 20] - output = self._simple_roialign(input, box, (5, 5), aligned=True) - - input2x = cv2.resize(input, (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) - box2x = [x / 2 for x in box] - output2x = self._simple_roialign(input2x, box2x, (5, 5), aligned=True) - diff = np.abs(output2x - output) - self.assertTrue(diff.max() < 1e-4) - - def _simple_roialign(self, img, box, resolution, aligned=True): - """ - RoiAlign with scale 1.0 and 0 sample ratio. - """ - if isinstance(resolution, int): - resolution = (resolution, resolution) - op = ROIAlign(resolution, 1.0, 0, aligned=aligned) - input = torch.from_numpy(img[None, None, :, :].astype("float32")) - - rois = [0] + list(box) - rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) - output = op.forward(input, rois) - if torch.cuda.is_available(): - output_cuda = op.forward(input.cuda(), rois.cuda()).cpu() - self.assertTrue(torch.allclose(output, output_cuda)) - return output[0, 0] - - def _simple_roialign_with_grad(self, img, box, resolution, device): - if isinstance(resolution, int): - resolution = (resolution, resolution) - - op = ROIAlign(resolution, 1.0, 0, aligned=True) - input = torch.from_numpy(img[None, None, :, :].astype("float32")) - - rois = [0] + list(box) - rois = torch.from_numpy(np.asarray(rois)[None, :].astype("float32")) - input = input.to(device=device) - rois = rois.to(device=device) - input.requires_grad = True - output = op.forward(input, rois) - return input, output - - def test_empty_box(self): - img = np.random.rand(5, 5) - box = [3, 4, 5, 4] - o = self._simple_roialign(img, box, 7) - self.assertTrue(o.shape == (7, 7)) - self.assertTrue((o == 0).all()) - - for dev in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - input, output = self._simple_roialign_with_grad(img, box, 7, torch.device(dev)) - output.sum().backward() - self.assertTrue(torch.allclose(input.grad, torch.zeros_like(input))) - - def test_empty_batch(self): - input = torch.zeros(0, 3, 10, 10, dtype=torch.float32) - rois = torch.zeros(0, 5, dtype=torch.float32) - op = ROIAlign((7, 7), 1.0, 0, aligned=True) - output = op.forward(input, rois) - self.assertTrue(output.shape == (0, 3, 7, 7)) - - -def benchmark_roi_align(): - from 
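test_forward_output above pins down the effect of the aligned=True half-pixel correction: for the 5x5 arange input and the box [1, 1, 3, 3], the corrected output is an upsampled version of [[6, 7], [11, 12]] rather than the shifted values produced by aligned=False. A condensed usage sketch of the same call, assuming detectron2 is importable (the expected values are copied from the test):

import torch
from detectron2.layers.roi_align import ROIAlign

img = torch.arange(25, dtype=torch.float32).reshape(1, 1, 5, 5)   # NCHW
rois = torch.tensor([[0.0, 1.0, 1.0, 3.0, 3.0]])                  # (batch_idx, x1, y1, x2, y2)
op = ROIAlign((4, 4), 1.0, 0, aligned=True)                       # scale 1.0, sampling ratio 0
out = op.forward(img, rois)[0, 0]
expected = torch.tensor([[4.5, 5.0, 5.5, 6.0],
                         [7.0, 7.5, 8.0, 8.5],
                         [9.5, 10.0, 10.5, 11.0],
                         [12.0, 12.5, 13.0, 13.5]])
assert torch.allclose(out, expected)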
detectron2 import _C - - def random_boxes(mean_box, stdev, N, maxsize): - ret = torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) - ret.clamp_(min=0, max=maxsize) - return ret - - def func(N, C, H, W, nboxes_per_img): - input = torch.rand(N, C, H, W) - boxes = [] - batch_idx = [] - for k in range(N): - b = random_boxes([80, 80, 130, 130], 24, nboxes_per_img, H) - # try smaller boxes: - # b = random_boxes([100, 100, 110, 110], 4, nboxes_per_img, H) - boxes.append(b) - batch_idx.append(torch.zeros(nboxes_per_img, 1, dtype=torch.float32) + k) - boxes = torch.cat(boxes, axis=0) - batch_idx = torch.cat(batch_idx, axis=0) - boxes = torch.cat([batch_idx, boxes], axis=1) - - input = input.cuda() - boxes = boxes.cuda() - - def bench(): - _C.roi_align_forward(input, boxes, 1.0, 7, 7, 0, True) - torch.cuda.synchronize() - - return bench - - args = [dict(N=2, C=512, H=256, W=256, nboxes_per_img=500)] - benchmark(func, "cuda_roialign", args, num_iters=20, warmup_iters=1) - - -if __name__ == "__main__": - if torch.cuda.is_available(): - benchmark_roi_align() - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py deleted file mode 100644 index 1915b59..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/layers/test_roi_align_rotated.py +++ /dev/null @@ -1,176 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest -import cv2 -import torch -from torch.autograd import Variable, gradcheck - -from detectron2.layers.roi_align import ROIAlign -from detectron2.layers.roi_align_rotated import ROIAlignRotated - -logger = logging.getLogger(__name__) - - -class ROIAlignRotatedTest(unittest.TestCase): - def _box_to_rotated_box(self, box, angle): - return [ - (box[0] + box[2]) / 2.0, - (box[1] + box[3]) / 2.0, - box[2] - box[0], - box[3] - box[1], - angle, - ] - - def _rot90(self, img, num): - num = num % 4 # note: -1 % 4 == 3 - for _ in range(num): - img = img.transpose(0, 1).flip(0) - return img - - def test_forward_output_0_90_180_270(self): - for i in range(4): - # i = 0, 1, 2, 3 corresponding to 0, 90, 180, 270 degrees - img = torch.arange(25, dtype=torch.float32).reshape(5, 5) - """ - 0 1 2 3 4 - 5 6 7 8 9 - 10 11 12 13 14 - 15 16 17 18 19 - 20 21 22 23 24 - """ - box = [1, 1, 3, 3] - rotated_box = self._box_to_rotated_box(box=box, angle=90 * i) - - result = self._simple_roi_align_rotated(img=img, box=rotated_box, resolution=(4, 4)) - - # Here's an explanation for 0 degree case: - # point 0 in the original input lies at [0.5, 0.5] - # (the center of bin [0, 1] x [0, 1]) - # point 1 in the original input lies at [1.5, 0.5], etc. 
- # since the resolution is (4, 4) that divides [1, 3] x [1, 3] - # into 4 x 4 equal bins, - # the top-left bin is [1, 1.5] x [1, 1.5], and its center - # (1.25, 1.25) lies at the 3/4 position - # between point 0 and point 1, point 5 and point 6, - # point 0 and point 5, point 1 and point 6, so it can be calculated as - # 0.25*(0*0.25+1*0.75)+(5*0.25+6*0.75)*0.75 = 4.5 - result_expected = torch.tensor( - [ - [4.5, 5.0, 5.5, 6.0], - [7.0, 7.5, 8.0, 8.5], - [9.5, 10.0, 10.5, 11.0], - [12.0, 12.5, 13.0, 13.5], - ] - ) - # This is also an upsampled version of [[6, 7], [11, 12]] - - # When the box is rotated by 90 degrees CCW, - # the result would be rotated by 90 degrees CW, thus it's -i here - result_expected = self._rot90(result_expected, -i) - - assert torch.allclose(result, result_expected) - - def test_resize(self): - H, W = 30, 30 - input = torch.rand(H, W) * 100 - box = [10, 10, 20, 20] - rotated_box = self._box_to_rotated_box(box, angle=0) - output = self._simple_roi_align_rotated(img=input, box=rotated_box, resolution=(5, 5)) - - input2x = cv2.resize(input.numpy(), (W // 2, H // 2), interpolation=cv2.INTER_LINEAR) - input2x = torch.from_numpy(input2x) - box2x = [x / 2 for x in box] - rotated_box2x = self._box_to_rotated_box(box2x, angle=0) - output2x = self._simple_roi_align_rotated(img=input2x, box=rotated_box2x, resolution=(5, 5)) - assert torch.allclose(output2x, output) - - def _simple_roi_align_rotated(self, img, box, resolution): - """ - RoiAlignRotated with scale 1.0 and 0 sample ratio. - """ - op = ROIAlignRotated(output_size=resolution, spatial_scale=1.0, sampling_ratio=0) - input = img[None, None, :, :] - - rois = [0] + list(box) - rois = torch.tensor(rois, dtype=torch.float32)[None, :] - result_cpu = op.forward(input, rois) - if torch.cuda.is_available(): - result_cuda = op.forward(input.cuda(), rois.cuda()) - assert torch.allclose(result_cpu, result_cuda.cpu()) - return result_cpu[0, 0] - - def test_empty_box(self): - img = torch.rand(5, 5) - out = self._simple_roi_align_rotated(img, [2, 3, 0, 0, 0], (7, 7)) - self.assertTrue((out == 0).all()) - - def test_roi_align_rotated_gradcheck_cpu(self): - dtype = torch.float64 - device = torch.device("cpu") - roi_align_rotated_op = ROIAlignRotated( - output_size=(5, 5), spatial_scale=0.5, sampling_ratio=1 - ).to(dtype=dtype, device=device) - x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) - # roi format is (batch index, x_center, y_center, width, height, angle) - rois = torch.tensor( - [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], - dtype=dtype, - device=device, - ) - - def func(input): - return roi_align_rotated_op(input, rois) - - assert gradcheck(func, (x,)), "gradcheck failed for RoIAlignRotated CPU" - assert gradcheck(func, (x.transpose(2, 3),)), "gradcheck failed for RoIAlignRotated CPU" - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_roi_align_rotated_gradient_cuda(self): - """ - Compute gradients for ROIAlignRotated with multiple bounding boxes on the GPU, - and compare the result with ROIAlign - """ - # torch.manual_seed(123) - dtype = torch.float64 - device = torch.device("cuda") - pool_h, pool_w = (5, 5) - - roi_align = ROIAlign(output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2).to( - device=device - ) - - roi_align_rotated = ROIAlignRotated( - output_size=(pool_h, pool_w), spatial_scale=1, sampling_ratio=2 - ).to(device=device) - - x = torch.rand(1, 1, 10, 10, dtype=dtype, device=device, requires_grad=True) - # x_rotated 
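The comment above walks through the bilinear interpolation behind the expected top-left value of 4.5 in the 0-degree case. The same arithmetic, spelled out as a tiny bilinear-interpolation check (pure Python, values taken from the comment):

# Pixel centers sit at half-integer coordinates, so the sample point (1.25, 1.25)
# lies 0.75 of the way from point 0 (value 0) toward point 1 (value 1) in x,
# and 0.75 of the way from that row toward the next row (values 5 and 6) in y.
def bilinear(v00, v01, v10, v11, fx, fy):
    top = v00 * (1 - fx) + v01 * fx
    bottom = v10 * (1 - fx) + v11 * fx
    return top * (1 - fy) + bottom * fy

assert bilinear(0, 1, 5, 6, fx=0.75, fy=0.75) == 4.5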
= x.clone() won't work (will lead to grad_fun=CloneBackward)! - x_rotated = Variable(x.data.clone(), requires_grad=True) - - # roi_rotated format is (batch index, x_center, y_center, width, height, angle) - rois_rotated = torch.tensor( - [[0, 4.5, 4.5, 9, 9, 0], [0, 2, 7, 4, 4, 0], [0, 7, 7, 4, 4, 0]], - dtype=dtype, - device=device, - ) - - y_rotated = roi_align_rotated(x_rotated, rois_rotated) - s_rotated = y_rotated.sum() - s_rotated.backward() - - # roi format is (batch index, x1, y1, x2, y2) - rois = torch.tensor( - [[0, 0, 0, 9, 9], [0, 0, 5, 4, 9], [0, 5, 5, 9, 9]], dtype=dtype, device=device - ) - - y = roi_align(x, rois) - s = y.sum() - s.backward() - - assert torch.allclose( - x.grad, x_rotated.grad - ), "gradients for ROIAlign and ROIAlignRotated mismatch on CUDA" - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_anchor_generator.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_anchor_generator.py deleted file mode 100644 index bc14f02..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_anchor_generator.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest -import torch - -from detectron2.config import get_cfg -from detectron2.layers import ShapeSpec -from detectron2.modeling.anchor_generator import DefaultAnchorGenerator, RotatedAnchorGenerator - -logger = logging.getLogger(__name__) - - -class TestAnchorGenerator(unittest.TestCase): - def test_default_anchor_generator(self): - cfg = get_cfg() - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] - - anchor_generator = DefaultAnchorGenerator(cfg, [ShapeSpec(stride=4)]) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - anchors = anchor_generator([features["stage3"]]) - expected_anchor_tensor = torch.tensor( - [ - [-32.0, -8.0, 32.0, 8.0], - [-16.0, -16.0, 16.0, 16.0], - [-8.0, -32.0, 8.0, 32.0], - [-64.0, -16.0, 64.0, 16.0], - [-32.0, -32.0, 32.0, 32.0], - [-16.0, -64.0, 16.0, 64.0], - [-28.0, -8.0, 36.0, 8.0], # -28.0 == -32.0 + STRIDE (4) - [-12.0, -16.0, 20.0, 16.0], - [-4.0, -32.0, 12.0, 32.0], - [-60.0, -16.0, 68.0, 16.0], - [-28.0, -32.0, 36.0, 32.0], - [-12.0, -64.0, 20.0, 64.0], - ] - ) - - assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) - - def test_default_anchor_generator_centered(self): - # test explicit args - anchor_generator = DefaultAnchorGenerator( - sizes=[32, 64], aspect_ratios=[0.25, 1, 4], strides=[4] - ) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - expected_anchor_tensor = torch.tensor( - [ - [-30.0, -6.0, 34.0, 10.0], - [-14.0, -14.0, 18.0, 18.0], - [-6.0, -30.0, 10.0, 34.0], - [-62.0, -14.0, 66.0, 18.0], - [-30.0, -30.0, 34.0, 34.0], - [-14.0, -62.0, 18.0, 66.0], - [-26.0, -6.0, 38.0, 10.0], - [-10.0, -14.0, 22.0, 18.0], - [-2.0, -30.0, 14.0, 34.0], - [-58.0, -14.0, 70.0, 18.0], - [-26.0, -30.0, 38.0, 34.0], - [-10.0, -62.0, 22.0, 66.0], - ] - ) - - anchors = anchor_generator([features["stage3"]]) - assert 
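The expected_anchor_tensor above is consistent with a simple rule: each anchor keeps the area size**2 and reshapes it so that height / width equals the aspect ratio, and the full set is then repeated at every feature-map cell, offset by the stride. A hedged derivation of the first six rows (the anchors for cell (0, 0)); this reproduces the test data, not the library internals:

import math

sizes = [32, 64]
aspect_ratios = [0.25, 1, 4]
cell_anchors = []
for size in sizes:
    area = float(size * size)
    for ar in aspect_ratios:
        w = math.sqrt(area / ar)          # keep area = size**2 with h / w = ar
        h = ar * w
        cell_anchors.append([-w / 2, -h / 2, w / 2, h / 2])

# Matches the first six rows of expected_anchor_tensor; the remaining six rows
# are the same anchors shifted by the stride of 4 along x (the feature map is 1 x 2).
assert cell_anchors[0] == [-32.0, -8.0, 32.0, 8.0]
assert cell_anchors[2] == [-8.0, -32.0, 8.0, 32.0]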
torch.allclose(anchors[0].tensor, expected_anchor_tensor) - - # doesn't work yet - # anchors = torch.jit.script(anchor_generator)([features["stage3"]]) - # assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) - - def test_rrpn_anchor_generator(self): - cfg = get_cfg() - cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1, 4]] - cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [0, 45] # test single list[float] - anchor_generator = RotatedAnchorGenerator(cfg, [ShapeSpec(stride=4)]) - - # only the last two dimensions of features matter here - num_images = 2 - features = {"stage3": torch.rand(num_images, 96, 1, 2)} - anchors = anchor_generator([features["stage3"]]) - expected_anchor_tensor = torch.tensor( - [ - [0.0, 0.0, 64.0, 16.0, 0.0], - [0.0, 0.0, 64.0, 16.0, 45.0], - [0.0, 0.0, 32.0, 32.0, 0.0], - [0.0, 0.0, 32.0, 32.0, 45.0], - [0.0, 0.0, 16.0, 64.0, 0.0], - [0.0, 0.0, 16.0, 64.0, 45.0], - [0.0, 0.0, 128.0, 32.0, 0.0], - [0.0, 0.0, 128.0, 32.0, 45.0], - [0.0, 0.0, 64.0, 64.0, 0.0], - [0.0, 0.0, 64.0, 64.0, 45.0], - [0.0, 0.0, 32.0, 128.0, 0.0], - [0.0, 0.0, 32.0, 128.0, 45.0], - [4.0, 0.0, 64.0, 16.0, 0.0], # 4.0 == 0.0 + STRIDE (4) - [4.0, 0.0, 64.0, 16.0, 45.0], - [4.0, 0.0, 32.0, 32.0, 0.0], - [4.0, 0.0, 32.0, 32.0, 45.0], - [4.0, 0.0, 16.0, 64.0, 0.0], - [4.0, 0.0, 16.0, 64.0, 45.0], - [4.0, 0.0, 128.0, 32.0, 0.0], - [4.0, 0.0, 128.0, 32.0, 45.0], - [4.0, 0.0, 64.0, 64.0, 0.0], - [4.0, 0.0, 64.0, 64.0, 45.0], - [4.0, 0.0, 32.0, 128.0, 0.0], - [4.0, 0.0, 32.0, 128.0, 45.0], - ] - ) - - assert torch.allclose(anchors[0].tensor, expected_anchor_tensor) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_box2box_transform.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_box2box_transform.py deleted file mode 100644 index 9d124d7..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_box2box_transform.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import logging -import unittest -import torch - -from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated - -logger = logging.getLogger(__name__) - - -def random_boxes(mean_box, stdev, N): - return torch.rand(N, 4) * stdev + torch.tensor(mean_box, dtype=torch.float) - - -class TestBox2BoxTransform(unittest.TestCase): - def test_reconstruction(self): - weights = (5, 5, 10, 10) - b2b_tfm = Box2BoxTransform(weights=weights) - src_boxes = random_boxes([10, 10, 20, 20], 1, 10) - dst_boxes = random_boxes([10, 10, 20, 20], 1, 10) - - devices = [torch.device("cpu")] - if torch.cuda.is_available(): - devices.append(torch.device("cuda")) - for device in devices: - src_boxes = src_boxes.to(device=device) - dst_boxes = dst_boxes.to(device=device) - deltas = b2b_tfm.get_deltas(src_boxes, dst_boxes) - dst_boxes_reconstructed = b2b_tfm.apply_deltas(deltas, src_boxes) - assert torch.allclose(dst_boxes, dst_boxes_reconstructed) - - -def random_rotated_boxes(mean_box, std_length, std_angle, N): - return torch.cat( - [torch.rand(N, 4) * std_length, torch.rand(N, 1) * std_angle], dim=1 - ) + torch.tensor(mean_box, dtype=torch.float) - - -class TestBox2BoxTransformRotated(unittest.TestCase): - def test_reconstruction(self): - weights = (5, 5, 10, 10, 1) - b2b_transform = Box2BoxTransformRotated(weights=weights) - src_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) - dst_boxes = random_rotated_boxes([10, 10, 20, 20, -30], 5, 60.0, 10) - - devices = [torch.device("cpu")] - if torch.cuda.is_available(): - devices.append(torch.device("cuda")) - for device in devices: - src_boxes = src_boxes.to(device=device) - dst_boxes = dst_boxes.to(device=device) - deltas = b2b_transform.get_deltas(src_boxes, dst_boxes) - dst_boxes_reconstructed = b2b_transform.apply_deltas(deltas, src_boxes) - assert torch.allclose(dst_boxes[:, :4], dst_boxes_reconstructed[:, :4], atol=1e-5) - # angle difference has to be normalized - assert torch.allclose( - (dst_boxes[:, 4] - dst_boxes_reconstructed[:, 4] + 180.0) % 360.0 - 180.0, - torch.zeros_like(dst_boxes[:, 4]), - atol=1e-4, - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py deleted file mode 100644 index 70b64d3..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_fast_rcnn.py +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
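The angle check in TestBox2BoxTransformRotated wraps the difference into [-180, 180) before comparing against zero, so a reconstruction that comes back as 359.9 when the target was 0 still counts as a match. A one-line illustration of the wrap (plain Python floats, helper name is ours):

def wrap_angle_diff(a, b):
    # Map a - b into [-180, 180) so that e.g. 359.9 vs 0.0 compares as -0.1 degrees.
    return (a - b + 180.0) % 360.0 - 180.0

assert abs(wrap_angle_diff(359.9, 0.0) - (-0.1)) < 1e-9
assert wrap_angle_diff(-180.0, 180.0) == 0.0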
All Rights Reserved -import logging -import unittest -import torch - -from detectron2.layers import ShapeSpec -from detectron2.modeling.box_regression import Box2BoxTransform, Box2BoxTransformRotated -from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers -from detectron2.modeling.roi_heads.rotated_fast_rcnn import RotatedFastRCNNOutputLayers -from detectron2.structures import Boxes, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage - -logger = logging.getLogger(__name__) - - -class FastRCNNTest(unittest.TestCase): - def test_fast_rcnn(self): - torch.manual_seed(132) - - box_head_output_size = 8 - - box_predictor = FastRCNNOutputLayers( - ShapeSpec(channels=box_head_output_size), - box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), - num_classes=5, - ) - feature_pooled = torch.rand(2, box_head_output_size) - predictions = box_predictor(feature_pooled) - - proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32) - gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - proposal = Instances((10, 10)) - proposal.proposal_boxes = Boxes(proposal_boxes) - proposal.gt_boxes = Boxes(gt_boxes) - proposal.gt_classes = torch.tensor([1, 2]) - - with EventStorage(): # capture events in a new storage to discard them - losses = box_predictor.losses(predictions, [proposal]) - - expected_losses = { - "loss_cls": torch.tensor(1.7951188087), - "loss_box_reg": torch.tensor(4.0357131958), - } - for name in expected_losses.keys(): - assert torch.allclose(losses[name], expected_losses[name]) - - def test_fast_rcnn_empty_batch(self, device="cpu"): - box_predictor = FastRCNNOutputLayers( - ShapeSpec(channels=10), - box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)), - num_classes=8, - ).to(device=device) - - logits = torch.randn(0, 100, requires_grad=True, device=device) - deltas = torch.randn(0, 4, requires_grad=True, device=device) - losses = box_predictor.losses([logits, deltas], []) - for value in losses.values(): - self.assertTrue(torch.allclose(value, torch.zeros_like(value))) - sum(losses.values()).backward() - self.assertTrue(logits.grad is not None) - self.assertTrue(deltas.grad is not None) - - predictions, _ = box_predictor.inference([logits, deltas], []) - self.assertEqual(len(predictions), 0) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_fast_rcnn_empty_batch_cuda(self): - self.test_fast_rcnn_empty_batch(device=torch.device("cuda")) - - def test_fast_rcnn_rotated(self): - torch.manual_seed(132) - box_head_output_size = 8 - - box_predictor = RotatedFastRCNNOutputLayers( - ShapeSpec(channels=box_head_output_size), - box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)), - num_classes=5, - ) - feature_pooled = torch.rand(2, box_head_output_size) - predictions = box_predictor(feature_pooled) - proposal_boxes = torch.tensor( - [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32 - ) - gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) - proposal = Instances((10, 10)) - proposal.proposal_boxes = RotatedBoxes(proposal_boxes) - proposal.gt_boxes = RotatedBoxes(gt_boxes) - proposal.gt_classes = torch.tensor([1, 2]) - - with EventStorage(): # capture events in a new storage to discard them - losses = box_predictor.losses(predictions, [proposal]) - - # Note: the expected losses are slightly different even if - # the boxes are essentially the same as in the FastRCNNOutput test, because - # 
bbox_pred in FastRCNNOutputLayers have different Linear layers/initialization - # between the two cases. - expected_losses = { - "loss_cls": torch.tensor(1.7920907736), - "loss_box_reg": torch.tensor(4.0410838127), - } - for name in expected_losses.keys(): - assert torch.allclose(losses[name], expected_losses[name]) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_model_e2e.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_model_e2e.py deleted file mode 100644 index 95fe6a0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_model_e2e.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - - -import unittest -import torch - -import detectron2.model_zoo as model_zoo -from detectron2.config import get_cfg -from detectron2.modeling import build_model -from detectron2.structures import BitMasks, Boxes, ImageList, Instances -from detectron2.utils.events import EventStorage - - -def get_model_zoo(config_path): - """ - Like model_zoo.get, but do not load any weights (even pretrained) - """ - cfg_file = model_zoo.get_config_file(config_path) - cfg = get_cfg() - cfg.merge_from_file(cfg_file) - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - return build_model(cfg) - - -def create_model_input(img, inst=None): - if inst is not None: - return {"image": img, "instances": inst} - else: - return {"image": img} - - -def get_empty_instance(h, w): - inst = Instances((h, w)) - inst.gt_boxes = Boxes(torch.rand(0, 4)) - inst.gt_classes = torch.tensor([]).to(dtype=torch.int64) - inst.gt_masks = BitMasks(torch.rand(0, h, w)) - return inst - - -def get_regular_bitmask_instances(h, w): - inst = Instances((h, w)) - inst.gt_boxes = Boxes(torch.rand(3, 4)) - inst.gt_boxes.tensor[:, 2:] += inst.gt_boxes.tensor[:, :2] - inst.gt_classes = torch.tensor([3, 4, 5]).to(dtype=torch.int64) - inst.gt_masks = BitMasks((torch.rand(3, h, w) > 0.5)) - return inst - - -class ModelE2ETest: - def setUp(self): - torch.manual_seed(43) - self.model = get_model_zoo(self.CONFIG_PATH) - - def _test_eval(self, input_sizes): - inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] - self.model.eval() - self.model(inputs) - - def _test_train(self, input_sizes, instances): - assert len(input_sizes) == len(instances) - inputs = [ - create_model_input(torch.rand(3, s[0], s[1]), inst) - for s, inst in zip(input_sizes, instances) - ] - self.model.train() - with EventStorage(): - losses = self.model(inputs) - sum(losses.values()).backward() - del losses - - def _inf_tensor(self, *shape): - return 1.0 / torch.zeros(*shape, device=self.model.device) - - def _nan_tensor(self, *shape): - return torch.zeros(*shape, device=self.model.device).fill_(float("nan")) - - def test_empty_data(self): - instances = [get_empty_instance(200, 250), get_empty_instance(200, 249)] - self._test_eval([(200, 250), (200, 249)]) - self._test_train([(200, 250), (200, 249)], instances) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_eval_tocpu(self): - model = get_model_zoo(self.CONFIG_PATH).cpu() - model.eval() - input_sizes = [(200, 250), (200, 249)] - inputs = [create_model_input(torch.rand(3, s[0], s[1])) for s in input_sizes] - model(inputs) - - -class MaskRCNNE2ETest(ModelE2ETest, unittest.TestCase): - CONFIG_PATH = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml" - - def test_half_empty_data(self): - 
instances = [get_empty_instance(200, 250), get_regular_bitmask_instances(200, 249)] - self._test_train([(200, 250), (200, 249)], instances) - - # This test is flaky because in some environment the output features are zero due to relu - # def test_rpn_inf_nan_data(self): - # self.model.eval() - # for tensor in [self._inf_tensor, self._nan_tensor]: - # images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - # features = { - # "p2": tensor(1, 256, 256, 256), - # "p3": tensor(1, 256, 128, 128), - # "p4": tensor(1, 256, 64, 64), - # "p5": tensor(1, 256, 32, 32), - # "p6": tensor(1, 256, 16, 16), - # } - # props, _ = self.model.proposal_generator(images, features) - # self.assertEqual(len(props[0]), 0) - - def test_roiheads_inf_nan_data(self): - self.model.eval() - for tensor in [self._inf_tensor, self._nan_tensor]: - images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - features = { - "p2": tensor(1, 256, 256, 256), - "p3": tensor(1, 256, 128, 128), - "p4": tensor(1, 256, 64, 64), - "p5": tensor(1, 256, 32, 32), - "p6": tensor(1, 256, 16, 16), - } - props = [Instances((510, 510))] - props[0].proposal_boxes = Boxes([[10, 10, 20, 20]]).to(device=self.model.device) - props[0].objectness_logits = torch.tensor([1.0]).reshape(1, 1) - det, _ = self.model.roi_heads(images, features, props) - self.assertEqual(len(det[0]), 0) - - -class RetinaNetE2ETest(ModelE2ETest, unittest.TestCase): - CONFIG_PATH = "COCO-Detection/retinanet_R_50_FPN_1x.yaml" - - def test_inf_nan_data(self): - self.model.eval() - self.model.score_threshold = -999999999 - for tensor in [self._inf_tensor, self._nan_tensor]: - images = ImageList(tensor(1, 3, 512, 512), [(510, 510)]) - features = [ - tensor(1, 256, 128, 128), - tensor(1, 256, 64, 64), - tensor(1, 256, 32, 32), - tensor(1, 256, 16, 16), - tensor(1, 256, 8, 8), - ] - anchors = self.model.anchor_generator(features) - box_cls, box_delta = self.model.head(features) - box_cls = [tensor(*k.shape) for k in box_cls] - box_delta = [tensor(*k.shape) for k in box_delta] - det = self.model.inference(box_cls, box_delta, anchors, images.image_sizes) - # all predictions (if any) are infinite or nan - if len(det[0]): - self.assertTrue(torch.isfinite(det[0].pred_boxes.tensor).sum() == 0) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_heads.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_heads.py deleted file mode 100644 index 5a06303..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_heads.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import logging -import unittest -import torch - -from detectron2.config import get_cfg -from detectron2.modeling.backbone import build_backbone -from detectron2.modeling.proposal_generator.build import build_proposal_generator -from detectron2.modeling.roi_heads import build_roi_heads -from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage - -logger = logging.getLogger(__name__) - - -class ROIHeadsTest(unittest.TestCase): - def test_roi_heads(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.ROI_HEADS.NAME = "StandardROIHeads" - cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" - cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 - cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2" - cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5) - backbone = build_backbone(cfg) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - - image_shape = (15, 15) - gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - gt_instance0 = Instances(image_shape) - gt_instance0.gt_boxes = Boxes(gt_boxes0) - gt_instance0.gt_classes = torch.tensor([2, 1]) - gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32) - gt_instance1 = Instances(image_shape) - gt_instance1.gt_boxes = Boxes(gt_boxes1) - gt_instance1.gt_classes = torch.tensor([1, 2]) - gt_instances = [gt_instance0, gt_instance1] - - proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - roi_heads = build_roi_heads(cfg, backbone.output_shape()) - - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator(images, features, gt_instances) - _, detector_losses = roi_heads(images, features, proposals, gt_instances) - - expected_losses = { - "loss_cls": torch.tensor(4.4236516953), - "loss_box_reg": torch.tensor(0.0091214813), - } - for name in expected_losses.keys(): - self.assertTrue(torch.allclose(detector_losses[name], expected_losses[name])) - - def test_rroi_heads(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" - cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" - cfg.MODEL.ROI_HEADS.NAME = "RROIHeads" - cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead" - cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2 - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) - cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" - cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated" - cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1) - backbone = build_backbone(cfg) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - - image_shape = (15, 15) - gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32) - gt_instance0 = Instances(image_shape) - gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0) - gt_instance0.gt_classes = torch.tensor([2, 1]) - gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32) - gt_instance1 = Instances(image_shape) - gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1) - gt_instance1.gt_classes = torch.tensor([1, 2]) - gt_instances = [gt_instance0, gt_instance1] - - 
proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - roi_heads = build_roi_heads(cfg, backbone.output_shape()) - - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator(images, features, gt_instances) - _, detector_losses = roi_heads(images, features, proposals, gt_instances) - - expected_losses = { - "loss_cls": torch.tensor(4.381618499755859), - "loss_box_reg": torch.tensor(0.0011829272843897343), - } - for name in expected_losses.keys(): - err_msg = "detector_losses[{}] = {}, expected losses = {}".format( - name, detector_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(detector_losses[name], expected_losses[name]), err_msg) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_pooler.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_pooler.py deleted file mode 100644 index 9aa3825..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_roi_pooler.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest -import torch - -from detectron2.modeling.poolers import ROIPooler -from detectron2.structures import Boxes, RotatedBoxes - -logger = logging.getLogger(__name__) - - -class TestROIPooler(unittest.TestCase): - def _rand_boxes(self, num_boxes, x_max, y_max): - coords = torch.rand(num_boxes, 4) - coords[:, 0] *= x_max - coords[:, 1] *= y_max - coords[:, 2] *= x_max - coords[:, 3] *= y_max - boxes = torch.zeros(num_boxes, 4) - boxes[:, 0] = torch.min(coords[:, 0], coords[:, 2]) - boxes[:, 1] = torch.min(coords[:, 1], coords[:, 3]) - boxes[:, 2] = torch.max(coords[:, 0], coords[:, 2]) - boxes[:, 3] = torch.max(coords[:, 1], coords[:, 3]) - return boxes - - def _test_roialignv2_roialignrotated_match(self, device): - pooler_resolution = 14 - canonical_level = 4 - canonical_scale_factor = 2 ** canonical_level - pooler_scales = (1.0 / canonical_scale_factor,) - sampling_ratio = 0 - - N, C, H, W = 2, 4, 10, 8 - N_rois = 10 - std = 11 - mean = 0 - feature = (torch.rand(N, C, H, W) - 0.5) * 2 * std + mean - - features = [feature.to(device)] - - rois = [] - rois_rotated = [] - for _ in range(N): - boxes = self._rand_boxes( - num_boxes=N_rois, x_max=W * canonical_scale_factor, y_max=H * canonical_scale_factor - ) - - rotated_boxes = torch.zeros(N_rois, 5) - rotated_boxes[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0 - rotated_boxes[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0 - rotated_boxes[:, 2] = boxes[:, 2] - boxes[:, 0] - rotated_boxes[:, 3] = boxes[:, 3] - boxes[:, 1] - rois.append(Boxes(boxes).to(device)) - rois_rotated.append(RotatedBoxes(rotated_boxes).to(device)) - - roialignv2_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type="ROIAlignV2", - ) - - roialignv2_out = roialignv2_pooler(features, rois) - - roialignrotated_pooler = ROIPooler( - output_size=pooler_resolution, - scales=pooler_scales, - sampling_ratio=sampling_ratio, - pooler_type="ROIAlignRotated", - ) - - roialignrotated_out = roialignrotated_pooler(features, rois_rotated) - - self.assertTrue(torch.allclose(roialignv2_out, roialignrotated_out, atol=1e-4)) - - def test_roialignv2_roialignrotated_match_cpu(self): - self._test_roialignv2_roialignrotated_match(device="cpu") - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not 
available") - def test_roialignv2_roialignrotated_match_cuda(self): - self._test_roialignv2_roialignrotated_match(device="cuda") - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_rpn.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_rpn.py deleted file mode 100644 index 967d210..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/modeling/test_rpn.py +++ /dev/null @@ -1,234 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest -import torch - -from detectron2.config import get_cfg -from detectron2.modeling.backbone import build_backbone -from detectron2.modeling.proposal_generator.build import build_proposal_generator -from detectron2.modeling.proposal_generator.rpn_outputs import find_top_rpn_proposals -from detectron2.structures import Boxes, ImageList, Instances, RotatedBoxes -from detectron2.utils.events import EventStorage - -logger = logging.getLogger(__name__) - - -class RPNTest(unittest.TestCase): - def test_rpn(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RPN" - cfg.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator" - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1) - backbone = build_backbone(cfg) - proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - image_shape = (15, 15) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32) - gt_instances = Instances(image_shape) - gt_instances.gt_boxes = Boxes(gt_boxes) - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator( - images, features, [gt_instances[0], gt_instances[1]] - ) - - expected_losses = { - "loss_rpn_cls": torch.tensor(0.0804563984), - "loss_rpn_loc": torch.tensor(0.0990132466), - } - for name in expected_losses.keys(): - err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( - name, proposal_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) - - expected_proposal_boxes = [ - Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])), - Boxes( - torch.tensor( - [ - [0, 0, 30, 20], - [0, 0, 16.7862777710, 13.1362524033], - [0, 0, 30, 13.3173446655], - [0, 0, 10.8602609634, 20], - [7.7165775299, 0, 27.3875980377, 20], - ] - ) - ), - ] - - expected_objectness_logits = [ - torch.tensor([0.1225359365, -0.0133192837]), - torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]), - ] - - for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip( - proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits - ): - self.assertEqual(len(proposal), len(expected_proposal_box)) - self.assertEqual(proposal.image_size, im_size) - self.assertTrue( - torch.allclose(proposal.proposal_boxes.tensor, expected_proposal_box.tensor) - ) - self.assertTrue(torch.allclose(proposal.objectness_logits, expected_objectness_logit)) - - def test_rrpn(self): - torch.manual_seed(121) - cfg = get_cfg() - cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN" - cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator" - 
cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64]] - cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 1]] - cfg.MODEL.ANCHOR_GENERATOR.ANGLES = [[0, 60]] - cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1) - cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead" - backbone = build_backbone(cfg) - proposal_generator = build_proposal_generator(cfg, backbone.output_shape()) - num_images = 2 - images_tensor = torch.rand(num_images, 20, 30) - image_sizes = [(10, 10), (20, 30)] - images = ImageList(images_tensor, image_sizes) - image_shape = (15, 15) - num_channels = 1024 - features = {"res4": torch.rand(num_images, num_channels, 1, 2)} - gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32) - gt_instances = Instances(image_shape) - gt_instances.gt_boxes = RotatedBoxes(gt_boxes) - with EventStorage(): # capture events in a new storage to discard them - proposals, proposal_losses = proposal_generator( - images, features, [gt_instances[0], gt_instances[1]] - ) - - expected_losses = { - "loss_rpn_cls": torch.tensor(0.043263837695121765), - "loss_rpn_loc": torch.tensor(0.14432406425476074), - } - for name in expected_losses.keys(): - err_msg = "proposal_losses[{}] = {}, expected losses = {}".format( - name, proposal_losses[name], expected_losses[name] - ) - self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg) - - expected_proposal_boxes = [ - RotatedBoxes( - torch.tensor( - [ - [0.60189795, 1.24095452, 61.98131943, 18.03621292, -4.07244873], - [15.64940453, 1.69624567, 59.59749603, 16.34339333, 2.62692475], - [-3.02982378, -2.69752932, 67.90952301, 59.62455750, 59.97010040], - [16.71863365, 1.98309708, 35.61507797, 32.81484985, 62.92267227], - [0.49432933, -7.92979717, 67.77606201, 62.93098450, -1.85656738], - [8.00880814, 1.36017394, 121.81007385, 32.74150467, 50.44297409], - [16.44299889, -4.82221127, 63.39775848, 61.22503662, 54.12270737], - [5.00000000, 5.00000000, 10.00000000, 10.00000000, -0.76943970], - [17.64130402, -0.98095351, 61.40377808, 16.28918839, 55.53118134], - [0.13016054, 4.60568953, 35.80157471, 32.30180359, 62.52872086], - [-4.26460743, 0.39604485, 124.30079651, 31.84611320, -1.58203125], - [7.52815342, -0.91636634, 62.39784622, 15.45565224, 60.79549789], - ] - ) - ), - RotatedBoxes( - torch.tensor( - [ - [0.07734215, 0.81635046, 65.33510590, 17.34688377, -1.51821899], - [-3.41833067, -3.11320257, 64.17595673, 60.55617905, 58.27033234], - [20.67383385, -6.16561556, 63.60531998, 62.52315903, 54.85546494], - [15.00000000, 10.00000000, 30.00000000, 20.00000000, -0.18218994], - [9.22646523, -6.84775209, 62.09895706, 65.46472931, -2.74307251], - [15.00000000, 4.93451595, 30.00000000, 9.86903191, -0.60272217], - [8.88342094, 2.65560246, 120.95362854, 32.45022202, 55.75970078], - [16.39088631, 2.33887148, 34.78761292, 35.61492920, 60.81977463], - [9.78298569, 10.00000000, 19.56597137, 20.00000000, -0.86660767], - [1.28576660, 5.49873352, 34.93610382, 33.22600174, 60.51599884], - [17.58912468, -1.63270092, 62.96052551, 16.45713997, 52.91245270], - [5.64749718, -1.90428460, 62.37649155, 16.19474792, 61.09543991], - [0.82255805, 2.34931135, 118.83985901, 32.83671188, 56.50753784], - [-5.33874989, 1.64404404, 125.28501892, 33.35424042, -2.80731201], - ] - ) - ), - ] - - expected_objectness_logits = [ - torch.tensor( - [ - 0.10111768, - 0.09112845, - 0.08466332, - 0.07589971, - 0.06650183, - 0.06350251, - 0.04299347, - 0.01864817, - 0.00986163, - 0.00078543, - -0.04573630, - -0.04799230, - ] - ), - torch.tensor( - [ - 0.11373727, - 0.09377633, - 
0.05281663, - 0.05143715, - 0.04040275, - 0.03250912, - 0.01307789, - 0.01177734, - 0.00038105, - -0.00540255, - -0.01194804, - -0.01461012, - -0.03061717, - -0.03599222, - ] - ), - ] - - torch.set_printoptions(precision=8, sci_mode=False) - - for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip( - proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits - ): - self.assertEqual(len(proposal), len(expected_proposal_box)) - self.assertEqual(proposal.image_size, im_size) - # It seems that there's some randomness in the result across different machines: - # This test can be run on a local machine for 100 times with exactly the same result, - # However, a different machine might produce slightly different results, - # thus the atol here. - err_msg = "computed proposal boxes = {}, expected {}".format( - proposal.proposal_boxes.tensor, expected_proposal_box.tensor - ) - self.assertTrue( - torch.allclose( - proposal.proposal_boxes.tensor, expected_proposal_box.tensor, atol=1e-5 - ), - err_msg, - ) - - err_msg = "computed objectness logits = {}, expected {}".format( - proposal.objectness_logits, expected_objectness_logit - ) - self.assertTrue( - torch.allclose(proposal.objectness_logits, expected_objectness_logit, atol=1e-5), - err_msg, - ) - - def test_rpn_proposals_inf(self): - N, Hi, Wi, A = 3, 3, 3, 3 - proposals = [torch.rand(N, Hi * Wi * A, 4)] - pred_logits = [torch.rand(N, Hi * Wi * A)] - pred_logits[0][1][3:5].fill_(float("inf")) - images = ImageList.from_tensors([torch.rand(3, 10, 10)] * 3) - find_top_rpn_proposals(proposals, pred_logits, images, 0.5, 1000, 1000, 0, False) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/__init__.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_boxes.py deleted file mode 100644 index 4d33c3b..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_boxes.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import json -import math -import numpy as np -import unittest -import torch - -from detectron2.structures import Boxes, BoxMode, pairwise_iou - - -class TestBoxMode(unittest.TestCase): - def _convert_xy_to_wh(self, x): - return BoxMode.convert(x, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS) - - def _convert_xywha_to_xyxy(self, x): - return BoxMode.convert(x, BoxMode.XYWHA_ABS, BoxMode.XYXY_ABS) - - def _convert_xywh_to_xywha(self, x): - return BoxMode.convert(x, BoxMode.XYWH_ABS, BoxMode.XYWHA_ABS) - - def test_box_convert_list(self): - for tp in [list, tuple]: - box = tp([5.0, 5.0, 10.0, 10.0]) - output = self._convert_xy_to_wh(box) - self.assertIsInstance(output, tp) - self.assertIsInstance(output[0], float) - self.assertEqual(output, tp([5.0, 5.0, 5.0, 5.0])) - - with self.assertRaises(Exception): - self._convert_xy_to_wh([box]) - - def test_box_convert_array(self): - box = np.asarray([[5, 5, 10, 10], [1, 1, 2, 3]]) - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - def test_box_convert_cpu_tensor(self): - box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]) - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - output = output.numpy() - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_box_convert_cuda_tensor(self): - box = torch.tensor([[5, 5, 10, 10], [1, 1, 2, 3]]).cuda() - output = self._convert_xy_to_wh(box) - self.assertEqual(output.dtype, box.dtype) - self.assertEqual(output.shape, box.shape) - self.assertEqual(output.device, box.device) - output = output.cpu().numpy() - self.assertTrue((output[0] == [5, 5, 5, 5]).all()) - self.assertTrue((output[1] == [1, 1, 1, 2]).all()) - - def test_box_convert_xywha_to_xyxy_list(self): - for tp in [list, tuple]: - box = tp([50, 50, 30, 20, 0]) - output = self._convert_xywha_to_xyxy(box) - self.assertIsInstance(output, tp) - self.assertEqual(output, tp([35, 40, 65, 60])) - - with self.assertRaises(Exception): - self._convert_xywha_to_xyxy([box]) - - def test_box_convert_xywha_to_xyxy_array(self): - for dtype in [np.float64, np.float32]: - box = np.asarray( - [ - [50, 50, 30, 20, 0], - [50, 50, 30, 20, 90], - [1, 1, math.sqrt(2), math.sqrt(2), -45], - ], - dtype=dtype, - ) - output = self._convert_xywha_to_xyxy(box) - self.assertEqual(output.dtype, box.dtype) - expected = np.asarray([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) - self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywha_to_xyxy_tensor(self): - for dtype in [torch.float32, torch.float64]: - box = torch.tensor( - [ - [50, 50, 30, 20, 0], - [50, 50, 30, 20, 90], - [1, 1, math.sqrt(2), math.sqrt(2), -45], - ], - dtype=dtype, - ) - output = self._convert_xywha_to_xyxy(box) - self.assertEqual(output.dtype, box.dtype) - expected = torch.tensor([[35, 40, 65, 60], [40, 35, 60, 65], [0, 0, 2, 2]], dtype=dtype) - - self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywh_to_xywha_list(self): - for tp in [list, tuple]: - box = tp([50, 50, 30, 20]) - output = self._convert_xywh_to_xywha(box) - self.assertIsInstance(output, tp) - 
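The XYWHA_ABS -> XYXY_ABS expectations above are consistent with taking the axis-aligned box that encloses the rotated rectangle: the enclosing width is w*|cos(theta)| + h*|sin(theta)|, the enclosing height is h*|cos(theta)| + w*|sin(theta)|, both centered on (cx, cy). A small check against the three rows used in the test (helper name is ours; this verifies the test data, it is not the BoxMode implementation):

import math

def xywha_to_enclosing_xyxy(cx, cy, w, h, angle_deg):
    t = math.radians(angle_deg)
    new_w = w * abs(math.cos(t)) + h * abs(math.sin(t))
    new_h = h * abs(math.cos(t)) + w * abs(math.sin(t))
    return [cx - new_w / 2, cy - new_h / 2, cx + new_w / 2, cy + new_h / 2]

for box, expected in [
    ([50, 50, 30, 20, 0], [35, 40, 65, 60]),
    ([50, 50, 30, 20, 90], [40, 35, 60, 65]),
    ([1, 1, math.sqrt(2), math.sqrt(2), -45], [0, 0, 2, 2]),
]:
    got = xywha_to_enclosing_xyxy(*box)
    assert all(abs(g - e) < 1e-6 for g, e in zip(got, expected))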
self.assertEqual(output, tp([65, 60, 30, 20, 0])) - - with self.assertRaises(Exception): - self._convert_xywh_to_xywha([box]) - - def test_box_convert_xywh_to_xywha_array(self): - for dtype in [np.float64, np.float32]: - box = np.asarray([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) - output = self._convert_xywh_to_xywha(box) - self.assertEqual(output.dtype, box.dtype) - expected = np.asarray( - [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype - ) - self.assertTrue(np.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_box_convert_xywh_to_xywha_tensor(self): - for dtype in [torch.float32, torch.float64]: - box = torch.tensor([[30, 40, 70, 60], [30, 40, 60, 70], [-1, -1, 2, 2]], dtype=dtype) - output = self._convert_xywh_to_xywha(box) - self.assertEqual(output.dtype, box.dtype) - expected = torch.tensor( - [[65, 70, 70, 60, 0], [60, 75, 60, 70, 0], [0, 0, 2, 2, 0]], dtype=dtype - ) - - self.assertTrue(torch.allclose(output, expected, atol=1e-6), "output={}".format(output)) - - def test_json_serializable(self): - payload = {"box_mode": BoxMode.XYWH_REL} - try: - json.dumps(payload) - except Exception: - self.fail("JSON serialization failed") - - def test_json_deserializable(self): - payload = '{"box_mode": 2}' - obj = json.loads(payload) - try: - obj["box_mode"] = BoxMode(obj["box_mode"]) - except Exception: - self.fail("JSON deserialization failed") - - -class TestBoxIOU(unittest.TestCase): - def test_pairwise_iou(self): - boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]) - - boxes2 = torch.tensor( - [ - [0.0, 0.0, 1.0, 1.0], - [0.0, 0.0, 0.5, 1.0], - [0.0, 0.0, 1.0, 0.5], - [0.0, 0.0, 0.5, 0.5], - [0.5, 0.5, 1.0, 1.0], - [0.5, 0.5, 1.5, 1.5], - ] - ) - - expected_ious = torch.tensor( - [ - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - ] - ) - - ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2)) - - self.assertTrue(torch.allclose(ious, expected_ious)) - - -class TestBoxes(unittest.TestCase): - def test_empty_cat(self): - x = Boxes.cat([]) - self.assertTrue(x.tensor.shape, (0, 4)) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_imagelist.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_imagelist.py deleted file mode 100644 index abeb355..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_imagelist.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
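The last expected IoU above, 0.25 / (2 - 0.25), is plain intersection-over-union for the unit box against the half-overlapping box [0.5, 0.5, 1.5, 1.5]: the intersection is a 0.5 x 0.5 square (area 0.25) and the union is 1 + 1 - 0.25. As arithmetic:

inter = 0.5 * 0.5                 # overlap of [0, 0, 1, 1] and [0.5, 0.5, 1.5, 1.5]
union = 1.0 + 1.0 - inter
assert abs(inter / union - 0.25 / (2 - 0.25)) < 1e-12   # = 1/7, about 0.142857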
All Rights Reserved - -import unittest -from typing import Sequence -import torch - -from detectron2.structures import ImageList - - -class TestImageList(unittest.TestCase): - def test_imagelist_padding_shape(self): - class TensorToImageList(torch.nn.Module): - def forward(self, tensors: Sequence[torch.Tensor]): - return ImageList.from_tensors(tensors, 4).tensor - - func = torch.jit.trace( - TensorToImageList(), ([torch.ones((3, 10, 10), dtype=torch.float32)],) - ) - ret = func([torch.ones((3, 15, 20), dtype=torch.float32)]) - self.assertEqual(list(ret.shape), [1, 3, 16, 20], str(ret.shape)) - - func = torch.jit.trace( - TensorToImageList(), - ( - [ - torch.ones((3, 16, 10), dtype=torch.float32), - torch.ones((3, 13, 11), dtype=torch.float32), - ], - ), - ) - ret = func( - [ - torch.ones((3, 25, 20), dtype=torch.float32), - torch.ones((3, 10, 10), dtype=torch.float32), - ] - ) - # does not support calling with different #images - self.assertEqual(list(ret.shape), [2, 3, 28, 20], str(ret.shape)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_instances.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_instances.py deleted file mode 100644 index 79c5249..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_instances.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import unittest -import torch - -from detectron2.structures import Instances - - -class TestInstancesIndexing(unittest.TestCase): - def test_int_indexing(self): - attr1 = torch.tensor([[0.0, 0.0, 1.0], [0.0, 0.0, 0.5], [0.0, 0.0, 1.0], [0.0, 0.5, 0.5]]) - attr2 = torch.tensor([0.1, 0.2, 0.3, 0.4]) - instances = Instances((100, 100)) - instances.attr1 = attr1 - instances.attr2 = attr2 - for i in range(-len(instances), len(instances)): - inst = instances[i] - self.assertEqual((inst.attr1 == attr1[i]).all(), True) - self.assertEqual((inst.attr2 == attr2[i]).all(), True) - - self.assertRaises(IndexError, lambda: instances[len(instances)]) - self.assertRaises(IndexError, lambda: instances[-len(instances) - 1]) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_rotated_boxes.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_rotated_boxes.py deleted file mode 100644 index 575ac48..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/structures/test_rotated_boxes.py +++ /dev/null @@ -1,357 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
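The expected shapes in test_imagelist_padding_shape are consistent with padding the per-batch maximum height and width up to the size_divisibility argument (4 here): a lone 15 x 20 image becomes 16 x 20, and the (25 x 20, 10 x 10) pair becomes 28 x 20. A hedged sketch of the same rounding rule (pure Python, helper names are ours):

def round_up(v, d):
    return (v + d - 1) // d * d

def padded_hw(shapes, size_divisibility=4):
    # shapes: iterable of (h, w); the batch max is rounded up to a multiple of size_divisibility.
    max_h = max(h for h, _ in shapes)
    max_w = max(w for _, w in shapes)
    return round_up(max_h, size_divisibility), round_up(max_w, size_divisibility)

assert padded_hw([(15, 20)]) == (16, 20)
assert padded_hw([(25, 20), (10, 10)]) == (28, 20)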
All Rights Reserved -from __future__ import absolute_import, division, print_function, unicode_literals -import logging -import math -import random -import unittest -import torch -from fvcore.common.benchmark import benchmark - -from detectron2.layers.rotated_boxes import pairwise_iou_rotated -from detectron2.structures.boxes import Boxes -from detectron2.structures.rotated_boxes import RotatedBoxes, pairwise_iou - -logger = logging.getLogger(__name__) - - -class TestRotatedBoxesLayer(unittest.TestCase): - def test_iou_0_dim_cpu(self): - boxes1 = torch.rand(0, 5, dtype=torch.float32) - boxes2 = torch.rand(10, 5, dtype=torch.float32) - expected_ious = torch.zeros(0, 10, dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - boxes1 = torch.rand(10, 5, dtype=torch.float32) - boxes2 = torch.rand(0, 5, dtype=torch.float32) - expected_ious = torch.zeros(10, 0, dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_0_dim_cuda(self): - boxes1 = torch.rand(0, 5, dtype=torch.float32) - boxes2 = torch.rand(10, 5, dtype=torch.float32) - expected_ious = torch.zeros(0, 10, dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - boxes1 = torch.rand(10, 5, dtype=torch.float32) - boxes2 = torch.rand(0, 5, dtype=torch.float32) - expected_ious = torch.zeros(10, 0, dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - def test_iou_half_overlap_cpu(self): - boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) - boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) - expected_ious = torch.tensor([[0.5]], dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious, expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_half_overlap_cuda(self): - boxes1 = torch.tensor([[0.5, 0.5, 1.0, 1.0, 0.0]], dtype=torch.float32) - boxes2 = torch.tensor([[0.25, 0.5, 0.5, 1.0, 0.0]], dtype=torch.float32) - expected_ious = torch.tensor([[0.5]], dtype=torch.float32) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTrue(torch.allclose(ious_cuda.cpu(), expected_ious)) - - def test_iou_precision(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor([[565, 565, 10, 10.0, 0]], dtype=torch.float32, device=device) - boxes2 = torch.tensor([[565, 565, 10, 8.3, 0]], dtype=torch.float32, device=device) - iou = 8.3 / 10.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(torch.allclose(ious.cpu(), expected_ious)) - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def test_iou_too_many_boxes_cuda(self): - s1, s2 = 5, 1289035 - boxes1 = torch.zeros(s1, 5) - boxes2 = torch.zeros(s2, 5) - ious_cuda = pairwise_iou_rotated(boxes1.cuda(), boxes2.cuda()) - self.assertTupleEqual(tuple(ious_cuda.shape), (s1, s2)) - - def test_iou_extreme(self): - # Cause floating point issues in cuda kernels (#1266) - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, 
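In test_iou_precision the second box shares the first box's center and width but is only 8.3 tall, so it is fully contained in the 10 x 10 box: the intersection is 10 * 8.3 and the union is 10 * 10, hence the expected IoU of 8.3 / 10. As arithmetic:

area1, area2 = 10 * 10.0, 10 * 8.3
inter = area2                     # the 10 x 8.3 box lies entirely inside the 10 x 10 box
union = area1 + area2 - inter     # = area1
assert abs(inter / union - 8.3 / 10.0) < 1e-12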
-37.0]], device=device) - boxes2 = torch.tensor( - [ - [ - -1.117407639806935e17, - 1.3858420478349148e18, - 1000.0000610351562, - 1000.0000610351562, - 1612.0, - ] - ], - device=device, - ) - ious = pairwise_iou_rotated(boxes1, boxes2) - self.assertTrue(ious.min() >= 0, ious) - - -class TestRotatedBoxesStructure(unittest.TestCase): - def test_clip_area_0_degree(self): - for _ in range(50): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - # Convert from (x_ctr, y_ctr, w, h, 0) to (x1, y1, x2, y2) - boxes_4d = torch.zeros(num_boxes, 4) - boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0 - boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0 - boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0 - boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0 - - image_size = (500, 600) - test_boxes_4d = Boxes(boxes_4d) - test_boxes_5d = RotatedBoxes(boxes_5d) - # Before clip - areas_4d = test_boxes_4d.area() - areas_5d = test_boxes_5d.area() - self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) - # After clip - test_boxes_4d.clip(image_size) - test_boxes_5d.clip(image_size) - areas_4d = test_boxes_4d.area() - areas_5d = test_boxes_5d.area() - self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5)) - - def test_clip_area_arbitrary_angle(self): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - clip_angle_threshold = random.uniform(0, 180) - - image_size = (500, 600) - test_boxes_5d = RotatedBoxes(boxes_5d) - # Before clip - areas_before = test_boxes_5d.area() - # After clip - test_boxes_5d.clip(image_size, clip_angle_threshold) - areas_diff = test_boxes_5d.area() - areas_before - - # the areas should only decrease after clipping - self.assertTrue(torch.all(areas_diff <= 0)) - # whenever the box is clipped (thus the area shrinks), - # the angle for the box must be within the clip_angle_threshold - # Note that the clip function will normalize the angle range - # to be within (-180, 180] - self.assertTrue( - torch.all(torch.abs(boxes_5d[:, 4][torch.where(areas_diff < 0)]) < clip_angle_threshold) - ) - - def test_normalize_angles(self): - # torch.manual_seed(0) - for _ in range(50): - num_boxes = 100 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - rotated_boxes = RotatedBoxes(boxes_5d) - normalized_boxes = rotated_boxes.clone() - normalized_boxes.normalize_angles() - self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] >= -180)) - self.assertTrue(torch.all(normalized_boxes.tensor[:, 4] < 180)) - # x, y, w, h should not change - self.assertTrue(torch.allclose(boxes_5d[:, :4], normalized_boxes.tensor[:, :4])) - # the 
cos/sin values of the angles should stay the same - - self.assertTrue( - torch.allclose( - torch.cos(boxes_5d[:, 4] * math.pi / 180), - torch.cos(normalized_boxes.tensor[:, 4] * math.pi / 180), - atol=1e-5, - ) - ) - - self.assertTrue( - torch.allclose( - torch.sin(boxes_5d[:, 4] * math.pi / 180), - torch.sin(normalized_boxes.tensor[:, 4] * math.pi / 180), - atol=1e-5, - ) - ) - - def test_pairwise_iou_0_degree(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor( - [[0.5, 0.5, 1.0, 1.0, 0.0], [0.5, 0.5, 1.0, 1.0, 0.0]], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor( - [ - [0.5, 0.5, 1.0, 1.0, 0.0], - [0.25, 0.5, 0.5, 1.0, 0.0], - [0.5, 0.25, 1.0, 0.5, 0.0], - [0.25, 0.25, 0.5, 0.5, 0.0], - [0.75, 0.75, 0.5, 0.5, 0.0], - [1.0, 1.0, 1.0, 1.0, 0.0], - ], - dtype=torch.float32, - device=device, - ) - expected_ious = torch.tensor( - [ - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)], - ], - dtype=torch.float32, - device=device, - ) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_45_degrees(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor( - [ - [1, 1, math.sqrt(2), math.sqrt(2), 45], - [1, 1, 2 * math.sqrt(2), 2 * math.sqrt(2), -45], - ], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor([[1, 1, 2, 2, 0]], dtype=torch.float32, device=device) - expected_ious = torch.tensor([[0.5], [0.5]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_orthogonal(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor([[5, 5, 10, 6, 55]], dtype=torch.float32, device=device) - boxes2 = torch.tensor([[5, 5, 10, 6, -35]], dtype=torch.float32, device=device) - iou = (6.0 * 6.0) / (6.0 * 6.0 + 4.0 * 6.0 + 4.0 * 6.0) - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_large_close_boxes(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - boxes1 = torch.tensor( - [[299.500000, 417.370422, 600.000000, 364.259186, 27.1828]], - dtype=torch.float32, - device=device, - ) - boxes2 = torch.tensor( - [[299.500000, 417.370422, 600.000000, 364.259155, 27.1828]], - dtype=torch.float32, - device=device, - ) - iou = 364.259155 / 364.259186 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_many_boxes(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - num_boxes1 = 100 - num_boxes2 = 200 - boxes1 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32, device=device - ) - for i in range(num_boxes1) - ] - ) - boxes2 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], - dtype=torch.float32, - device=device, - ) - for i in range(num_boxes2) - ] - ) - expected_ious = torch.zeros(num_boxes1, num_boxes2, dtype=torch.float32, device=device) - for i in range(min(num_boxes1, num_boxes2)): - 
expected_ious[i][i] = (1 + 9 * i / num_boxes2) / 10.0 - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_issue1207_simplified(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - # Simplified test case of D2-issue-1207 - boxes1 = torch.tensor([[3, 3, 8, 2, -45.0]], device=device) - boxes2 = torch.tensor([[6, 0, 8, 2, -45.0]], device=device) - iou = 0.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_pairwise_iou_issue1207(self): - for device in ["cpu"] + ["cuda"] if torch.cuda.is_available() else []: - # The original test case in D2-issue-1207 - boxes1 = torch.tensor([[160.0, 153.0, 230.0, 23.0, -37.0]], device=device) - boxes2 = torch.tensor([[190.0, 127.0, 80.0, 21.0, -46.0]], device=device) - - iou = 0.0 - expected_ious = torch.tensor([[iou]], dtype=torch.float32, device=device) - - ious = pairwise_iou(RotatedBoxes(boxes1), RotatedBoxes(boxes2)) - self.assertTrue(torch.allclose(ious, expected_ious)) - - def test_empty_cat(self): - x = RotatedBoxes.cat([]) - self.assertTrue(x.tensor.shape, (0, 5)) - - -def benchmark_rotated_iou(): - num_boxes1 = 200 - num_boxes2 = 500 - boxes1 = torch.stack( - [ - torch.tensor([5 + 20 * i, 5 + 20 * i, 10, 10, 0], dtype=torch.float32) - for i in range(num_boxes1) - ] - ) - boxes2 = torch.stack( - [ - torch.tensor( - [5 + 20 * i, 5 + 20 * i, 10, 1 + 9 * i / num_boxes2, 0], dtype=torch.float32 - ) - for i in range(num_boxes2) - ] - ) - - def func(dev, n=1): - b1 = boxes1.to(device=dev) - b2 = boxes2.to(device=dev) - - def bench(): - for _ in range(n): - pairwise_iou_rotated(b1, b2) - if dev.type == "cuda": - torch.cuda.synchronize() - - return bench - - # only run it once per timed loop, since it's slow - args = [{"dev": torch.device("cpu"), "n": 1}] - if torch.cuda.is_available(): - args.append({"dev": torch.device("cuda"), "n": 10}) - - benchmark(func, "rotated_iou", args, warmup_iters=3) - - -if __name__ == "__main__": - unittest.main() - benchmark_rotated_iou() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_checkpoint.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_checkpoint.py deleted file mode 100644 index 725b488..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_checkpoint.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import unittest -from collections import OrderedDict -import torch -from torch import nn - -from detectron2.checkpoint.c2_model_loading import align_and_update_state_dicts -from detectron2.utils.logger import setup_logger - - -class TestCheckpointer(unittest.TestCase): - def setUp(self): - setup_logger() - - def create_complex_model(self): - m = nn.Module() - m.block1 = nn.Module() - m.block1.layer1 = nn.Linear(2, 3) - m.layer2 = nn.Linear(3, 2) - m.res = nn.Module() - m.res.layer2 = nn.Linear(3, 2) - - state_dict = OrderedDict() - state_dict["layer1.weight"] = torch.rand(3, 2) - state_dict["layer1.bias"] = torch.rand(3) - state_dict["layer2.weight"] = torch.rand(2, 3) - state_dict["layer2.bias"] = torch.rand(2) - state_dict["res.layer2.weight"] = torch.rand(2, 3) - state_dict["res.layer2.bias"] = torch.rand(2) - return m, state_dict - - def test_complex_model_loaded(self): - for add_data_parallel in [False, True]: - model, state_dict = self.create_complex_model() - if add_data_parallel: - model = nn.DataParallel(model) - model_sd = model.state_dict() - - align_and_update_state_dicts(model_sd, state_dict) - for loaded, stored in zip(model_sd.values(), state_dict.values()): - # different tensor references - self.assertFalse(id(loaded) == id(stored)) - # same content - self.assertTrue(loaded.equal(stored)) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_config.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_config.py deleted file mode 100644 index 650bdf2..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_config.py +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - - -import os -import tempfile -import unittest -import torch - -from detectron2.config import configurable, downgrade_config, get_cfg, upgrade_config -from detectron2.layers import ShapeSpec - -_V0_CFG = """ -MODEL: - RPN_HEAD: - NAME: "TEST" -VERSION: 0 -""" - -_V1_CFG = """ -MODEL: - WEIGHT: "/path/to/weight" -""" - - -class TestConfigVersioning(unittest.TestCase): - def test_upgrade_downgrade_consistency(self): - cfg = get_cfg() - # check that custom is preserved - cfg.USER_CUSTOM = 1 - - down = downgrade_config(cfg, to_version=0) - up = upgrade_config(down) - self.assertTrue(up == cfg) - - def _merge_cfg_str(self, cfg, merge_str): - f = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) - try: - f.write(merge_str) - f.close() - cfg.merge_from_file(f.name) - finally: - os.remove(f.name) - return cfg - - def test_auto_upgrade(self): - cfg = get_cfg() - latest_ver = cfg.VERSION - cfg.USER_CUSTOM = 1 - - self._merge_cfg_str(cfg, _V0_CFG) - - self.assertEqual(cfg.MODEL.RPN.HEAD_NAME, "TEST") - self.assertEqual(cfg.VERSION, latest_ver) - - def test_guess_v1(self): - cfg = get_cfg() - latest_ver = cfg.VERSION - self._merge_cfg_str(cfg, _V1_CFG) - self.assertEqual(cfg.VERSION, latest_ver) - - -class _TestClassA(torch.nn.Module): - @configurable - def __init__(self, arg1, arg2, arg3=3): - super().__init__() - self.arg1 = arg1 - self.arg2 = arg2 - self.arg3 = arg3 - assert arg1 == 1 - assert arg2 == 2 - assert arg3 == 3 - - @classmethod - def from_config(cls, cfg): - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - return args - - -class _TestClassB(_TestClassA): - @configurable - def __init__(self, input_shape, arg1, arg2, arg3=3): - """ - Doc of _TestClassB - """ - assert input_shape == "shape" - super().__init__(arg1, arg2, arg3) - - @classmethod - def from_config(cls, cfg, input_shape): # test extra positional arg in from_config - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - args["input_shape"] = input_shape - return args - - -class _LegacySubClass(_TestClassB): - # an old subclass written in cfg style - def __init__(self, cfg, input_shape, arg4=4): - super().__init__(cfg, input_shape) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class _NewSubClassNewInit(_TestClassB): - # test new subclass with a new __init__ - @configurable - def __init__(self, input_shape, arg4=4, **kwargs): - super().__init__(input_shape, **kwargs) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class _LegacySubClassNotCfg(_TestClassB): - # an old subclass written in cfg style, but argument is not called "cfg" - def __init__(self, config, input_shape): - super().__init__(config, input_shape) - assert self.arg1 == 1 - assert self.arg2 == 2 - assert self.arg3 == 3 - - -class _TestClassC(_TestClassB): - @classmethod - def from_config(cls, cfg, input_shape, **kwargs): # test extra kwarg overwrite - args = {"arg1": cfg.ARG1, "arg2": cfg.ARG2} - args["input_shape"] = input_shape - args.update(kwargs) - return args - - -class _TestClassD(_TestClassA): - @configurable - def __init__(self, input_shape: ShapeSpec, arg1: int, arg2, arg3=3): - assert input_shape == "shape" - super().__init__(arg1, arg2, arg3) - - # _TestClassA.from_config does not have input_shape args. 
- # Test whether input_shape will be forwarded to __init__ - - -class TestConfigurable(unittest.TestCase): - def testInitWithArgs(self): - _ = _TestClassA(arg1=1, arg2=2, arg3=3) - _ = _TestClassB("shape", arg1=1, arg2=2) - _ = _TestClassC("shape", arg1=1, arg2=2) - _ = _TestClassD("shape", arg1=1, arg2=2, arg3=3) - - def testPatchedAttr(self): - self.assertTrue("Doc" in _TestClassB.__init__.__doc__) - self.assertEqual(_TestClassD.__init__.__annotations__["arg1"], int) - - def testInitWithCfg(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 2 - cfg.ARG3 = 3 - _ = _TestClassA(cfg) - _ = _TestClassB(cfg, input_shape="shape") - _ = _TestClassC(cfg, input_shape="shape") - _ = _TestClassD(cfg, input_shape="shape") - _ = _LegacySubClass(cfg, input_shape="shape") - _ = _NewSubClassNewInit(cfg, input_shape="shape") - _ = _LegacySubClassNotCfg(cfg, input_shape="shape") - with self.assertRaises(TypeError): - # disallow forwarding positional args to __init__ since it's prone to errors - _ = _TestClassD(cfg, "shape") - - # call with kwargs instead - _ = _TestClassA(cfg=cfg) - _ = _TestClassB(cfg=cfg, input_shape="shape") - _ = _TestClassC(cfg=cfg, input_shape="shape") - _ = _TestClassD(cfg=cfg, input_shape="shape") - _ = _LegacySubClass(cfg=cfg, input_shape="shape") - _ = _NewSubClassNewInit(cfg=cfg, input_shape="shape") - _ = _LegacySubClassNotCfg(config=cfg, input_shape="shape") - - def testInitWithCfgOverwrite(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 999 # wrong config - with self.assertRaises(AssertionError): - _ = _TestClassA(cfg, arg3=3) - - # overwrite arg2 with correct config later: - _ = _TestClassA(cfg, arg2=2, arg3=3) - _ = _TestClassB(cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassC(cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassD(cfg, input_shape="shape", arg2=2, arg3=3) - - # call with kwargs cfg=cfg instead - _ = _TestClassA(cfg=cfg, arg2=2, arg3=3) - _ = _TestClassB(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassC(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - _ = _TestClassD(cfg=cfg, input_shape="shape", arg2=2, arg3=3) - - def testInitWithCfgWrongArgs(self): - cfg = get_cfg() - cfg.ARG1 = 1 - cfg.ARG2 = 2 - with self.assertRaises(TypeError): - _ = _TestClassB(cfg, "shape", not_exist=1) - with self.assertRaises(TypeError): - _ = _TestClassC(cfg, "shape", not_exist=1) - with self.assertRaises(TypeError): - _ = _TestClassD(cfg, "shape", not_exist=1) - - def testBadClass(self): - class _BadClass1: - @configurable - def __init__(self, a=1, b=2): - pass - - class _BadClass2: - @configurable - def __init__(self, a=1, b=2): - pass - - def from_config(self, cfg): # noqa - pass - - class _BadClass3: - @configurable - def __init__(self, a=1, b=2): - pass - - # bad name: must be cfg - @classmethod - def from_config(cls, config): # noqa - pass - - with self.assertRaises(AttributeError): - _ = _BadClass1(a=1) - - with self.assertRaises(TypeError): - _ = _BadClass2(a=1) - - with self.assertRaises(TypeError): - _ = _BadClass3(get_cfg()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_export_caffe2.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_export_caffe2.py deleted file mode 100644 index ad989c4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_export_caffe2.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -# -*- coding: utf-8 -*- - -import copy -import numpy as np -import os -import tempfile -import unittest -import cv2 -import torch -from fvcore.common.file_io import PathManager - -from detectron2 import model_zoo -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import DatasetCatalog -from detectron2.modeling import build_model -from detectron2.utils.logger import setup_logger - - -@unittest.skipIf(os.environ.get("CIRCLECI"), "Require COCO data and model zoo.") -class TestCaffe2Export(unittest.TestCase): - def setUp(self): - setup_logger() - - def _test_model(self, config_path, device="cpu"): - # requires extra dependencies - from detectron2.export import Caffe2Model, add_export_config, export_caffe2_model - - cfg = get_cfg() - cfg.merge_from_file(model_zoo.get_config_file(config_path)) - cfg = add_export_config(cfg) - cfg.MODEL.DEVICE = device - - model = build_model(cfg) - DetectionCheckpointer(model).load(model_zoo.get_checkpoint_url(config_path)) - - inputs = [{"image": self._get_test_image()}] - c2_model = export_caffe2_model(cfg, model, copy.deepcopy(inputs)) - - with tempfile.TemporaryDirectory(prefix="detectron2_unittest") as d: - c2_model.save_protobuf(d) - c2_model.save_graph(os.path.join(d, "test.svg"), inputs=copy.deepcopy(inputs)) - c2_model = Caffe2Model.load_protobuf(d) - c2_model(inputs)[0]["instances"] - - def _get_test_image(self): - try: - file_name = DatasetCatalog.get("coco_2017_train")[0]["file_name"] - assert PathManager.exists(file_name) - except Exception: - self.skipTest("COCO dataset not available.") - - with PathManager.open(file_name, "rb") as f: - buf = f.read() - img = cv2.imdecode(np.frombuffer(buf, dtype=np.uint8), cv2.IMREAD_COLOR) - assert img is not None, file_name - return torch.from_numpy(img.transpose(2, 0, 1)) - - def testMaskRCNN(self): - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml") - - @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available") - def testMaskRCNNGPU(self): - self._test_model("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", device="cuda") - - def testRetinaNet(self): - self._test_model("COCO-Detection/retinanet_R_50_FPN_3x.yaml") - - def testPanopticFPN(self): - self._test_model("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml") diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_analysis.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_analysis.py deleted file mode 100644 index 0e3f84c..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_analysis.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
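For reference, the export round trip that `_test_model` above exercises boils down to roughly the sketch below. The temporary-directory prefix, the CPU device and the random input tensor are illustrative stand-ins (the test uses a real COCO image and needs the optional export dependencies); the config name and the `export_caffe2_model` / `Caffe2Model` calls are the ones used in the test.

```
import copy
import tempfile

import torch

from detectron2 import model_zoo
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import get_cfg
from detectron2.export import Caffe2Model, add_export_config, export_caffe2_model
from detectron2.modeling import build_model

config_path = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(config_path))
cfg = add_export_config(cfg)
cfg.MODEL.DEVICE = "cpu"

model = build_model(cfg)
DetectionCheckpointer(model).load(model_zoo.get_checkpoint_url(config_path))

# a random CHW tensor stands in for the real COCO image used by the test
inputs = [{"image": torch.rand(3, 800, 800)}]

with tempfile.TemporaryDirectory(prefix="detectron2_export") as d:
    c2_model = export_caffe2_model(cfg, model, copy.deepcopy(inputs))  # trace + convert
    c2_model.save_protobuf(d)                                          # writes model.pb / model_init.pb
    reloaded = Caffe2Model.load_protobuf(d)                            # caffe2-only inference model
    instances = reloaded(inputs)[0]["instances"]                       # same output structure as the torch model
```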
- - -import unittest -import torch - -import detectron2.model_zoo as model_zoo -from detectron2.config import get_cfg -from detectron2.modeling import build_model -from detectron2.utils.analysis import flop_count_operators, parameter_count - - -def get_model_zoo(config_path): - """ - Like model_zoo.get, but do not load any weights (even pretrained) - """ - cfg_file = model_zoo.get_config_file(config_path) - cfg = get_cfg() - cfg.merge_from_file(cfg_file) - if not torch.cuda.is_available(): - cfg.MODEL.DEVICE = "cpu" - return build_model(cfg) - - -class RetinaNetTest(unittest.TestCase): - def setUp(self): - self.model = get_model_zoo("COCO-Detection/retinanet_R_50_FPN_1x.yaml") - - def test_flop(self): - # RetinaNet supports flop-counting with random inputs - inputs = [{"image": torch.rand(3, 800, 800)}] - res = flop_count_operators(self.model, inputs) - self.assertTrue(int(res["conv"]), 146) # 146B flops - - def test_param_count(self): - res = parameter_count(self.model) - self.assertTrue(res[""], 37915572) - self.assertTrue(res["backbone"], 31452352) - - -class FasterRCNNTest(unittest.TestCase): - def setUp(self): - self.model = get_model_zoo("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml") - - def test_flop(self): - # Faster R-CNN supports flop-counting with random inputs - inputs = [{"image": torch.rand(3, 800, 800)}] - res = flop_count_operators(self.model, inputs) - - # This only checks flops for backbone & proposal generator - # Flops for box head is not conv, and depends on #proposals, which is - # almost 0 for random inputs. - self.assertTrue(int(res["conv"]), 117) - - def test_param_count(self): - res = parameter_count(self.model) - self.assertTrue(res[""], 41699936) - self.assertTrue(res["backbone"], 26799296) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_zoo.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_zoo.py deleted file mode 100644 index 2d16c71..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_model_zoo.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import logging -import unittest - -from detectron2 import model_zoo -from detectron2.modeling import FPN, GeneralizedRCNN - -logger = logging.getLogger(__name__) - - -class TestModelZoo(unittest.TestCase): - def test_get_returns_model(self): - model = model_zoo.get("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml", trained=False) - self.assertIsInstance(model, GeneralizedRCNN) - self.assertIsInstance(model.backbone, FPN) - - def test_get_invalid_model(self): - self.assertRaises(RuntimeError, model_zoo.get, "Invalid/config.yaml") - - def test_get_url(self): - url = model_zoo.get_checkpoint_url("Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml") - self.assertEqual( - url, - "https://dl.fbaipublicfiles.com/detectron2/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn/138602908/model_final_01ca85.pkl", # noqa - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_visualizer.py b/preprocess/humanparsing/mhp_extension/detectron2/tests/test_visualizer.py deleted file mode 100644 index 1cdeddc..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tests/test_visualizer.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -# File: - -import numpy as np -import unittest -import torch - -from detectron2.data import MetadataCatalog -from detectron2.structures import BoxMode, Instances, RotatedBoxes -from detectron2.utils.visualizer import Visualizer - - -class TestVisualizer(unittest.TestCase): - def _random_data(self): - H, W = 100, 100 - N = 10 - img = np.random.rand(H, W, 3) * 255 - boxxy = np.random.rand(N, 2) * (H // 2) - boxes = np.concatenate((boxxy, boxxy + H // 2), axis=1) - - def _rand_poly(): - return np.random.rand(3, 2).flatten() * H - - polygons = [[_rand_poly() for _ in range(np.random.randint(1, 5))] for _ in range(N)] - - mask = np.zeros_like(img[:, :, 0], dtype=np.bool) - mask[:10, 10:20] = 1 - - labels = [str(i) for i in range(N)] - return img, boxes, labels, polygons, [mask] * N - - @property - def metadata(self): - return MetadataCatalog.get("coco_2017_train") - - def test_draw_dataset_dict(self): - img = np.random.rand(512, 512, 3) * 255 - dic = { - "annotations": [ - { - "bbox": [ - 368.9946492271106, - 330.891438763377, - 13.148537455410235, - 13.644708680142685, - ], - "bbox_mode": BoxMode.XYWH_ABS, - "category_id": 0, - "iscrowd": 1, - "segmentation": { - "counts": "_jh52m?2N2N2N2O100O10O001N1O2MceP2", - "size": [512, 512], - }, - } - ], - "height": 512, - "image_id": 1, - "width": 512, - } - v = Visualizer(img, self.metadata) - v.draw_dataset_dict(dic) - - def test_overlay_instances(self): - img, boxes, labels, polygons, masks = self._random_data() - - v = Visualizer(img, self.metadata) - output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - # Test 2x scaling - v = Visualizer(img, self.metadata, scale=2.0) - output = v.overlay_instances(masks=polygons, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape[0], img.shape[0] * 2) - - # Test overlay masks - v = Visualizer(img, self.metadata) - output = v.overlay_instances(masks=masks, boxes=boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - def test_overlay_instances_no_boxes(self): - img, boxes, labels, polygons, _ = self._random_data() - v = Visualizer(img, self.metadata) - v.overlay_instances(masks=polygons, boxes=None, labels=labels).get_image() - - def test_draw_instance_predictions(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.asarray(masks)) - - v = Visualizer(img, self.metadata) - v.draw_instance_predictions(inst) - - def test_draw_empty_mask_predictions(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.zeros_like(np.asarray(masks))) - - v = Visualizer(img, self.metadata) - v.draw_instance_predictions(inst) - - def test_correct_output_shape(self): - img = np.random.rand(928, 928, 3) * 255 - v = Visualizer(img, self.metadata) - out = v.output.get_image() - self.assertEqual(out.shape, img.shape) - - def test_overlay_rotated_instances(self): - H, W = 100, 150 - img = np.random.rand(H, W, 3) * 255 - num_boxes = 50 - boxes_5d = torch.zeros(num_boxes, 5) - boxes_5d[:, 0] = 
torch.FloatTensor(num_boxes).uniform_(-0.1 * W, 1.1 * W) - boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-0.1 * H, 1.1 * H) - boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) - boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, max(W, H)) - boxes_5d[:, 4] = torch.FloatTensor(num_boxes).uniform_(-1800, 1800) - rotated_boxes = RotatedBoxes(boxes_5d) - labels = [str(i) for i in range(num_boxes)] - - v = Visualizer(img, self.metadata) - output = v.overlay_instances(boxes=rotated_boxes, labels=labels).get_image() - self.assertEqual(output.shape, img.shape) - - def test_draw_no_metadata(self): - img, boxes, _, _, masks = self._random_data() - num_inst = len(boxes) - inst = Instances((img.shape[0], img.shape[1])) - inst.pred_classes = torch.randint(0, 80, size=(num_inst,)) - inst.scores = torch.rand(num_inst) - inst.pred_boxes = torch.from_numpy(boxes) - inst.pred_masks = torch.from_numpy(np.asarray(masks)) - - v = Visualizer(img, MetadataCatalog.get("asdfasdf")) - v.draw_instance_predictions(inst) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/README.md b/preprocess/humanparsing/mhp_extension/detectron2/tools/README.md deleted file mode 100644 index 3733863..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/README.md +++ /dev/null @@ -1,45 +0,0 @@ - -This directory contains a few scripts that use detectron2. - - -* `train_net.py` - -An example training script that's made to train builtin models of detectron2. - -For usage, see [GETTING_STARTED.md](../GETTING_STARTED.md). - -* `plain_train_net.py` - -Similar to `train_net.py`, but implements a training loop instead of using `Trainer`. -This script includes fewer features but it may be more friendly to hackers. - -* `benchmark.py` - -Benchmark the training speed, inference speed or data loading speed of a given config. - -Usage: -``` -python benchmark.py --config-file config.yaml --task train/eval/data [optional DDP flags] -``` - -* `visualize_json_results.py` - -Visualize the json instance detection/segmentation results dumped by `COCOEvalutor` or `LVISEvaluator` - -Usage: -``` -python visualize_json_results.py --input x.json --output dir/ --dataset coco_2017_val -``` -If not using a builtin dataset, you'll need your own script or modify this script. - -* `visualize_data.py` - -Visualize ground truth raw annotations or training data (after preprocessing/augmentations). - -Usage: -``` -python visualize_data.py --config-file config.yaml --source annotation/dataloader --output-dir dir/ [--show] -``` - -NOTE: the script does not stop by itself when using `--source dataloader` because a training -dataloader is usually infinite. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/analyze_model.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/analyze_model.py deleted file mode 100644 index 9c06ea4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/analyze_model.py +++ /dev/null @@ -1,127 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import logging -import numpy as np -from collections import Counter -import tqdm - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import build_detection_test_loader -from detectron2.engine import default_argument_parser -from detectron2.modeling import build_model -from detectron2.utils.analysis import ( - activation_count_operators, - flop_count_operators, - parameter_count_table, -) -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger("detectron2") - - -def setup(args): - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.DATALOADER.NUM_WORKERS = 0 - cfg.merge_from_list(args.opts) - cfg.freeze() - setup_logger() - return cfg - - -def do_flop(cfg): - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - model.eval() - - counts = Counter() - total_flops = [] - for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa - count = flop_count_operators(model, data) - counts += count - total_flops.append(sum(count.values())) - logger.info( - "(G)Flops for Each Type of Operators:\n" + str([(k, v / idx) for k, v in counts.items()]) - ) - logger.info("Total (G)Flops: {}±{}".format(np.mean(total_flops), np.std(total_flops))) - - -def do_activation(cfg): - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - model = build_model(cfg) - DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - model.eval() - - counts = Counter() - total_activations = [] - for idx, data in zip(tqdm.trange(args.num_inputs), data_loader): # noqa - count = activation_count_operators(model, data) - counts += count - total_activations.append(sum(count.values())) - logger.info( - "(Million) Activations for Each Type of Operators:\n" - + str([(k, v / idx) for k, v in counts.items()]) - ) - logger.info( - "Total (Million) Activations: {}±{}".format( - np.mean(total_activations), np.std(total_activations) - ) - ) - - -def do_parameter(cfg): - model = build_model(cfg) - logger.info("Parameter Count:\n" + parameter_count_table(model, max_depth=5)) - - -def do_structure(cfg): - model = build_model(cfg) - logger.info("Model Structure:\n" + str(model)) - - -if __name__ == "__main__": - parser = default_argument_parser( - epilog=""" -Examples: - -To show parameters of a model: -$ ./analyze_model.py --tasks parameter \\ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml - -Flops and activations are data-dependent, therefore inputs and model weights -are needed to count them: - -$ ./analyze_model.py --num-inputs 100 --tasks flop \\ - --config-file ../configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml \\ - MODEL.WEIGHTS /path/to/model.pkl -""" - ) - parser.add_argument( - "--tasks", - choices=["flop", "activation", "parameter", "structure"], - required=True, - nargs="+", - ) - parser.add_argument( - "--num-inputs", - default=100, - type=int, - help="number of inputs used to compute statistics for flops/activations, " - "both are data dependent.", - ) - args = parser.parse_args() - assert not args.eval_only - assert args.num_gpus == 1 - - cfg = setup(args) - - for task in args.tasks: - { - "flop": do_flop, - "activation": do_activation, - "parameter": do_parameter, - "structure": do_structure, - }[task](cfg) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/benchmark.py 
b/preprocess/humanparsing/mhp_extension/detectron2/tools/benchmark.py deleted file mode 100644 index 9eec59f..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/benchmark.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -A script to benchmark builtin models. - -Note: this script has an extra dependency of psutil. -""" - -import itertools -import logging -import psutil -import torch -import tqdm -from fvcore.common.timer import Timer -from torch.nn.parallel import DistributedDataParallel - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import ( - DatasetFromList, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.engine import SimpleTrainer, default_argument_parser, hooks, launch -from detectron2.modeling import build_model -from detectron2.solver import build_optimizer -from detectron2.utils import comm -from detectron2.utils.events import CommonMetricPrinter -from detectron2.utils.logger import setup_logger - -logger = logging.getLogger("detectron2") - - -def setup(args): - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.SOLVER.BASE_LR = 0.001 # Avoid NaNs. Not useful in this script anyway. - cfg.merge_from_list(args.opts) - cfg.freeze() - setup_logger(distributed_rank=comm.get_rank()) - return cfg - - -def benchmark_data(args): - cfg = setup(args) - - timer = Timer() - dataloader = build_detection_train_loader(cfg) - logger.info("Initialize loader using {} seconds.".format(timer.seconds())) - - timer.reset() - itr = iter(dataloader) - for i in range(10): # warmup - next(itr) - if i == 0: - startup_time = timer.seconds() - timer = Timer() - max_iter = 1000 - for _ in tqdm.trange(max_iter): - next(itr) - logger.info( - "{} iters ({} images) in {} seconds.".format( - max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds() - ) - ) - logger.info("Startup time: {} seconds".format(startup_time)) - vram = psutil.virtual_memory() - logger.info( - "RAM Usage: {:.2f}/{:.2f} GB".format( - (vram.total - vram.available) / 1024 ** 3, vram.total / 1024 ** 3 - ) - ) - - # test for a few more rounds - for _ in range(10): - timer = Timer() - max_iter = 1000 - for _ in tqdm.trange(max_iter): - next(itr) - logger.info( - "{} iters ({} images) in {} seconds.".format( - max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds() - ) - ) - - -def benchmark_train(args): - cfg = setup(args) - model = build_model(cfg) - logger.info("Model:\n{}".format(model)) - if comm.get_world_size() > 1: - model = DistributedDataParallel( - model, device_ids=[comm.get_local_rank()], broadcast_buffers=False - ) - optimizer = build_optimizer(cfg, model) - checkpointer = DetectionCheckpointer(model, optimizer=optimizer) - checkpointer.load(cfg.MODEL.WEIGHTS) - - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 0 - data_loader = build_detection_train_loader(cfg) - dummy_data = list(itertools.islice(data_loader, 100)) - - def f(): - data = DatasetFromList(dummy_data, copy=False) - while True: - yield from data - - max_iter = 400 - trainer = SimpleTrainer(model, f(), optimizer) - trainer.register_hooks( - [hooks.IterationTimer(), hooks.PeriodicWriter([CommonMetricPrinter(max_iter)])] - ) - trainer.train(1, max_iter) - - -@torch.no_grad() -def benchmark_eval(args): - cfg = setup(args) - model = build_model(cfg) - model.eval() - logger.info("Model:\n{}".format(model)) - 
DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) - - cfg.defrost() - cfg.DATALOADER.NUM_WORKERS = 0 - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - dummy_data = list(itertools.islice(data_loader, 100)) - - def f(): - while True: - yield from DatasetFromList(dummy_data, copy=False) - - for _ in range(5): # warmup - model(dummy_data[0]) - - max_iter = 400 - timer = Timer() - with tqdm.tqdm(total=max_iter) as pbar: - for idx, d in enumerate(f()): - if idx == max_iter: - break - model(d) - pbar.update() - logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds())) - - -if __name__ == "__main__": - parser = default_argument_parser() - parser.add_argument("--task", choices=["train", "eval", "data"], required=True) - args = parser.parse_args() - assert not args.eval_only - - if args.task == "data": - f = benchmark_data - elif args.task == "train": - """ - Note: training speed may not be representative. - The training cost of a R-CNN model varies with the content of the data - and the quality of the model. - """ - f = benchmark_train - elif args.task == "eval": - f = benchmark_eval - # only benchmark single-GPU inference. - assert args.num_gpus == 1 and args.num_machines == 1 - launch(f, args.num_gpus, args.num_machines, args.machine_rank, args.dist_url, args=(args,)) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/convert-torchvision-to-d2.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/convert-torchvision-to-d2.py deleted file mode 100644 index 18a24e4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/convert-torchvision-to-d2.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import pickle as pkl -import sys -import torch - -""" -Usage: - # download one of the ResNet{18,34,50,101,152} models from torchvision: - wget https://download.pytorch.org/models/resnet50-19c8e357.pth -O r50.pth - # run the conversion - ./convert-torchvision-to-d2.py r50.pth r50.pkl - - # Then, use r50.pkl with the following changes in config: - -MODEL: - WEIGHTS: "/path/to/r50.pkl" - PIXEL_MEAN: [123.675, 116.280, 103.530] - PIXEL_STD: [58.395, 57.120, 57.375] - RESNETS: - DEPTH: 50 - STRIDE_IN_1X1: False -INPUT: - FORMAT: "RGB" - - These models typically produce slightly worse results than the - pre-trained ResNets we use in official configs, which are the - original ResNet models released by MSRA. -""" - -if __name__ == "__main__": - input = sys.argv[1] - - obj = torch.load(input, map_location="cpu") - - newmodel = {} - for k in list(obj.keys()): - old_k = k - if "layer" not in k: - k = "stem." 
+ k - for t in [1, 2, 3, 4]: - k = k.replace("layer{}".format(t), "res{}".format(t + 1)) - for t in [1, 2, 3]: - k = k.replace("bn{}".format(t), "conv{}.norm".format(t)) - k = k.replace("downsample.0", "shortcut") - k = k.replace("downsample.1", "shortcut.norm") - print(old_k, "->", k) - newmodel[k] = obj.pop(old_k).detach().numpy() - - res = {"model": newmodel, "__author__": "torchvision", "matching_heuristics": True} - - with open(sys.argv[2], "wb") as f: - pkl.dump(res, f) - if obj: - print("Unconverted keys:", obj.keys()) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/README.md b/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/README.md deleted file mode 100644 index b9d5b15..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/README.md +++ /dev/null @@ -1,9 +0,0 @@ - -This directory contains: - -1. A script that converts a detectron2 model to caffe2 format. - -2. An example that loads a Mask R-CNN model in caffe2 format and runs inference. - -See [tutorial](https://detectron2.readthedocs.io/tutorials/deployment.html) -for their usage. diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_converter.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_converter.py deleted file mode 100644 index 08feb69..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_converter.py +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -import argparse -import os -import onnx -import torch - -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import build_detection_test_loader -from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format -from detectron2.export import Caffe2Tracer, add_export_config -from detectron2.modeling import build_model -from detectron2.utils.logger import setup_logger - - -def setup_cfg(args): - cfg = get_cfg() - # cuda context is initialized before creating dataloader, so we don't fork anymore - cfg.DATALOADER.NUM_WORKERS = 0 - cfg = add_export_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - if cfg.MODEL.DEVICE != "cpu": - TORCH_VERSION = tuple(int(x) for x in torch.__version__.split(".")[:2]) - assert TORCH_VERSION >= (1, 5), "PyTorch>=1.5 required for GPU conversion!" 
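To make the renaming loop in `convert-torchvision-to-d2.py` above concrete, here are a few torchvision ResNet-50 parameter names and the detectron2 names they end up with. The pairs are hand-derived from the replace rules shown, not captured script output.

```
# Hand-derived from the replace rules above (illustrative, not script output):
expected_renames = {
    "conv1.weight": "stem.conv1.weight",               # no "layer" in key -> "stem." prefix
    "bn1.weight": "stem.conv1.norm.weight",            # bn{t} -> conv{t}.norm
    "layer1.0.conv2.weight": "res2.0.conv2.weight",    # layer{t} -> res{t+1}
    "layer1.0.bn2.bias": "res2.0.conv2.norm.bias",
    "layer4.0.downsample.0.weight": "res5.0.shortcut.weight",       # downsample.0 -> shortcut
    "layer4.0.downsample.1.weight": "res5.0.shortcut.norm.weight",  # downsample.1 -> shortcut.norm
}
```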
- return cfg - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Convert a model using caffe2 tracing.") - parser.add_argument( - "--format", - choices=["caffe2", "onnx", "torchscript"], - help="output format", - default="caffe2", - ) - parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") - parser.add_argument("--run-eval", action="store_true") - parser.add_argument("--output", help="output directory for the converted model") - parser.add_argument( - "opts", - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER, - ) - args = parser.parse_args() - logger = setup_logger() - logger.info("Command line arguments: " + str(args)) - os.makedirs(args.output, exist_ok=True) - - cfg = setup_cfg(args) - - # create a torch model - torch_model = build_model(cfg) - DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS) - - # get a sample data - data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) - first_batch = next(iter(data_loader)) - - # convert and save caffe2 model - tracer = Caffe2Tracer(cfg, torch_model, first_batch) - if args.format == "caffe2": - caffe2_model = tracer.export_caffe2() - caffe2_model.save_protobuf(args.output) - # draw the caffe2 graph - caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch) - elif args.format == "onnx": - onnx_model = tracer.export_onnx() - onnx.save(onnx_model, os.path.join(args.output, "model.onnx")) - elif args.format == "torchscript": - script_model = tracer.export_torchscript() - script_model.save(os.path.join(args.output, "model.ts")) - - # Recursively print IR of all modules - with open(os.path.join(args.output, "model_ts_IR.txt"), "w") as f: - try: - f.write(script_model._actual_script_module._c.dump_to_str(True, False, False)) - except AttributeError: - pass - # Print IR of the entire graph (all submodules inlined) - with open(os.path.join(args.output, "model_ts_IR_inlined.txt"), "w") as f: - f.write(str(script_model.inlined_graph)) - # Print the model structure in pytorch style - with open(os.path.join(args.output, "model.txt"), "w") as f: - f.write(str(script_model)) - - # run evaluation with the converted model - if args.run_eval: - assert args.format == "caffe2", "Python inference in other format is not yet supported." - dataset = cfg.DATASETS.TEST[0] - data_loader = build_detection_test_loader(cfg, dataset) - # NOTE: hard-coded evaluator. change to the evaluator for your dataset - evaluator = COCOEvaluator(dataset, cfg, True, args.output) - metrics = inference_on_dataset(caffe2_model, data_loader, evaluator) - print_csv_format(metrics) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_mask_rcnn.cpp b/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_mask_rcnn.cpp deleted file mode 100644 index 44370b4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/caffe2_mask_rcnn.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -C10_DEFINE_string(predict_net, "", "path to model.pb"); -C10_DEFINE_string(init_net, "", "path to model_init.pb"); -C10_DEFINE_string(input, "", "path to input image"); - -using namespace std; -using namespace caffe2; - -int main(int argc, char** argv) { - caffe2::GlobalInit(&argc, &argv); - string predictNetPath = FLAGS_predict_net; - string initNetPath = FLAGS_init_net; - cv::Mat input = cv::imread(FLAGS_input, cv::IMREAD_COLOR); - - const int height = input.rows; - const int width = input.cols; - // FPN models require divisibility of 32 - assert(height % 32 == 0 && width % 32 == 0); - const int batch = 1; - const int channels = 3; - - // initialize Net and Workspace - caffe2::NetDef initNet_, predictNet_; - CAFFE_ENFORCE(ReadProtoFromFile(initNetPath, &initNet_)); - CAFFE_ENFORCE(ReadProtoFromFile(predictNetPath, &predictNet_)); - - Workspace workSpace; - for (auto& str : predictNet_.external_input()) { - workSpace.CreateBlob(str); - } - CAFFE_ENFORCE(workSpace.CreateNet(predictNet_)); - CAFFE_ENFORCE(workSpace.RunNetOnce(initNet_)); - - // setup inputs - auto data = BlobGetMutableTensor(workSpace.GetBlob("data"), caffe2::CPU); - data->Resize(batch, channels, height, width); - float* ptr = data->mutable_data(); - // HWC to CHW - for (int c = 0; c < 3; ++c) { - for (int i = 0; i < height * width; ++i) { - ptr[c * height * width + i] = static_cast(input.data[3 * i + c]); - } - } - - auto im_info = - BlobGetMutableTensor(workSpace.GetBlob("im_info"), caffe2::CPU); - im_info->Resize(batch, 3); - float* im_info_ptr = im_info->mutable_data(); - im_info_ptr[0] = height; - im_info_ptr[1] = width; - im_info_ptr[2] = 1.0; - - // run the network - CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name())); - - // run 3 more times to benchmark - int N_benchmark = 3; - auto start_time = chrono::high_resolution_clock::now(); - for (int i = 0; i < N_benchmark; ++i) { - CAFFE_ENFORCE(workSpace.RunNet(predictNet_.name())); - } - auto end_time = chrono::high_resolution_clock::now(); - auto ms = chrono::duration_cast(end_time - start_time) - .count(); - cout << "Latency (should vary with different inputs): " - << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl; - - // parse Mask R-CNN outputs - caffe2::Tensor bbox( - workSpace.GetBlob("bbox_nms")->Get(), caffe2::CPU); - caffe2::Tensor scores( - workSpace.GetBlob("score_nms")->Get(), caffe2::CPU); - caffe2::Tensor labels( - workSpace.GetBlob("class_nms")->Get(), caffe2::CPU); - caffe2::Tensor mask_probs( - workSpace.GetBlob("mask_fcn_probs")->Get(), caffe2::CPU); - cout << "bbox:" << bbox.DebugString() << endl; - cout << "scores:" << scores.DebugString() << endl; - cout << "labels:" << labels.DebugString() << endl; - cout << "mask_probs: " << mask_probs.DebugString() << endl; - - int num_instances = bbox.sizes()[0]; - for (int i = 0; i < num_instances; ++i) { - float score = scores.data()[i]; - if (score < 0.6) - continue; // skip them - - const float* box = bbox.data() + i * 4; - int label = labels.data()[i]; - - cout << "Prediction " << i << ", xyxy=("; - cout << box[0] << ", " << box[1] << ", " << box[2] << ", " << box[3] - << "); score=" << score << "; label=" << label << endl; - - const float* mask = mask_probs.data() + - i * mask_probs.size_from_dim(1) + label * mask_probs.size_from_dim(2); - - // save the 28x28 mask - cv::Mat cv_mask(28, 28, CV_32FC1); - memcpy(cv_mask.data, mask, 28 * 28 * sizeof(float)); - 
cv::imwrite("mask" + std::to_string(i) + ".png", cv_mask * 255.); - } - return 0; -} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/torchscript_traced_mask_rcnn.cpp b/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/torchscript_traced_mask_rcnn.cpp deleted file mode 100644 index 82fbdb0..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/deploy/torchscript_traced_mask_rcnn.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. - -#include -#include -#include - -#include -#include - -using namespace std; - -// experimental. don't use -int main(int argc, const char* argv[]) { - if (argc != 3) { - return 1; - } - std::string image_file = argv[2]; - - torch::autograd::AutoGradMode guard(false); - auto module = torch::jit::load(argv[1]); - - assert(module.buffers().size() > 0); - // Assume that the entire model is on the same device. - // We just put input to this device. - auto device = (*begin(module.buffers())).device(); - - cv::Mat input_img = cv::imread(image_file, cv::IMREAD_COLOR); - const int height = input_img.rows; - const int width = input_img.cols; - // FPN models require divisibility of 32 - assert(height % 32 == 0 && width % 32 == 0); - const int channels = 3; - - auto input = torch::from_blob( - input_img.data, {1, height, width, channels}, torch::kUInt8); - // NHWC to NCHW - input = input.to(device, torch::kFloat).permute({0, 3, 1, 2}).contiguous(); - - std::array im_info_data{height * 1.0f, width * 1.0f, 1.0f}; - auto im_info = torch::from_blob(im_info_data.data(), {1, 3}).to(device); - - // run the network - auto output = module.forward({std::make_tuple(input, im_info)}); - - // run 3 more times to benchmark - int N_benchmark = 3; - auto start_time = chrono::high_resolution_clock::now(); - for (int i = 0; i < N_benchmark; ++i) { - output = module.forward({std::make_tuple(input, im_info)}); - } - auto end_time = chrono::high_resolution_clock::now(); - auto ms = chrono::duration_cast(end_time - start_time) - .count(); - cout << "Latency (should vary with different inputs): " - << ms * 1.0 / 1e6 / N_benchmark << " seconds" << endl; - - auto outputs = output.toTuple()->elements(); - // parse Mask R-CNN outputs - auto bbox = outputs[0].toTensor(), scores = outputs[1].toTensor(), - labels = outputs[2].toTensor(), mask_probs = outputs[3].toTensor(); - - cout << "bbox: " << bbox.toString() << " " << bbox.sizes() << endl; - cout << "scores: " << scores.toString() << " " << scores.sizes() << endl; - cout << "labels: " << labels.toString() << " " << labels.sizes() << endl; - cout << "mask_probs: " << mask_probs.toString() << " " << mask_probs.sizes() - << endl; - - int num_instances = bbox.sizes()[0]; - cout << bbox << endl; - return 0; -} diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/finetune_net.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/finetune_net.py deleted file mode 100644 index 3e52185..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/finetune_net.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Detection Training Script. - -This scripts reads a given config file and runs the training or evaluation. -It is an entry point that is made to train standard models in detectron2. 
- -In order to let one script support training of many models, -this script contains logic that are specific to these built-in models and therefore -may not be suitable for your own project. -For example, your research project perhaps only needs a single "evaluator". - -Therefore, we recommend you to use detectron2 as an library and take -this file as an example of how to use the library. -You may want to write your own script with your data and other customizations. -""" - -import logging -import os -from collections import OrderedDict -import torch - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - LVISEvaluator, - PascalVOCDetectionEvaluator, - SemSegEvaluator, - verify_results, -) -from detectron2.modeling import GeneralizedRCNNWithTTA - -# Register Custom Dataset -from detectron2.data.datasets import register_coco_instances - -register_coco_instances("CIHP_train", {}, "../../data/msrcnn_finetune_annotations/CIHP_train.json", - "../../data/instance-level_human_parsing/Training/Images") -register_coco_instances("CIHP_val", {}, "../../data/msrcnn_finetune_annotations/CIHP_val.json", - "../../data/instance-level_human_parsing/Validation/Images") -register_coco_instances("demo_train", {}, "../../demo/annotations/demo_train.json", - "../../demo/img") -register_coco_instances("demo_val", {}, "../../demo/annotations/demo_val.json", - "../../demo/img") - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains pre-defined default logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. You can use - "tools/plain_train_net.py" as an example. - """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: - evaluator_list.append( - SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - ) - if evaluator_type in ["coco", "coco_panoptic_seg"]: - evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) - if evaluator_type == "coco_panoptic_seg": - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." 
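As the `build_evaluator` docstring above notes, for a custom dataset you can construct the evaluator directly instead of going through the `evaluator_type` dispatch. A minimal sketch for the `CIHP_val` split registered at the top of this file follows; `cfg` and `model` are assumed to come from `setup(args)` and `Trainer.build_model(cfg)` in this script, and the output directory is illustrative.

```
import os

from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format

# cfg and model as produced by setup(args) / Trainer.build_model(cfg) in this script
evaluator = COCOEvaluator("CIHP_val", cfg, True, os.path.join(cfg.OUTPUT_DIR, "inference"))
val_loader = build_detection_test_loader(cfg, "CIHP_val")
results = inference_on_dataset(model, val_loader, evaluator)
print_csv_format(results)
```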
- return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesSemSegEvaluator(dataset_name) - elif evaluator_type == "pascal_voc": - return PascalVOCDetectionEvaluator(dataset_name) - elif evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - elif len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def test_with_TTA(cls, cfg, model): - logger = logging.getLogger("detectron2.trainer") - # In the end of training, run an evaluation with TTA - # Only support some R-CNN models. - logger.info("Running inference with test-time augmentation ...") - model = GeneralizedRCNNWithTTA(cfg, model) - evaluators = [ - cls.build_evaluator( - cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") - ) - for name in cfg.DATASETS.TEST - ] - res = cls.test(cfg, model, evaluators) - res = OrderedDict({k + "_TTA": v for k, v in res.items()}) - return res - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - """ - If you'd like to do anything fancier than the standard training logic, - consider writing your own training loop (see plain_train_net.py) or - subclassing the trainer. - """ - trainer = Trainer(cfg) - trainer.resume_or_load(resume=False) - if cfg.TEST.AUG.ENABLED: - trainer.register_hooks( - [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] - ) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/inference.sh b/preprocess/humanparsing/mhp_extension/detectron2/tools/inference.sh deleted file mode 100644 index 3b9d39e..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/inference.sh +++ /dev/null @@ -1,4 +0,0 @@ -python finetune_net.py \ - --num-gpus 1 \ - --config-file ../configs/Misc/parsing_inference.yaml \ - --eval-only MODEL.WEIGHTS ./model_final.pth TEST.AUG.ENABLED False diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/plain_train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/plain_train_net.py deleted file mode 100644 index 52a0a28..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/plain_train_net.py +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Detectron2 training script with a plain training loop. 
- -This script reads a given config file and runs the training or evaluation. -It is an entry point that is able to train standard models in detectron2. - -In order to let one script support training of many models, -this script contains logic that are specific to these built-in models and therefore -may not be suitable for your own project. -For example, your research project perhaps only needs a single "evaluator". - -Therefore, we recommend you to use detectron2 as a library and take -this file as an example of how to use the library. -You may want to write your own script with your data and other customizations. - -Compared to "train_net.py", this script supports fewer default features. -It also includes fewer abstraction, therefore is easier to add custom logic. -""" - -import logging -import os -from collections import OrderedDict -import torch -from torch.nn.parallel import DistributedDataParallel - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer -from detectron2.config import get_cfg -from detectron2.data import ( - MetadataCatalog, - build_detection_test_loader, - build_detection_train_loader, -) -from detectron2.engine import default_argument_parser, default_setup, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - LVISEvaluator, - PascalVOCDetectionEvaluator, - SemSegEvaluator, - inference_on_dataset, - print_csv_format, -) -from detectron2.modeling import build_model -from detectron2.solver import build_lr_scheduler, build_optimizer -from detectron2.utils.events import ( - CommonMetricPrinter, - EventStorage, - JSONWriter, - TensorboardXWriter, -) - -logger = logging.getLogger("detectron2") - - -def get_evaluator(cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: - evaluator_list.append( - SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - ) - if evaluator_type in ["coco", "coco_panoptic_seg"]: - evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) - if evaluator_type == "coco_panoptic_seg": - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." 
- return CityscapesSemSegEvaluator(dataset_name) - if evaluator_type == "pascal_voc": - return PascalVOCDetectionEvaluator(dataset_name) - if evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type) - ) - if len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - -def do_test(cfg, model): - results = OrderedDict() - for dataset_name in cfg.DATASETS.TEST: - data_loader = build_detection_test_loader(cfg, dataset_name) - evaluator = get_evaluator( - cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) - ) - results_i = inference_on_dataset(model, data_loader, evaluator) - results[dataset_name] = results_i - if comm.is_main_process(): - logger.info("Evaluation results for {} in csv format:".format(dataset_name)) - print_csv_format(results_i) - if len(results) == 1: - results = list(results.values())[0] - return results - - -def do_train(cfg, model, resume=False): - model.train() - optimizer = build_optimizer(cfg, model) - scheduler = build_lr_scheduler(cfg, optimizer) - - checkpointer = DetectionCheckpointer( - model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler - ) - start_iter = ( - checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1 - ) - max_iter = cfg.SOLVER.MAX_ITER - - periodic_checkpointer = PeriodicCheckpointer( - checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter - ) - - writers = ( - [ - CommonMetricPrinter(max_iter), - JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")), - TensorboardXWriter(cfg.OUTPUT_DIR), - ] - if comm.is_main_process() - else [] - ) - - # compared to "train_net.py", we do not support accurate timing and - # precise BN here, because they are not trivial to implement - data_loader = build_detection_train_loader(cfg) - logger.info("Starting training from iteration {}".format(start_iter)) - with EventStorage(start_iter) as storage: - for data, iteration in zip(data_loader, range(start_iter, max_iter)): - iteration = iteration + 1 - storage.step() - - loss_dict = model(data) - losses = sum(loss_dict.values()) - assert torch.isfinite(losses).all(), loss_dict - - loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()} - losses_reduced = sum(loss for loss in loss_dict_reduced.values()) - if comm.is_main_process(): - storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced) - - optimizer.zero_grad() - losses.backward() - optimizer.step() - storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False) - scheduler.step() - - if ( - cfg.TEST.EVAL_PERIOD > 0 - and iteration % cfg.TEST.EVAL_PERIOD == 0 - and iteration != max_iter - ): - do_test(cfg, model) - # Compared to "train_net.py", the test results are not dumped to EventStorage - comm.synchronize() - - if iteration - start_iter > 5 and (iteration % 20 == 0 or iteration == max_iter): - for writer in writers: - writer.write() - periodic_checkpointer.step(iteration) - - -def setup(args): - """ - Create configs and perform basic setups. 
- """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup( - cfg, args - ) # if you don't like any of the default setup, write your own setup code - return cfg - - -def main(args): - cfg = setup(args) - - model = build_model(cfg) - logger.info("Model:\n{}".format(model)) - if args.eval_only: - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - return do_test(cfg, model) - - distributed = comm.get_world_size() > 1 - if distributed: - model = DistributedDataParallel( - model, device_ids=[comm.get_local_rank()], broadcast_buffers=False - ) - - do_train(cfg, model, resume=args.resume) - return do_test(cfg, model) - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/run.sh b/preprocess/humanparsing/mhp_extension/detectron2/tools/run.sh deleted file mode 100644 index b892673..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/run.sh +++ /dev/null @@ -1,3 +0,0 @@ -python finetune_net.py \ - --config-file ../configs/Misc/parsing_finetune_cihp+vip.yaml \ - --num-gpus 8 diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/train_net.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/train_net.py deleted file mode 100644 index b1c0ee4..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/train_net.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -""" -Detection Training Script. - -This scripts reads a given config file and runs the training or evaluation. -It is an entry point that is made to train standard models in detectron2. - -In order to let one script support training of many models, -this script contains logic that are specific to these built-in models and therefore -may not be suitable for your own project. -For example, your research project perhaps only needs a single "evaluator". - -Therefore, we recommend you to use detectron2 as an library and take -this file as an example of how to use the library. -You may want to write your own script with your data and other customizations. -""" - -import logging -import os -from collections import OrderedDict -import torch - -import detectron2.utils.comm as comm -from detectron2.checkpoint import DetectionCheckpointer -from detectron2.config import get_cfg -from detectron2.data import MetadataCatalog -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, hooks, launch -from detectron2.evaluation import ( - CityscapesInstanceEvaluator, - CityscapesSemSegEvaluator, - COCOEvaluator, - COCOPanopticEvaluator, - DatasetEvaluators, - LVISEvaluator, - PascalVOCDetectionEvaluator, - SemSegEvaluator, - verify_results, -) -from detectron2.modeling import GeneralizedRCNNWithTTA - - -class Trainer(DefaultTrainer): - """ - We use the "DefaultTrainer" which contains pre-defined default logic for - standard training workflow. They may not work for you, especially if you - are working on a new research project. In that case you can use the cleaner - "SimpleTrainer", or write your own training loop. You can use - "tools/plain_train_net.py" as an example. 
- """ - - @classmethod - def build_evaluator(cls, cfg, dataset_name, output_folder=None): - """ - Create evaluator(s) for a given dataset. - This uses the special metadata "evaluator_type" associated with each builtin dataset. - For your own dataset, you can simply create an evaluator manually in your - script and do not have to worry about the hacky if-else logic here. - """ - if output_folder is None: - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") - evaluator_list = [] - evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type - if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: - evaluator_list.append( - SemSegEvaluator( - dataset_name, - distributed=True, - num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, - ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, - output_dir=output_folder, - ) - ) - if evaluator_type in ["coco", "coco_panoptic_seg"]: - evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) - if evaluator_type == "coco_panoptic_seg": - evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) - if evaluator_type == "cityscapes_instance": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesInstanceEvaluator(dataset_name) - if evaluator_type == "cityscapes_sem_seg": - assert ( - torch.cuda.device_count() >= comm.get_rank() - ), "CityscapesEvaluator currently do not work with multiple machines." - return CityscapesSemSegEvaluator(dataset_name) - elif evaluator_type == "pascal_voc": - return PascalVOCDetectionEvaluator(dataset_name) - elif evaluator_type == "lvis": - return LVISEvaluator(dataset_name, cfg, True, output_folder) - if len(evaluator_list) == 0: - raise NotImplementedError( - "no Evaluator for the dataset {} with the type {}".format( - dataset_name, evaluator_type - ) - ) - elif len(evaluator_list) == 1: - return evaluator_list[0] - return DatasetEvaluators(evaluator_list) - - @classmethod - def test_with_TTA(cls, cfg, model): - logger = logging.getLogger("detectron2.trainer") - # In the end of training, run an evaluation with TTA - # Only support some R-CNN models. - logger.info("Running inference with test-time augmentation ...") - model = GeneralizedRCNNWithTTA(cfg, model) - evaluators = [ - cls.build_evaluator( - cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA") - ) - for name in cfg.DATASETS.TEST - ] - res = cls.test(cfg, model, evaluators) - res = OrderedDict({k + "_TTA": v for k, v in res.items()}) - return res - - -def setup(args): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - default_setup(cfg, args) - return cfg - - -def main(args): - cfg = setup(args) - - if args.eval_only: - model = Trainer.build_model(cfg) - DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( - cfg.MODEL.WEIGHTS, resume=args.resume - ) - res = Trainer.test(cfg, model) - if cfg.TEST.AUG.ENABLED: - res.update(Trainer.test_with_TTA(cfg, model)) - if comm.is_main_process(): - verify_results(cfg, res) - return res - - """ - If you'd like to do anything fancier than the standard training logic, - consider writing your own training loop (see plain_train_net.py) or - subclassing the trainer. 
- """ - trainer = Trainer(cfg) - trainer.resume_or_load(resume=args.resume) - if cfg.TEST.AUG.ENABLED: - trainer.register_hooks( - [hooks.EvalHook(0, lambda: trainer.test_with_TTA(cfg, trainer.model))] - ) - return trainer.train() - - -if __name__ == "__main__": - args = default_argument_parser().parse_args() - print("Command Line Args:", args) - launch( - main, - args.num_gpus, - num_machines=args.num_machines, - machine_rank=args.machine_rank, - dist_url=args.dist_url, - args=(args,), - ) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_data.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_data.py deleted file mode 100644 index b143b2d..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_data.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import argparse -import os -from itertools import chain -import cv2 -import tqdm - -from detectron2.config import get_cfg -from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader -from detectron2.data import detection_utils as utils -from detectron2.data.build import filter_images_with_few_keypoints -from detectron2.utils.logger import setup_logger -from detectron2.utils.visualizer import Visualizer - - -def setup(args): - cfg = get_cfg() - if args.config_file: - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) - cfg.freeze() - return cfg - - -def parse_args(in_args=None): - parser = argparse.ArgumentParser(description="Visualize ground-truth data") - parser.add_argument( - "--source", - choices=["annotation", "dataloader"], - required=True, - help="visualize the annotations or the data loader (with pre-processing)", - ) - parser.add_argument("--config-file", metavar="FILE", help="path to config file") - parser.add_argument("--output-dir", default="./", help="path to output directory") - parser.add_argument("--show", action="store_true", help="show output in a window") - parser.add_argument( - "opts", - help="Modify config options using the command-line", - default=None, - nargs=argparse.REMAINDER, - ) - return parser.parse_args(in_args) - - -if __name__ == "__main__": - args = parse_args() - logger = setup_logger() - logger.info("Arguments: " + str(args)) - cfg = setup(args) - - dirname = args.output_dir - os.makedirs(dirname, exist_ok=True) - metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) - - def output(vis, fname): - if args.show: - print(fname) - cv2.imshow("window", vis.get_image()[:, :, ::-1]) - cv2.waitKey() - else: - filepath = os.path.join(dirname, fname) - print("Saving to {} ...".format(filepath)) - vis.save(filepath) - - scale = 2.0 if args.show else 1.0 - if args.source == "dataloader": - train_data_loader = build_detection_train_loader(cfg) - for batch in train_data_loader: - for per_image in batch: - # Pytorch tensor is in (C, H, W) format - img = per_image["image"].permute(1, 2, 0).cpu().detach().numpy() - img = utils.convert_image_to_rgb(img, cfg.INPUT.FORMAT) - - visualizer = Visualizer(img, metadata=metadata, scale=scale) - target_fields = per_image["instances"].get_fields() - labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] - vis = visualizer.overlay_instances( - labels=labels, - boxes=target_fields.get("gt_boxes", None), - masks=target_fields.get("gt_masks", None), - keypoints=target_fields.get("gt_keypoints", None), - ) - output(vis, str(per_image["image_id"]) + ".jpg") - else: - dicts = 
list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) - if cfg.MODEL.KEYPOINT_ON: - dicts = filter_images_with_few_keypoints(dicts, 1) - for dic in tqdm.tqdm(dicts): - img = utils.read_image(dic["file_name"], "RGB") - visualizer = Visualizer(img, metadata=metadata, scale=scale) - vis = visualizer.draw_dataset_dict(dic) - output(vis, os.path.basename(dic["file_name"])) diff --git a/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_json_results.py b/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_json_results.py deleted file mode 100644 index d11ecb9..0000000 --- a/preprocess/humanparsing/mhp_extension/detectron2/tools/visualize_json_results.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import argparse -import json -import numpy as np -import os -from collections import defaultdict -import cv2 -import tqdm -from fvcore.common.file_io import PathManager - -from detectron2.data import DatasetCatalog, MetadataCatalog -from detectron2.structures import Boxes, BoxMode, Instances -from detectron2.utils.logger import setup_logger -from detectron2.utils.visualizer import Visualizer - - -def create_instances(predictions, image_size): - ret = Instances(image_size) - - score = np.asarray([x["score"] for x in predictions]) - chosen = (score > args.conf_threshold).nonzero()[0] - score = score[chosen] - bbox = np.asarray([predictions[i]["bbox"] for i in chosen]).reshape(-1, 4) - bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) - - labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen]) - - ret.scores = score - ret.pred_boxes = Boxes(bbox) - ret.pred_classes = labels - - try: - ret.pred_masks = [predictions[i]["segmentation"] for i in chosen] - except KeyError: - pass - return ret - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="A script that visualizes the json predictions from COCO or LVIS dataset." 
- ) - parser.add_argument("--input", required=True, help="JSON file produced by the model") - parser.add_argument("--output", required=True, help="output directory") - parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val") - parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold") - args = parser.parse_args() - - logger = setup_logger() - - with PathManager.open(args.input, "r") as f: - predictions = json.load(f) - - pred_by_image = defaultdict(list) - for p in predictions: - pred_by_image[p["image_id"]].append(p) - - dicts = list(DatasetCatalog.get(args.dataset)) - metadata = MetadataCatalog.get(args.dataset) - if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): - - def dataset_id_map(ds_id): - return metadata.thing_dataset_id_to_contiguous_id[ds_id] - - elif "lvis" in args.dataset: - # LVIS results are in the same format as COCO results, but have a different - # mapping from dataset category id to contiguous category id in [0, #categories - 1] - def dataset_id_map(ds_id): - return ds_id - 1 - - else: - raise ValueError("Unsupported dataset: {}".format(args.dataset)) - - os.makedirs(args.output, exist_ok=True) - - for dic in tqdm.tqdm(dicts): - img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] - basename = os.path.basename(dic["file_name"]) - - predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) - vis = Visualizer(img, metadata) - vis_pred = vis.draw_instance_predictions(predictions).get_image() - - vis = Visualizer(img, metadata) - vis_gt = vis.draw_dataset_dict(dic).get_image() - - concat = np.concatenate((vis_pred, vis_gt), axis=1) - cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1]) diff --git a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_datasets.py b/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_datasets.py deleted file mode 100644 index 8b00594..0000000 --- a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_datasets.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : datasets.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
-""" - -import os -import numpy as np -import random -import torch -import cv2 -from torch.utils import data -from utils.transforms import get_affine_transform - - -class CropDataSet(data.Dataset): - def __init__(self, root, split_name, crop_size=[473, 473], scale_factor=0.25, - rotation_factor=30, ignore_label=255, transform=None): - self.root = root - self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] - self.crop_size = np.asarray(crop_size) - self.ignore_label = ignore_label - self.scale_factor = scale_factor - self.rotation_factor = rotation_factor - self.flip_prob = 0.5 - self.transform = transform - self.split_name = split_name - - list_path = os.path.join(self.root, self.split_name + '.txt') - train_list = [i_id.strip() for i_id in open(list_path)] - - self.train_list = train_list - self.number_samples = len(self.train_list) - - def __len__(self): - return self.number_samples - - def _box2cs(self, box): - x, y, w, h = box[:4] - return self._xywh2cs(x, y, w, h) - - def _xywh2cs(self, x, y, w, h): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > self.aspect_ratio * h: - h = w * 1.0 / self.aspect_ratio - elif w < self.aspect_ratio * h: - w = h * self.aspect_ratio - scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) - return center, scale - - def __getitem__(self, index): - train_item = self.train_list[index] - - im_path = os.path.join(self.root, self.split_name + '_images', train_item + '.jpg') - parsing_anno_path = os.path.join(self.root, self.split_name + '_segmentations', train_item + '.png') - - im = cv2.imread(im_path, cv2.IMREAD_COLOR) - h, w, _ = im.shape - parsing_anno = np.zeros((h, w), dtype=np.long) - - # Get person center and scale - person_center, s = self._box2cs([0, 0, w - 1, h - 1]) - r = 0 - - if self.split_name != 'test': - # Get pose annotation - parsing_anno = cv2.imread(parsing_anno_path, cv2.IMREAD_GRAYSCALE) - sf = self.scale_factor - rf = self.rotation_factor - s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) - r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) if random.random() <= 0.6 else 0 - - if random.random() <= self.flip_prob: - im = im[:, ::-1, :] - parsing_anno = parsing_anno[:, ::-1] - person_center[0] = im.shape[1] - person_center[0] - 1 - right_idx = [15, 17, 19] - left_idx = [14, 16, 18] - for i in range(0, 3): - right_pos = np.where(parsing_anno == right_idx[i]) - left_pos = np.where(parsing_anno == left_idx[i]) - parsing_anno[right_pos[0], right_pos[1]] = left_idx[i] - parsing_anno[left_pos[0], left_pos[1]] = right_idx[i] - - trans = get_affine_transform(person_center, s, r, self.crop_size) - input = cv2.warpAffine( - im, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - - if self.transform: - input = self.transform(input) - - meta = { - 'name': train_item, - 'center': person_center, - 'height': h, - 'width': w, - 'scale': s, - 'rotation': r - } - - if self.split_name == 'val' or self.split_name == 'test': - return input, meta - else: - label_parsing = cv2.warpAffine( - parsing_anno, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_NEAREST, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(255)) - - label_parsing = torch.from_numpy(label_parsing) - - return input, label_parsing, meta - - -class CropDataValSet(data.Dataset): - def __init__(self, root, split_name='crop_pic', crop_size=[473, 473], transform=None, flip=False): - self.root = root - 
self.crop_size = crop_size - self.transform = transform - self.flip = flip - self.split_name = split_name - self.root = root - self.aspect_ratio = crop_size[1] * 1.0 / crop_size[0] - self.crop_size = np.asarray(crop_size) - - list_path = os.path.join(self.root, self.split_name + '.txt') - val_list = [i_id.strip() for i_id in open(list_path)] - - self.val_list = val_list - self.number_samples = len(self.val_list) - - def __len__(self): - return len(self.val_list) - - def _box2cs(self, box): - x, y, w, h = box[:4] - return self._xywh2cs(x, y, w, h) - - def _xywh2cs(self, x, y, w, h): - center = np.zeros((2), dtype=np.float32) - center[0] = x + w * 0.5 - center[1] = y + h * 0.5 - if w > self.aspect_ratio * h: - h = w * 1.0 / self.aspect_ratio - elif w < self.aspect_ratio * h: - w = h * self.aspect_ratio - scale = np.array([w * 1.0, h * 1.0], dtype=np.float32) - - return center, scale - - def __getitem__(self, index): - val_item = self.val_list[index] - # Load training image - im_path = os.path.join(self.root, self.split_name, val_item + '.jpg') - im = cv2.imread(im_path, cv2.IMREAD_COLOR) - h, w, _ = im.shape - # Get person center and scale - person_center, s = self._box2cs([0, 0, w - 1, h - 1]) - r = 0 - trans = get_affine_transform(person_center, s, r, self.crop_size) - input = cv2.warpAffine( - im, - trans, - (int(self.crop_size[1]), int(self.crop_size[0])), - flags=cv2.INTER_LINEAR, - borderMode=cv2.BORDER_CONSTANT, - borderValue=(0, 0, 0)) - input = self.transform(input) - flip_input = input.flip(dims=[-1]) - if self.flip: - batch_input_im = torch.stack([input, flip_input]) - else: - batch_input_im = input - - meta = { - 'name': val_item, - 'center': person_center, - 'height': h, - 'width': w, - 'scale': s, - 'rotation': r - } - - return batch_input_im, meta diff --git a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_evaluate.py b/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_evaluate.py deleted file mode 100644 index 288e3c8..0000000 --- a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_evaluate.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : evaluate.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import os -import argparse -import numpy as np -import torch - -from torch.utils import data -from tqdm import tqdm -from PIL import Image as PILImage -import torchvision.transforms as transforms -import torch.backends.cudnn as cudnn - -import networks -from utils.miou import compute_mean_ioU -from utils.transforms import BGR2RGB_transform -from utils.transforms import transform_parsing, transform_logits -from mhp_extension.global_local_parsing.global_local_datasets import CropDataValSet - - -def get_arguments(): - """Parse all the arguments provided from the CLI. - - Returns: - A list of parsed arguments. 
- """ - parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") - - # Network Structure - parser.add_argument("--arch", type=str, default='resnet101') - # Data Preference - parser.add_argument("--data-dir", type=str, default='./data/LIP') - parser.add_argument("--batch-size", type=int, default=1) - parser.add_argument("--split-name", type=str, default='crop_pic') - parser.add_argument("--input-size", type=str, default='473,473') - parser.add_argument("--num-classes", type=int, default=20) - parser.add_argument("--ignore-label", type=int, default=255) - parser.add_argument("--random-mirror", action="store_true") - parser.add_argument("--random-scale", action="store_true") - # Evaluation Preference - parser.add_argument("--log-dir", type=str, default='./log') - parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar') - parser.add_argument("--gpu", type=str, default='0', help="choose gpu device.") - parser.add_argument("--save-results", action="store_true", help="whether to save the results.") - parser.add_argument("--flip", action="store_true", help="random flip during the test.") - parser.add_argument("--multi-scales", type=str, default='1', help="multiple scales during the test") - return parser.parse_args() - - -def get_palette(num_cls): - """ Returns the color map for visualizing the segmentation mask. - Args: - num_cls: Number of classes - Returns: - The color map - """ - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette - - -def multi_scale_testing(model, batch_input_im, crop_size=[473, 473], flip=True, multi_scales=[1]): - flipped_idx = (15, 14, 17, 16, 19, 18) - if len(batch_input_im.shape) > 4: - batch_input_im = batch_input_im.squeeze() - if len(batch_input_im.shape) == 3: - batch_input_im = batch_input_im.unsqueeze(0) - - interp = torch.nn.Upsample(size=crop_size, mode='bilinear', align_corners=True) - ms_outputs = [] - for s in multi_scales: - interp_im = torch.nn.Upsample(scale_factor=s, mode='bilinear', align_corners=True) - scaled_im = interp_im(batch_input_im) - parsing_output = model(scaled_im) - parsing_output = parsing_output[0][-1] - output = parsing_output[0] - if flip: - flipped_output = parsing_output[1] - flipped_output[14:20, :, :] = flipped_output[flipped_idx, :, :] - output += flipped_output.flip(dims=[-1]) - output *= 0.5 - output = interp(output.unsqueeze(0)) - ms_outputs.append(output[0]) - ms_fused_parsing_output = torch.stack(ms_outputs) - ms_fused_parsing_output = ms_fused_parsing_output.mean(0) - ms_fused_parsing_output = ms_fused_parsing_output.permute(1, 2, 0) # HWC - parsing = torch.argmax(ms_fused_parsing_output, dim=2) - parsing = parsing.data.cpu().numpy() - ms_fused_parsing_output = ms_fused_parsing_output.data.cpu().numpy() - return parsing, ms_fused_parsing_output - - -def main(): - """Create the model and start the evaluation process.""" - args = get_arguments() - multi_scales = [float(i) for i in args.multi_scales.split(',')] - gpus = [int(i) for i in args.gpu.split(',')] - assert len(gpus) == 1 - if not args.gpu == 'None': - os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu - - cudnn.benchmark = True - cudnn.enabled = True - - h, w = map(int, args.input_size.split(',')) - input_size = 
[h, w] - - model = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=None) - - IMAGE_MEAN = model.mean - IMAGE_STD = model.std - INPUT_SPACE = model.input_space - print('image mean: {}'.format(IMAGE_MEAN)) - print('image std: {}'.format(IMAGE_STD)) - print('input space:{}'.format(INPUT_SPACE)) - if INPUT_SPACE == 'BGR': - print('BGR Transformation') - transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(mean=IMAGE_MEAN, - std=IMAGE_STD), - - ]) - if INPUT_SPACE == 'RGB': - print('RGB Transformation') - transform = transforms.Compose([ - transforms.ToTensor(), - BGR2RGB_transform(), - transforms.Normalize(mean=IMAGE_MEAN, - std=IMAGE_STD), - ]) - - # Data loader - lip_test_dataset = CropDataValSet(args.data_dir, args.split_name, crop_size=input_size, transform=transform, - flip=args.flip) - num_samples = len(lip_test_dataset) - print('Totoal testing sample numbers: {}'.format(num_samples)) - testloader = data.DataLoader(lip_test_dataset, batch_size=args.batch_size, shuffle=False, pin_memory=True) - - # Load model weight - state_dict = torch.load(args.model_restore) - from collections import OrderedDict - new_state_dict = OrderedDict() - for k, v in state_dict.items(): - name = k[7:] # remove `module.` - new_state_dict[name] = v - model.load_state_dict(new_state_dict) - model.cuda() - model.eval() - - sp_results_dir = os.path.join(args.log_dir, args.split_name + '_parsing') - if not os.path.exists(sp_results_dir): - os.makedirs(sp_results_dir) - - palette = get_palette(20) - parsing_preds = [] - scales = np.zeros((num_samples, 2), dtype=np.float32) - centers = np.zeros((num_samples, 2), dtype=np.int32) - with torch.no_grad(): - for idx, batch in enumerate(tqdm(testloader)): - image, meta = batch - if (len(image.shape) > 4): - image = image.squeeze() - im_name = meta['name'][0] - c = meta['center'].numpy()[0] - s = meta['scale'].numpy()[0] - w = meta['width'].numpy()[0] - h = meta['height'].numpy()[0] - scales[idx, :] = s - centers[idx, :] = c - parsing, logits = multi_scale_testing(model, image.cuda(), crop_size=input_size, flip=args.flip, - multi_scales=multi_scales) - if args.save_results: - parsing_result = transform_parsing(parsing, c, s, w, h, input_size) - parsing_result_path = os.path.join(sp_results_dir, im_name + '.png') - output_im = PILImage.fromarray(np.asarray(parsing_result, dtype=np.uint8)) - output_im.putpalette(palette) - output_im.save(parsing_result_path) - # save logits - logits_result = transform_logits(logits, c, s, w, h, input_size) - logits_result_path = os.path.join(sp_results_dir, im_name + '.npy') - np.save(logits_result_path, logits_result) - return - - -if __name__ == '__main__': - main() diff --git a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_train.py b/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_train.py deleted file mode 100644 index 810b1db..0000000 --- a/preprocess/humanparsing/mhp_extension/global_local_parsing/global_local_train.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : train.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
-""" - -import os -import json -import timeit -import argparse - -import torch -import torch.optim as optim -import torchvision.transforms as transforms -import torch.backends.cudnn as cudnn -from torch.utils import data - -import networks -import utils.schp as schp -from datasets.datasets import LIPDataSet -from datasets.target_generation import generate_edge_tensor -from utils.transforms import BGR2RGB_transform -from utils.criterion import CriterionAll -from utils.encoding import DataParallelModel, DataParallelCriterion -from utils.warmup_scheduler import SGDRScheduler - - -def get_arguments(): - """Parse all the arguments provided from the CLI. - Returns: - A list of parsed arguments. - """ - parser = argparse.ArgumentParser(description="Self Correction for Human Parsing") - - # Network Structure - parser.add_argument("--arch", type=str, default='resnet101') - # Data Preference - parser.add_argument("--data-dir", type=str, default='./data/LIP') - parser.add_argument("--batch-size", type=int, default=16) - parser.add_argument("--input-size", type=str, default='473,473') - parser.add_argument("--split-name", type=str, default='crop_pic') - parser.add_argument("--num-classes", type=int, default=20) - parser.add_argument("--ignore-label", type=int, default=255) - parser.add_argument("--random-mirror", action="store_true") - parser.add_argument("--random-scale", action="store_true") - # Training Strategy - parser.add_argument("--learning-rate", type=float, default=7e-3) - parser.add_argument("--momentum", type=float, default=0.9) - parser.add_argument("--weight-decay", type=float, default=5e-4) - parser.add_argument("--gpu", type=str, default='0,1,2') - parser.add_argument("--start-epoch", type=int, default=0) - parser.add_argument("--epochs", type=int, default=150) - parser.add_argument("--eval-epochs", type=int, default=10) - parser.add_argument("--imagenet-pretrain", type=str, default='./pretrain_model/resnet101-imagenet.pth') - parser.add_argument("--log-dir", type=str, default='./log') - parser.add_argument("--model-restore", type=str, default='./log/checkpoint.pth.tar') - parser.add_argument("--schp-start", type=int, default=100, help='schp start epoch') - parser.add_argument("--cycle-epochs", type=int, default=10, help='schp cyclical epoch') - parser.add_argument("--schp-restore", type=str, default='./log/schp_checkpoint.pth.tar') - parser.add_argument("--lambda-s", type=float, default=1, help='segmentation loss weight') - parser.add_argument("--lambda-e", type=float, default=1, help='edge loss weight') - parser.add_argument("--lambda-c", type=float, default=0.1, help='segmentation-edge consistency loss weight') - return parser.parse_args() - - -def main(): - args = get_arguments() - print(args) - - start_epoch = 0 - cycle_n = 0 - - if not os.path.exists(args.log_dir): - os.makedirs(args.log_dir) - with open(os.path.join(args.log_dir, 'args.json'), 'w') as opt_file: - json.dump(vars(args), opt_file) - - gpus = [int(i) for i in args.gpu.split(',')] - if not args.gpu == 'None': - os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu - - input_size = list(map(int, args.input_size.split(','))) - - cudnn.enabled = True - cudnn.benchmark = True - - # Model Initialization - AugmentCE2P = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=args.imagenet_pretrain) - model = DataParallelModel(AugmentCE2P) - model.cuda() - - IMAGE_MEAN = AugmentCE2P.mean - IMAGE_STD = AugmentCE2P.std - INPUT_SPACE = AugmentCE2P.input_space - print('image mean: {}'.format(IMAGE_MEAN)) - print('image 
std: {}'.format(IMAGE_STD)) - print('input space:{}'.format(INPUT_SPACE)) - - restore_from = args.model_restore - if os.path.exists(restore_from): - print('Resume training from {}'.format(restore_from)) - checkpoint = torch.load(restore_from) - model.load_state_dict(checkpoint['state_dict']) - start_epoch = checkpoint['epoch'] - - SCHP_AugmentCE2P = networks.init_model(args.arch, num_classes=args.num_classes, pretrained=args.imagenet_pretrain) - schp_model = DataParallelModel(SCHP_AugmentCE2P) - schp_model.cuda() - - if os.path.exists(args.schp_restore): - print('Resuming schp checkpoint from {}'.format(args.schp_restore)) - schp_checkpoint = torch.load(args.schp_restore) - schp_model_state_dict = schp_checkpoint['state_dict'] - cycle_n = schp_checkpoint['cycle_n'] - schp_model.load_state_dict(schp_model_state_dict) - - # Loss Function - criterion = CriterionAll(lambda_1=args.lambda_s, lambda_2=args.lambda_e, lambda_3=args.lambda_c, - num_classes=args.num_classes) - criterion = DataParallelCriterion(criterion) - criterion.cuda() - - # Data Loader - if INPUT_SPACE == 'BGR': - print('BGR Transformation') - transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(mean=IMAGE_MEAN, - std=IMAGE_STD), - ]) - - elif INPUT_SPACE == 'RGB': - print('RGB Transformation') - transform = transforms.Compose([ - transforms.ToTensor(), - BGR2RGB_transform(), - transforms.Normalize(mean=IMAGE_MEAN, - std=IMAGE_STD), - ]) - - train_dataset = LIPDataSet(args.data_dir, args.split_name, crop_size=input_size, transform=transform) - train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size * len(gpus), - num_workers=16, shuffle=True, pin_memory=True, drop_last=True) - print('Total training samples: {}'.format(len(train_dataset))) - - # Optimizer Initialization - optimizer = optim.SGD(model.parameters(), lr=args.learning_rate, momentum=args.momentum, - weight_decay=args.weight_decay) - - lr_scheduler = SGDRScheduler(optimizer, total_epoch=args.epochs, - eta_min=args.learning_rate / 100, warmup_epoch=10, - start_cyclical=args.schp_start, cyclical_base_lr=args.learning_rate / 2, - cyclical_epoch=args.cycle_epochs) - - total_iters = args.epochs * len(train_loader) - start = timeit.default_timer() - for epoch in range(start_epoch, args.epochs): - lr_scheduler.step(epoch=epoch) - lr = lr_scheduler.get_lr()[0] - - model.train() - for i_iter, batch in enumerate(train_loader): - i_iter += len(train_loader) * epoch - - images, labels, _ = batch - labels = labels.cuda(non_blocking=True) - - edges = generate_edge_tensor(labels) - labels = labels.type(torch.cuda.LongTensor) - edges = edges.type(torch.cuda.LongTensor) - - preds = model(images) - - # Online Self Correction Cycle with Label Refinement - if cycle_n >= 1: - with torch.no_grad(): - soft_preds = schp_model(images) - soft_parsing = [] - soft_edge = [] - for soft_pred in soft_preds: - soft_parsing.append(soft_pred[0][-1]) - soft_edge.append(soft_pred[1][-1]) - soft_preds = torch.cat(soft_parsing, dim=0) - soft_edges = torch.cat(soft_edge, dim=0) - else: - soft_preds = None - soft_edges = None - - loss = criterion(preds, [labels, edges, soft_preds, soft_edges], cycle_n) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - - if i_iter % 100 == 0: - print('iter = {} of {} completed, lr = {}, loss = {}'.format(i_iter, total_iters, lr, - loss.data.cpu().numpy())) - if (epoch + 1) % (args.eval_epochs) == 0: - schp.save_checkpoint({ - 'epoch': epoch + 1, - 'state_dict': model.state_dict(), - }, False, args.log_dir, 
filename='checkpoint_{}.pth.tar'.format(epoch + 1)) - - # Self Correction Cycle with Model Aggregation - if (epoch + 1) >= args.schp_start and (epoch + 1 - args.schp_start) % args.cycle_epochs == 0: - print('Self-correction cycle number {}'.format(cycle_n)) - schp.moving_average(schp_model, model, 1.0 / (cycle_n + 1)) - cycle_n += 1 - schp.bn_re_estimate(train_loader, schp_model) - schp.save_schp_checkpoint({ - 'state_dict': schp_model.state_dict(), - 'cycle_n': cycle_n, - }, False, args.log_dir, filename='schp_{}_checkpoint.pth.tar'.format(cycle_n)) - - torch.cuda.empty_cache() - end = timeit.default_timer() - print('epoch = {} of {} completed using {} s'.format(epoch, args.epochs, - (end - start) / (epoch - start_epoch + 1))) - - end = timeit.default_timer() - print('Training Finished in {} seconds'.format(end - start)) - - -if __name__ == '__main__': - main() diff --git a/preprocess/humanparsing/mhp_extension/global_local_parsing/make_id_list.py b/preprocess/humanparsing/mhp_extension/global_local_parsing/make_id_list.py deleted file mode 100644 index 311edf4..0000000 --- a/preprocess/humanparsing/mhp_extension/global_local_parsing/make_id_list.py +++ /dev/null @@ -1,13 +0,0 @@ -import os - -DATASET = 'VIP' # DATASET: MHPv2 or CIHP or VIP -TYPE = 'crop_pic' # crop_pic or DemoDataset -IMG_DIR = '../demo/cropped_img/crop_pic' -SAVE_DIR = '../demo/cropped_img' - -if not os.path.exists(SAVE_DIR): - os.makedirs(SAVE_DIR) - -with open(os.path.join(SAVE_DIR, TYPE + '.txt'), "w") as f: - for img_name in os.listdir(IMG_DIR): - f.write(img_name[:-4] + '\n') diff --git a/preprocess/humanparsing/mhp_extension/logits_fusion.py b/preprocess/humanparsing/mhp_extension/logits_fusion.py deleted file mode 100644 index 07a8446..0000000 --- a/preprocess/humanparsing/mhp_extension/logits_fusion.py +++ /dev/null @@ -1,307 +0,0 @@ -import argparse -import cv2 -import os -import json -import numpy as np -from PIL import Image as PILImage -import joblib - - -def mask_nms(masks, bbox_scores, instances_confidence_threshold=0.5, overlap_threshold=0.7): - """ - NMS-like procedure used in Panoptic Segmentation - Remove the overlap areas of different instances in Instance Segmentation - """ - panoptic_seg = np.zeros(masks.shape[:2], dtype=np.uint8) - sorted_inds = list(range(len(bbox_scores))) - current_segment_id = 0 - segments_score = [] - - for inst_id in sorted_inds: - score = bbox_scores[inst_id] - if score < instances_confidence_threshold: - break - mask = masks[:, :, inst_id] - mask_area = mask.sum() - - if mask_area == 0: - continue - - intersect = (mask > 0) & (panoptic_seg > 0) - intersect_area = intersect.sum() - - if intersect_area * 1.0 / mask_area > overlap_threshold: - continue - - if intersect_area > 0: - mask = mask & (panoptic_seg == 0) - - current_segment_id += 1 - # panoptic_seg[np.where(mask==1)] = current_segment_id - # panoptic_seg = panoptic_seg + current_segment_id*mask - panoptic_seg = np.where(mask == 0, panoptic_seg, current_segment_id) - segments_score.append(score) - # print(np.unique(panoptic_seg)) - return panoptic_seg, segments_score - - -def extend(si, sj, instance_label, global_label, panoptic_seg_mask, class_map): - """ - """ - directions = [[-1, 0], [0, 1], [1, 0], [0, -1], - [1, 1], [1, -1], [-1, 1], [-1, -1]] - - inst_class = instance_label[si, sj] - human_class = panoptic_seg_mask[si, sj] - global_class = class_map[inst_class] - queue = [[si, sj]] - - while len(queue) != 0: - cur = queue[0] - queue.pop(0) - - for direction in directions: - ni = cur[0] + direction[0] - nj = 
cur[1] + direction[1] - - if ni >= 0 and nj >= 0 and \ - ni < instance_label.shape[0] and \ - nj < instance_label.shape[1] and \ - instance_label[ni, nj] == 0 and \ - global_label[ni, nj] == global_class: - instance_label[ni, nj] = inst_class - # Using refined instance label to refine human label - panoptic_seg_mask[ni, nj] = human_class - queue.append([ni, nj]) - - -def refine(instance_label, panoptic_seg_mask, global_label, class_map): - """ - Inputs: - [ instance_label ] - np.array() with shape [h, w] - [ global_label ] with shape [h, w] - np.array() - """ - for i in range(instance_label.shape[0]): - for j in range(instance_label.shape[1]): - if instance_label[i, j] != 0: - extend(i, j, instance_label, global_label, panoptic_seg_mask, class_map) - - -def get_palette(num_cls): - """ Returns the color map for visualizing the segmentation mask. - Inputs: - =num_cls= - Number of classes. - Returns: - The color map. - """ - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette - - -def patch2img_output(patch_dir, img_name, img_height, img_width, bbox, bbox_type, num_class): - """transform bbox patch outputs to image output""" - assert bbox_type == 'gt' or 'msrcnn' - output = np.zeros((img_height, img_width, num_class), dtype='float') - output[:, :, 0] = np.inf - count_predictions = np.zeros((img_height, img_width, num_class), dtype='int32') - for i in range(len(bbox)): # person index starts from 1 - file_path = os.path.join(patch_dir, os.path.splitext(img_name)[0] + '_' + str(i + 1) + '_' + bbox_type + '.npy') - bbox_output = np.load(file_path) - output[bbox[i][1]:bbox[i][3] + 1, bbox[i][0]:bbox[i][2] + 1, 1:] += bbox_output[:, :, 1:] - count_predictions[bbox[i][1]:bbox[i][3] + 1, bbox[i][0]:bbox[i][2] + 1, 1:] += 1 - output[bbox[i][1]:bbox[i][3] + 1, bbox[i][0]:bbox[i][2] + 1, 0] \ - = np.minimum(output[bbox[i][1]:bbox[i][3] + 1, bbox[i][0]:bbox[i][2] + 1, 0], bbox_output[:, :, 0]) - - # Caution zero dividing. - count_predictions[count_predictions == 0] = 1 - return output / count_predictions - - -def get_instance(cat_gt, panoptic_seg_mask): - """ - """ - instance_gt = np.zeros_like(cat_gt, dtype=np.uint8) - num_humans = len(np.unique(panoptic_seg_mask)) - 1 - class_map = {} - - total_part_num = 0 - for id in range(1, num_humans + 1): - human_part_label = np.where(panoptic_seg_mask == id, cat_gt, 0).astype(np.uint8) - # human_part_label = (np.where(panoptic_seg_mask==id) * cat_gt).astype(np.uint8) - part_classes = np.unique(human_part_label) - - exceed = False - for part_id in part_classes: - if part_id == 0: # background - continue - total_part_num += 1 - - if total_part_num > 255: - print("total_part_num exceed, return current instance map: {}".format(total_part_num)) - exceed = True - break - class_map[total_part_num] = part_id - instance_gt[np.where(human_part_label == part_id)] = total_part_num - if exceed: - break - - # Make instance id continous. 
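# A toy check of the mask_nms procedure defined above: two overlapping instance masks
# (shapes and scores are made up), already sorted by descending detection score. The
# lower-scoring instance keeps only the pixels not claimed by the higher-scoring one.
import numpy as np

toy_masks = np.zeros((4, 4, 2), dtype=np.uint8)
toy_masks[0:3, 0:3, 0] = 1   # higher-scoring instance
toy_masks[1:4, 1:4, 1] = 1   # lower-scoring instance, 4 of its 9 pixels overlap the first
toy_scores = [0.9, 0.8]

toy_panoptic, toy_kept = mask_nms(toy_masks, toy_scores,
                                  instances_confidence_threshold=0.5,
                                  overlap_threshold=0.7)
print(np.unique(toy_panoptic))  # [0 1 2]: background plus the two kept segment ids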
- ori_cur_labels = np.unique(instance_gt) - total_num_label = len(ori_cur_labels) - if instance_gt.max() + 1 != total_num_label: - for label in range(1, total_num_label): - instance_gt[instance_gt == ori_cur_labels[label]] = label - - final_class_map = {} - for label in range(1, total_num_label): - if label >= 1: - final_class_map[label] = class_map[ori_cur_labels[label]] - - return instance_gt, final_class_map - - -def compute_confidence(im_name, feature_map, class_map, - instance_label, output_dir, - panoptic_seg_mask, seg_score_list): - """ - """ - conf_file = open(os.path.join(output_dir, os.path.splitext(im_name)[0] + '.txt'), 'w') - - weighted_map = np.zeros_like(feature_map[:, :, 0]) - for index, score in enumerate(seg_score_list): - weighted_map += (panoptic_seg_mask == index + 1) * score - - for label in class_map.keys(): - cls = class_map[label] - confidence = feature_map[:, :, cls].reshape(-1)[np.where(instance_label.reshape(-1) == label)] - confidence = (weighted_map * feature_map[:, :, cls].copy()).reshape(-1)[ - np.where(instance_label.reshape(-1) == label)] - - confidence = confidence.sum() / len(confidence) - conf_file.write('{} {}\n'.format(cls, confidence)) - - conf_file.close() - - -def result_saving(fused_output, img_name, img_height, img_width, output_dir, mask_output_path, bbox_score, msrcnn_bbox): - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - global_root = os.path.join(output_dir, 'global_parsing') - instance_root = os.path.join(output_dir, 'instance_parsing') - tag_dir = os.path.join(output_dir, 'global_tag') - - if not os.path.exists(global_root): - os.makedirs(global_root) - if not os.path.exists(instance_root): - os.makedirs(instance_root) - if not os.path.exists(tag_dir): - os.makedirs(tag_dir) - - # For visualizing indexed png image. 
- palette = get_palette(256) - - fused_output = cv2.resize(fused_output, dsize=(img_width, img_height), interpolation=cv2.INTER_LINEAR) - seg_pred = np.asarray(np.argmax(fused_output, axis=2), dtype=np.uint8) - masks = np.load(mask_output_path) - masks[np.where(seg_pred == 0)] = 0 - - panoptic_seg_mask = masks - seg_score_list = bbox_score - - instance_pred, class_map = get_instance(seg_pred, panoptic_seg_mask) - refine(instance_pred, panoptic_seg_mask, seg_pred, class_map) - - compute_confidence(img_name, fused_output, class_map, instance_pred, instance_root, - panoptic_seg_mask, seg_score_list) - - ins_seg_results = open(os.path.join(tag_dir, os.path.splitext(img_name)[0] + '.txt'), "a") - keep_human_id_list = list(np.unique(panoptic_seg_mask)) - if 0 in keep_human_id_list: - keep_human_id_list.remove(0) - for i in keep_human_id_list: - ins_seg_results.write('{:.6f} {} {} {} {}\n'.format(seg_score_list[i - 1], - int(msrcnn_bbox[i - 1][1]), int(msrcnn_bbox[i - 1][0]), - int(msrcnn_bbox[i - 1][3]), int(msrcnn_bbox[i - 1][2]))) - ins_seg_results.close() - - output_im_global = PILImage.fromarray(seg_pred) - output_im_instance = PILImage.fromarray(instance_pred) - output_im_tag = PILImage.fromarray(panoptic_seg_mask) - output_im_global.putpalette(palette) - output_im_instance.putpalette(palette) - output_im_tag.putpalette(palette) - - output_im_global.save(os.path.join(global_root, os.path.splitext(img_name)[0] + '.png')) - output_im_instance.save(os.path.join(instance_root, os.path.splitext(img_name)[0] + '.png')) - output_im_tag.save(os.path.join(tag_dir, os.path.splitext(img_name)[0] + '.png')) - - -def multi_process(a, args): - img_name = a['im_name'] - img_height = a['img_height'] - img_width = a['img_width'] - msrcnn_bbox = a['person_bbox'] - bbox_score = a['person_bbox_score'] - - ######### loading outputs from gloabl and local models ######### - global_output = np.load(os.path.join(args.global_output_dir, os.path.splitext(img_name)[0] + '.npy')) - - msrcnn_output = patch2img_output(args.msrcnn_output_dir, img_name, img_height, img_width, msrcnn_bbox, - bbox_type='msrcnn', num_class=20) - - gt_output = patch2img_output(args.gt_output_dir, img_name, img_height, img_width, msrcnn_bbox, bbox_type='msrcnn', - num_class=20) - - #### global and local branch logits fusion ##### -# fused_output = global_output + msrcnn_output + gt_output - fused_output = global_output + gt_output - - - mask_output_path = os.path.join(args.mask_output_dir, os.path.splitext(img_name)[0] + '_mask.npy') - result_saving(fused_output, img_name, img_height, img_width, args.save_dir, mask_output_path, bbox_score, msrcnn_bbox) - return - - -def main(args): - json_file = open(args.test_json_path) - anno = json.load(json_file)['root'] - - results = joblib.Parallel(n_jobs=24, verbose=10, pre_dispatch="all")( - [joblib.delayed(multi_process)(a, args) for i, a in enumerate(anno)] - ) - - -def get_arguments(): - parser = argparse.ArgumentParser(description="obtain final prediction by logits fusion") - parser.add_argument("--test_json_path", type=str, default='./data/CIHP/cascade_152_finetune/test.json') - parser.add_argument("--global_output_dir", type=str, - default='./data/CIHP/global/global_result-cihp-resnet101/global_output') -# parser.add_argument("--msrcnn_output_dir", type=str, -# default='./data/CIHP/cascade_152__finetune/msrcnn_result-cihp-resnet101/msrcnn_output') - parser.add_argument("--gt_output_dir", type=str, - default='./data/CIHP/cascade_152__finetune/gt_result-cihp-resnet101/gt_output') - 
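# The fusion in multi_process above is a per-pixel sum of class-logit maps, turned into a
# label map by the argmax in result_saving. A toy version with random stand-in arrays
# (shapes only; no real model outputs):
import numpy as np

h, w, num_class = 4, 4, 20
global_logits = np.random.rand(h, w, num_class)   # stand-in for the global-branch .npy
local_logits = np.random.rand(h, w, num_class)    # stand-in for patch2img_output(...)
fused = global_logits + local_logits
parsing = np.argmax(fused, axis=2)                # (h, w) part-label map
print(parsing.shape)                              # (4, 4)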
parser.add_argument("--mask_output_dir", type=str, default='./data/CIHP/cascade_152_finetune/mask') - parser.add_argument("--save_dir", type=str, default='./data/CIHP/fusion_results/cihp-msrcnn_finetune') - return parser.parse_args() - - -if __name__ == '__main__': - args = get_arguments() - main(args) diff --git a/preprocess/humanparsing/mhp_extension/make_crop_and_mask_w_mask_nms.py b/preprocess/humanparsing/mhp_extension/make_crop_and_mask_w_mask_nms.py deleted file mode 100644 index 1efc5ae..0000000 --- a/preprocess/humanparsing/mhp_extension/make_crop_and_mask_w_mask_nms.py +++ /dev/null @@ -1,134 +0,0 @@ -import numpy as np -import cv2, torch -import os -import json -import argparse -import pycocotools.mask as mask_util -from tqdm import tqdm - - -def bbox_expand(img_height, img_width, bbox, exp_ratio): - x_min, y_min, x_max, y_max = bbox[:] - exp_x = (x_max - x_min) * ((exp_ratio - 1) / 2) - exp_y = (y_max - y_min) * ((exp_ratio - 1) / 2) - new_x_min = 0 if x_min - exp_x < 0 else np.round(x_min - exp_x) - new_y_min = 0 if y_min - exp_y < 0 else np.round(y_min - exp_y) - new_x_max = img_width - 1 if x_max + exp_x > img_width - 1 else np.round(x_max + exp_x) - new_y_max = img_height - 1 if y_max + exp_y > img_height - 1 else np.round(y_max + exp_y) - return int(new_x_min), int(new_y_min), int(new_x_max), int(new_y_max) - - -def make_crop_and_mask(img_info, pred, file_list, crop_save_dir, mask_save_dir, args): - img_name = img_info['file_name'] - img_id = img_info['id'] - 1 # img_info['id'] start form 1 - img_w = img_info['width'] - img_h = img_info['height'] - - img = cv2.imread(os.path.join(args.img_dir, img_name)) - - exp_bbox = [] - ori_bbox = [] - bbox_name_list = [] - bbox_score_list = [] - person_idx = 0 - - panoptic_seg = np.zeros((img_h, img_w), dtype=np.uint8) - assert len(pred[img_id]['instances']) > 0, 'image without instance prediction' - - for instance in pred[img_id]['instances']: - score = instance['score'] - if score < args.conf_thres: - break - - mask = mask_util.decode(instance['segmentation']) - mask_area = mask.sum() - - if mask_area == 0: # if mask_area < img_w*img_h/1000: - continue - - intersect = (mask > 0) & (panoptic_seg > 0) - intersect_area = intersect.sum() - - if intersect_area * 1.0 / mask_area > args.overlap_threshold: # todo add args - continue - - if intersect_area > 0: - mask = mask & (panoptic_seg == 0) - - person_idx += 1 - panoptic_seg = np.where(mask == 0, panoptic_seg, person_idx) - - bbox_score_list.append(score) - - ins_bbox = instance['bbox'] # [x,y,w,h] format - x_min, y_min, box_w, box_h = ins_bbox - x_max, y_max = x_min + box_w, y_min + box_h - exp_x_min, exp_y_min, exp_x_max, exp_y_max = bbox_expand(img_h, img_w, [x_min, y_min, x_max, y_max], - args.exp_ratio) - crop_img = img[exp_y_min:exp_y_max + 1, exp_x_min:exp_x_max + 1, :] - exp_bbox.append([exp_x_min, exp_y_min, exp_x_max, exp_y_max]) - ori_bbox.append([x_min, y_min, x_max, y_max]) - bbox_name = os.path.splitext(img_name)[0] + '_' + str(person_idx) + '_msrcnn.jpg' - bbox_name_list.append(bbox_name) - - cv2.imwrite(os.path.join(crop_save_dir, bbox_name), crop_img) - - assert person_idx > 0, 'image without instance' - mask_name = os.path.splitext(img_name)[0] + '_mask.npy' - np.save(os.path.join(mask_save_dir, mask_name), panoptic_seg) - - ############## json writing ################## - item = {} - item['dataset'] = 'CIHP' - item['im_name'] = img_name - item['img_height'] = img_h - item['img_width'] = img_w - item['center'] = [img_h / 2, img_w / 2] - item['person_num'] = person_idx 
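# A small worked example for bbox_expand above (numbers chosen arbitrarily): a roughly
# 100x80 px box near the right/bottom border of a 150x200 image, expanded with
# exp_ratio=1.2 and clamped to the image bounds.
print(bbox_expand(img_height=200, img_width=150,
                  bbox=[40, 120, 140, 199], exp_ratio=1.2))
# -> (30, 112, 149, 199): 10 px added on each side in x, ~8 px in y, clipped at x=149, y=199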
- item['person_bbox'] = exp_bbox - item['real_person_bbox'] = ori_bbox - item['person_bbox_score'] = bbox_score_list - item['bbox_name'] = bbox_name_list - item['mask_name'] = mask_name - file_list.append(item) - json_file = {'root': file_list} - return json_file, file_list - - -def get_arguments(): - parser = argparse.ArgumentParser(description="crop person val/test demo for inference") - parser.add_argument("--exp_ratio", type=float, default=1.2) - parser.add_argument("--overlap_threshold", type=float, default=0.5) - parser.add_argument("--conf_thres", type=float, default=0.5) - parser.add_argument("--img_dir", type=str, - default='/data03/v_xuyunqiu/data/instance-level_human_parsing/Testing/Images') - parser.add_argument("--save_dir", type=str, - default='/data03/v_xuyunqiu/Projects/experiment_data/testing/resnest_200_TTA_mask_nms_all_data') - parser.add_argument("--img_list", type=str, - default='/data03/v_xuyunqiu/Projects/pycococreator/annotations/CIHP_test.json') - parser.add_argument("--det_res", type=str, - default='/data02/v_xuyunqiu/detectron2-ResNeSt/tools/output_cihp_inference_resnest/inference_TTA/instances_predictions.pth') - return parser.parse_args() - - -def main(args): - img_info_list = json.load(open(args.img_list, encoding='UTF-8')) - pred = torch.load(args.det_res) - - crop_save_dir = os.path.join(args.save_dir, 'crop_pic') - if not os.path.exists(crop_save_dir): - os.makedirs(crop_save_dir) - mask_save_dir = os.path.join(args.save_dir, 'crop_mask') - if not os.path.exists(mask_save_dir): - os.makedirs(mask_save_dir) - - file_list = [] - for img_info in tqdm(img_info_list['images']): - json_file, file_list = make_crop_and_mask(img_info, pred, file_list, crop_save_dir, mask_save_dir, args) - with open(os.path.join(args.save_dir, 'crop.json'), 'w') as f: - json.dump(json_file, f, indent=2) - - -if __name__ == '__main__': - args = get_arguments() - main(args) diff --git a/preprocess/humanparsing/mhp_extension/scripts/make_coco_style_annotation.sh b/preprocess/humanparsing/mhp_extension/scripts/make_coco_style_annotation.sh deleted file mode 100644 index 37a1e7d..0000000 --- a/preprocess/humanparsing/mhp_extension/scripts/make_coco_style_annotation.sh +++ /dev/null @@ -1,14 +0,0 @@ -python ./coco_style_annotation_creator/human_to_coco.py \ - --dataset 'CIHP' \ - --json_save_dir './data/CIHP/annotations' \ - --train_img_dir './data/CIHP/Training/Images' \ - --train_anno_dir './data/CIHP/Training/Human_ids' \ - --val_img_dir './data/CIHP/Validation/Images' \ - --val_anno_dir './data/CIHP/Validation/Human_ids' - - -python ./coco_style_annotation_creator/test_human2coco_format.py \ - --dataset 'CIHP' \ - --json_save_dir './data/CIHP/annotations' \ - --test_img_dir './data/CIHP/Testing/Images' - diff --git a/preprocess/humanparsing/mhp_extension/scripts/make_crop.sh b/preprocess/humanparsing/mhp_extension/scripts/make_crop.sh deleted file mode 100644 index 604a433..0000000 --- a/preprocess/humanparsing/mhp_extension/scripts/make_crop.sh +++ /dev/null @@ -1,6 +0,0 @@ -python make_crop_and_mask_w_mask_nms.py \ - --img_dir './data/CIHP/Testing/Images' \ - --save_dir './data/CIHP/' \ - --img_list './data/CIHP/annotations/CIHP_val.json' \ - --det_res './data/CIHP/detectron2_prediction/inference/instances_predictions.pth' - diff --git a/preprocess/humanparsing/mhp_extension/scripts/parsing_fusion.sh b/preprocess/humanparsing/mhp_extension/scripts/parsing_fusion.sh deleted file mode 100644 index 107bcf6..0000000 --- a/preprocess/humanparsing/mhp_extension/scripts/parsing_fusion.sh 
+++ /dev/null @@ -1,6 +0,0 @@ -python logits_fusion.py \ ---test_json_path ./data/CIHP/crop.json \ ---global_output_dir ./data/CIHP/global_pic_parsing \ ---msrcnn_output_dir ./data/CIHP/crop_pic_parsing \ ---gt_output_dir ./data/CIHP/crop_pic_parsing \ ---save_dir ./data/CIHP/mhp_fusion_parsing diff --git a/preprocess/humanparsing/modules/__init__.py b/preprocess/humanparsing/modules/__init__.py deleted file mode 100644 index 8a098de..0000000 --- a/preprocess/humanparsing/modules/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .bn import ABN, InPlaceABN, InPlaceABNSync -from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE -from .misc import GlobalAvgPool2d, SingleGPU -from .residual import IdentityResidualBlock -from .dense import DenseModule diff --git a/preprocess/humanparsing/modules/bn.py b/preprocess/humanparsing/modules/bn.py deleted file mode 100644 index a794698..0000000 --- a/preprocess/humanparsing/modules/bn.py +++ /dev/null @@ -1,132 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as functional - -try: - from queue import Queue -except ImportError: - from Queue import Queue - -from .functions import * - - -class ABN(nn.Module): - """Activated Batch Normalization - - This gathers a `BatchNorm2d` and an activation function in a single module - """ - - def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): - """Creates an Activated Batch Normalization module - - Parameters - ---------- - num_features : int - Number of feature channels in the input and output. - eps : float - Small constant to prevent numerical issues. - momentum : float - Momentum factor applied to compute running statistics as. - affine : bool - If `True` apply learned scale and shift transformation after normalization. - activation : str - Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. - slope : float - Negative slope for the `leaky_relu` activation. 
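ABN is meant as a drop-in replacement for the usual nn.BatchNorm2d followed by an activation, so a single module handles both the normalization and the non-linearity. A minimal usage sketch, assuming the package imports cleanly in your environment (importing modules JIT-compiles the inplace_abn extension declared in functions.py further below):

import torch
from modules import ABN  # exported by modules/__init__.py above

abn = ABN(num_features=64, activation="leaky_relu", slope=0.01)
x = torch.randn(2, 64, 32, 32)
y = abn(x)               # batch normalization followed by leaky ReLU
assert y.shape == x.shape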
- """ - super(ABN, self).__init__() - self.num_features = num_features - self.affine = affine - self.eps = eps - self.momentum = momentum - self.activation = activation - self.slope = slope - if self.affine: - self.weight = nn.Parameter(torch.ones(num_features)) - self.bias = nn.Parameter(torch.zeros(num_features)) - else: - self.register_parameter('weight', None) - self.register_parameter('bias', None) - self.register_buffer('running_mean', torch.zeros(num_features)) - self.register_buffer('running_var', torch.ones(num_features)) - self.reset_parameters() - - def reset_parameters(self): - nn.init.constant_(self.running_mean, 0) - nn.init.constant_(self.running_var, 1) - if self.affine: - nn.init.constant_(self.weight, 1) - nn.init.constant_(self.bias, 0) - - def forward(self, x): - x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias, - self.training, self.momentum, self.eps) - - if self.activation == ACT_RELU: - return functional.relu(x, inplace=True) - elif self.activation == ACT_LEAKY_RELU: - return functional.leaky_relu(x, negative_slope=self.slope, inplace=True) - elif self.activation == ACT_ELU: - return functional.elu(x, inplace=True) - else: - return x - - def __repr__(self): - rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ - ' affine={affine}, activation={activation}' - if self.activation == "leaky_relu": - rep += ', slope={slope})' - else: - rep += ')' - return rep.format(name=self.__class__.__name__, **self.__dict__) - - -class InPlaceABN(ABN): - """InPlace Activated Batch Normalization""" - - def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01): - """Creates an InPlace Activated Batch Normalization module - - Parameters - ---------- - num_features : int - Number of feature channels in the input and output. - eps : float - Small constant to prevent numerical issues. - momentum : float - Momentum factor applied to compute running statistics as. - affine : bool - If `True` apply learned scale and shift transformation after normalization. - activation : str - Name of the activation functions, one of: `leaky_relu`, `elu` or `none`. - slope : float - Negative slope for the `leaky_relu` activation. - """ - super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope) - - def forward(self, x): - x, _, _ = inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var, - self.training, self.momentum, self.eps, self.activation, self.slope) - return x - - -class InPlaceABNSync(ABN): - """InPlace Activated Batch Normalization with cross-GPU synchronization - This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`. 
- """ - - def forward(self, x): - x, _, _ = inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var, - self.training, self.momentum, self.eps, self.activation, self.slope) - return x - - def __repr__(self): - rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \ - ' affine={affine}, activation={activation}' - if self.activation == "leaky_relu": - rep += ', slope={slope})' - else: - rep += ')' - return rep.format(name=self.__class__.__name__, **self.__dict__) - - diff --git a/preprocess/humanparsing/modules/deeplab.py b/preprocess/humanparsing/modules/deeplab.py deleted file mode 100644 index fd25b78..0000000 --- a/preprocess/humanparsing/modules/deeplab.py +++ /dev/null @@ -1,84 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as functional - -from models._util import try_index -from .bn import ABN - - -class DeeplabV3(nn.Module): - def __init__(self, - in_channels, - out_channels, - hidden_channels=256, - dilations=(12, 24, 36), - norm_act=ABN, - pooling_size=None): - super(DeeplabV3, self).__init__() - self.pooling_size = pooling_size - - self.map_convs = nn.ModuleList([ - nn.Conv2d(in_channels, hidden_channels, 1, bias=False), - nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]), - nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]), - nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2]) - ]) - self.map_bn = norm_act(hidden_channels * 4) - - self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False) - self.global_pooling_bn = norm_act(hidden_channels) - - self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False) - self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False) - self.red_bn = norm_act(out_channels) - - self.reset_parameters(self.map_bn.activation, self.map_bn.slope) - - def reset_parameters(self, activation, slope): - gain = nn.init.calculate_gain(activation, slope) - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.xavier_normal_(m.weight.data, gain) - if hasattr(m, "bias") and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, ABN): - if hasattr(m, "weight") and m.weight is not None: - nn.init.constant_(m.weight, 1) - if hasattr(m, "bias") and m.bias is not None: - nn.init.constant_(m.bias, 0) - - def forward(self, x): - # Map convolutions - out = torch.cat([m(x) for m in self.map_convs], dim=1) - out = self.map_bn(out) - out = self.red_conv(out) - - # Global pooling - pool = self._global_pooling(x) - pool = self.global_pooling_conv(pool) - pool = self.global_pooling_bn(pool) - pool = self.pool_red_conv(pool) - if self.training or self.pooling_size is None: - pool = pool.repeat(1, 1, x.size(2), x.size(3)) - - out += pool - out = self.red_bn(out) - return out - - def _global_pooling(self, x): - if self.training or self.pooling_size is None: - pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1) - pool = pool.view(x.size(0), x.size(1), 1, 1) - else: - pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]), - min(try_index(self.pooling_size, 1), x.shape[3])) - padding = ( - (pooling_size[1] - 1) // 2, - (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1, - (pooling_size[0] - 1) // 2, - (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1 - ) - - pool = functional.avg_pool2d(x, pooling_size, 
stride=1) - pool = functional.pad(pool, pad=padding, mode="replicate") - return pool diff --git a/preprocess/humanparsing/modules/dense.py b/preprocess/humanparsing/modules/dense.py deleted file mode 100644 index 9638d6e..0000000 --- a/preprocess/humanparsing/modules/dense.py +++ /dev/null @@ -1,42 +0,0 @@ -from collections import OrderedDict - -import torch -import torch.nn as nn - -from .bn import ABN - - -class DenseModule(nn.Module): - def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1): - super(DenseModule, self).__init__() - self.in_channels = in_channels - self.growth = growth - self.layers = layers - - self.convs1 = nn.ModuleList() - self.convs3 = nn.ModuleList() - for i in range(self.layers): - self.convs1.append(nn.Sequential(OrderedDict([ - ("bn", norm_act(in_channels)), - ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False)) - ]))) - self.convs3.append(nn.Sequential(OrderedDict([ - ("bn", norm_act(self.growth * bottleneck_factor)), - ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False, - dilation=dilation)) - ]))) - in_channels += self.growth - - @property - def out_channels(self): - return self.in_channels + self.growth * self.layers - - def forward(self, x): - inputs = [x] - for i in range(self.layers): - x = torch.cat(inputs, dim=1) - x = self.convs1[i](x) - x = self.convs3[i](x) - inputs += [x] - - return torch.cat(inputs, dim=1) diff --git a/preprocess/humanparsing/modules/functions.py b/preprocess/humanparsing/modules/functions.py deleted file mode 100644 index 4b28372..0000000 --- a/preprocess/humanparsing/modules/functions.py +++ /dev/null @@ -1,245 +0,0 @@ -import pdb -from os import path -import torch -import torch.distributed as dist -import torch.autograd as autograd -import torch.cuda.comm as comm -from torch.autograd.function import once_differentiable -from torch.utils.cpp_extension import load - -_src_path = path.join(path.dirname(path.abspath(__file__)), "src") -_backend = load(name="inplace_abn", - extra_cflags=["-O3"], - sources=[path.join(_src_path, f) for f in [ - "inplace_abn.cpp", - "inplace_abn_cpu.cpp", - "inplace_abn_cuda.cu", - "inplace_abn_cuda_half.cu" - ]], - extra_cuda_cflags=["--expt-extended-lambda"]) - -# Activation names -ACT_RELU = "relu" -ACT_LEAKY_RELU = "leaky_relu" -ACT_ELU = "elu" -ACT_NONE = "none" - - -def _check(fn, *args, **kwargs): - success = fn(*args, **kwargs) - if not success: - raise RuntimeError("CUDA Error encountered in {}".format(fn)) - - -def _broadcast_shape(x): - out_size = [] - for i, s in enumerate(x.size()): - if i != 1: - out_size.append(1) - else: - out_size.append(s) - return out_size - - -def _reduce(x): - if len(x.size()) == 2: - return x.sum(dim=0) - else: - n, c = x.size()[0:2] - return x.contiguous().view((n, c, -1)).sum(2).sum(0) - - -def _count_samples(x): - count = 1 - for i, s in enumerate(x.size()): - if i != 1: - count *= s - return count - - -def _act_forward(ctx, x): - if ctx.activation == ACT_LEAKY_RELU: - _backend.leaky_relu_forward(x, ctx.slope) - elif ctx.activation == ACT_ELU: - _backend.elu_forward(x) - elif ctx.activation == ACT_NONE: - pass - - -def _act_backward(ctx, x, dx): - if ctx.activation == ACT_LEAKY_RELU: - _backend.leaky_relu_backward(x, dx, ctx.slope) - elif ctx.activation == ACT_ELU: - _backend.elu_backward(x, dx) - elif ctx.activation == ACT_NONE: - pass - - -class InPlaceABN(autograd.Function): - @staticmethod - def forward(ctx, x, weight, bias, running_mean, 
running_var, - training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01): - # Save context - ctx.training = training - ctx.momentum = momentum - ctx.eps = eps - ctx.activation = activation - ctx.slope = slope - ctx.affine = weight is not None and bias is not None - - # Prepare inputs - count = _count_samples(x) - x = x.contiguous() - weight = weight.contiguous() if ctx.affine else x.new_empty(0) - bias = bias.contiguous() if ctx.affine else x.new_empty(0) - - if ctx.training: - mean, var = _backend.mean_var(x) - - # Update running stats - running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) - running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1)) - - # Mark in-place modified tensors - ctx.mark_dirty(x, running_mean, running_var) - else: - mean, var = running_mean.contiguous(), running_var.contiguous() - ctx.mark_dirty(x) - - # BN forward + activation - _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) - _act_forward(ctx, x) - - # Output - ctx.var = var - ctx.save_for_backward(x, var, weight, bias) - ctx.mark_non_differentiable(running_mean, running_var) - return x, running_mean, running_var - - @staticmethod - @once_differentiable - def backward(ctx, dz, _drunning_mean, _drunning_var): - z, var, weight, bias = ctx.saved_tensors - dz = dz.contiguous() - - # Undo activation - _act_backward(ctx, z, dz) - - if ctx.training: - edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) - else: - # TODO: implement simplified CUDA backward for inference mode - edz = dz.new_zeros(dz.size(1)) - eydz = dz.new_zeros(dz.size(1)) - - dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) - # dweight = eydz * weight.sign() if ctx.affine else None - dweight = eydz if ctx.affine else None - if dweight is not None: - dweight[weight < 0] *= -1 - dbias = edz if ctx.affine else None - - return dx, dweight, dbias, None, None, None, None, None, None, None - - -class InPlaceABNSync(autograd.Function): - @classmethod - def forward(cls, ctx, x, weight, bias, running_mean, running_var, - training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True): - # Save context - ctx.training = training - ctx.momentum = momentum - ctx.eps = eps - ctx.activation = activation - ctx.slope = slope - ctx.affine = weight is not None and bias is not None - - # Prepare inputs - ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1 - - # count = _count_samples(x) - batch_size = x.new_tensor([x.shape[0]], dtype=torch.long) - - x = x.contiguous() - weight = weight.contiguous() if ctx.affine else x.new_empty(0) - bias = bias.contiguous() if ctx.affine else x.new_empty(0) - - if ctx.training: - mean, var = _backend.mean_var(x) - if ctx.world_size > 1: - # get global batch size - if equal_batches: - batch_size *= ctx.world_size - else: - dist.all_reduce(batch_size, dist.ReduceOp.SUM) - - ctx.factor = x.shape[0] / float(batch_size.item()) - - mean_all = mean.clone() * ctx.factor - dist.all_reduce(mean_all, dist.ReduceOp.SUM) - - var_all = (var + (mean - mean_all) ** 2) * ctx.factor - dist.all_reduce(var_all, dist.ReduceOp.SUM) - - mean = mean_all - var = var_all - - # Update running stats - running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean) - count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1] - running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1))) - - # Mark in-place modified tensors - ctx.mark_dirty(x, 
running_mean, running_var) - else: - mean, var = running_mean.contiguous(), running_var.contiguous() - ctx.mark_dirty(x) - - # BN forward + activation - _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps) - _act_forward(ctx, x) - - # Output - ctx.var = var - ctx.save_for_backward(x, var, weight, bias) - ctx.mark_non_differentiable(running_mean, running_var) - return x, running_mean, running_var - - @staticmethod - @once_differentiable - def backward(ctx, dz, _drunning_mean, _drunning_var): - z, var, weight, bias = ctx.saved_tensors - dz = dz.contiguous() - - # Undo activation - _act_backward(ctx, z, dz) - - if ctx.training: - edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps) - edz_local = edz.clone() - eydz_local = eydz.clone() - - if ctx.world_size > 1: - edz *= ctx.factor - dist.all_reduce(edz, dist.ReduceOp.SUM) - - eydz *= ctx.factor - dist.all_reduce(eydz, dist.ReduceOp.SUM) - else: - edz_local = edz = dz.new_zeros(dz.size(1)) - eydz_local = eydz = dz.new_zeros(dz.size(1)) - - dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps) - # dweight = eydz_local * weight.sign() if ctx.affine else None - dweight = eydz_local if ctx.affine else None - if dweight is not None: - dweight[weight < 0] *= -1 - dbias = edz_local if ctx.affine else None - - return dx, dweight, dbias, None, None, None, None, None, None, None - - -inplace_abn = InPlaceABN.apply -inplace_abn_sync = InPlaceABNSync.apply - -__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"] diff --git a/preprocess/humanparsing/modules/misc.py b/preprocess/humanparsing/modules/misc.py deleted file mode 100644 index 3c50b69..0000000 --- a/preprocess/humanparsing/modules/misc.py +++ /dev/null @@ -1,21 +0,0 @@ -import torch.nn as nn -import torch -import torch.distributed as dist - -class GlobalAvgPool2d(nn.Module): - def __init__(self): - """Global average pooling over the input's spatial dimensions""" - super(GlobalAvgPool2d, self).__init__() - - def forward(self, inputs): - in_size = inputs.size() - return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2) - -class SingleGPU(nn.Module): - def __init__(self, module): - super(SingleGPU, self).__init__() - self.module=module - - def forward(self, input): - return self.module(input.cuda(non_blocking=True)) - diff --git a/preprocess/humanparsing/modules/residual.py b/preprocess/humanparsing/modules/residual.py deleted file mode 100644 index 8a5c90e..0000000 --- a/preprocess/humanparsing/modules/residual.py +++ /dev/null @@ -1,182 +0,0 @@ -from collections import OrderedDict - -import torch.nn as nn - -from .bn import ABN, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE -import torch.nn.functional as functional - - -class ResidualBlock(nn.Module): - """Configurable residual block - - Parameters - ---------- - in_channels : int - Number of input channels. - channels : list of int - Number of channels in the internal feature maps. Can either have two or three elements: if three construct - a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then - `3 x 3` then `1 x 1` convolutions. - stride : int - Stride of the first `3 x 3` convolution - dilation : int - Dilation to apply to the `3 x 3` convolutions. - groups : int - Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with - bottleneck blocks. - norm_act : callable - Function to create normalization / activation Module. 
- dropout: callable - Function to create Dropout Module. - """ - - def __init__(self, - in_channels, - channels, - stride=1, - dilation=1, - groups=1, - norm_act=ABN, - dropout=None): - super(ResidualBlock, self).__init__() - - # Check parameters for inconsistencies - if len(channels) != 2 and len(channels) != 3: - raise ValueError("channels must contain either two or three values") - if len(channels) == 2 and groups != 1: - raise ValueError("groups > 1 are only valid if len(channels) == 3") - - is_bottleneck = len(channels) == 3 - need_proj_conv = stride != 1 or in_channels != channels[-1] - - if not is_bottleneck: - bn2 = norm_act(channels[1]) - bn2.activation = ACT_NONE - layers = [ - ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, - dilation=dilation)), - ("bn1", norm_act(channels[0])), - ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, - dilation=dilation)), - ("bn2", bn2) - ] - if dropout is not None: - layers = layers[0:2] + [("dropout", dropout())] + layers[2:] - else: - bn3 = norm_act(channels[2]) - bn3.activation = ACT_NONE - layers = [ - ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=1, padding=0, bias=False)), - ("bn1", norm_act(channels[0])), - ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=stride, padding=dilation, bias=False, - groups=groups, dilation=dilation)), - ("bn2", norm_act(channels[1])), - ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)), - ("bn3", bn3) - ] - if dropout is not None: - layers = layers[0:4] + [("dropout", dropout())] + layers[4:] - self.convs = nn.Sequential(OrderedDict(layers)) - - if need_proj_conv: - self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) - self.proj_bn = norm_act(channels[-1]) - self.proj_bn.activation = ACT_NONE - - def forward(self, x): - if hasattr(self, "proj_conv"): - residual = self.proj_conv(x) - residual = self.proj_bn(residual) - else: - residual = x - x = self.convs(x) + residual - - if self.convs.bn1.activation == ACT_LEAKY_RELU: - return functional.leaky_relu(x, negative_slope=self.convs.bn1.slope, inplace=True) - elif self.convs.bn1.activation == ACT_ELU: - return functional.elu(x, inplace=True) - else: - return x - - -class IdentityResidualBlock(nn.Module): - def __init__(self, - in_channels, - channels, - stride=1, - dilation=1, - groups=1, - norm_act=ABN, - dropout=None): - """Configurable identity-mapping residual block - - Parameters - ---------- - in_channels : int - Number of input channels. - channels : list of int - Number of channels in the internal feature maps. Can either have two or three elements: if three construct - a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then - `3 x 3` then `1 x 1` convolutions. - stride : int - Stride of the first `3 x 3` convolution - dilation : int - Dilation to apply to the `3 x 3` convolutions. - groups : int - Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with - bottleneck blocks. - norm_act : callable - Function to create normalization / activation Module. - dropout: callable - Function to create Dropout Module. 
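As the two docstrings above describe, both blocks switch between a basic form (two 3x3 convolutions when len(channels) == 2) and a bottleneck (1x1 -> 3x3 -> 1x1 when len(channels) == 3), and they add a 1x1 projection on the shortcut whenever the stride or the channel count changes. A hypothetical construction sketch, again assuming the package and its compiled extension import cleanly:

import torch
from modules import IdentityResidualBlock  # also exported by modules/__init__.py

# Basic form: 64 -> 64 through two 3x3 convolutions, identity shortcut
basic = IdentityResidualBlock(64, [64, 64])

# Bottleneck form: 64 -> 256 through 1x1/3x3/1x1; stride=2 forces a projection shortcut
bottleneck = IdentityResidualBlock(64, [64, 64, 256], stride=2)

x = torch.randn(1, 64, 56, 56)
print(basic(x).shape)        # torch.Size([1, 64, 56, 56])
print(bottleneck(x).shape)   # torch.Size([1, 256, 28, 28])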
- """ - super(IdentityResidualBlock, self).__init__() - - # Check parameters for inconsistencies - if len(channels) != 2 and len(channels) != 3: - raise ValueError("channels must contain either two or three values") - if len(channels) == 2 and groups != 1: - raise ValueError("groups > 1 are only valid if len(channels) == 3") - - is_bottleneck = len(channels) == 3 - need_proj_conv = stride != 1 or in_channels != channels[-1] - - self.bn1 = norm_act(in_channels) - if not is_bottleneck: - layers = [ - ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False, - dilation=dilation)), - ("bn2", norm_act(channels[0])), - ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, - dilation=dilation)) - ] - if dropout is not None: - layers = layers[0:2] + [("dropout", dropout())] + layers[2:] - else: - layers = [ - ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)), - ("bn2", norm_act(channels[0])), - ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False, - groups=groups, dilation=dilation)), - ("bn3", norm_act(channels[1])), - ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False)) - ] - if dropout is not None: - layers = layers[0:4] + [("dropout", dropout())] + layers[4:] - self.convs = nn.Sequential(OrderedDict(layers)) - - if need_proj_conv: - self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False) - - def forward(self, x): - if hasattr(self, "proj_conv"): - bn1 = self.bn1(x) - shortcut = self.proj_conv(bn1) - else: - shortcut = x.clone() - bn1 = self.bn1(x) - - out = self.convs(bn1) - out.add_(shortcut) - - return out diff --git a/preprocess/humanparsing/modules/src/checks.h b/preprocess/humanparsing/modules/src/checks.h deleted file mode 100644 index e761a6f..0000000 --- a/preprocess/humanparsing/modules/src/checks.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT -#ifndef AT_CHECK -#define AT_CHECK AT_ASSERT -#endif - -#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") -#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") -#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") - -#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) -#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) \ No newline at end of file diff --git a/preprocess/humanparsing/modules/src/inplace_abn.cpp b/preprocess/humanparsing/modules/src/inplace_abn.cpp deleted file mode 100644 index 0a6b112..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include - -#include - -#include "inplace_abn.h" - -std::vector mean_var(at::Tensor x) { - if (x.is_cuda()) { - if (x.type().scalarType() == at::ScalarType::Half) { - return mean_var_cuda_h(x); - } else { - return mean_var_cuda(x); - } - } else { - return mean_var_cpu(x); - } -} - -at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - if (x.is_cuda()) { - if (x.type().scalarType() == at::ScalarType::Half) { - return forward_cuda_h(x, mean, var, weight, bias, affine, eps); - } else { - return forward_cuda(x, mean, var, weight, bias, affine, eps); - } - } else { - return forward_cpu(x, mean, var, weight, bias, affine, eps); - } -} - -std::vector 
edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - if (z.is_cuda()) { - if (z.type().scalarType() == at::ScalarType::Half) { - return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps); - } else { - return edz_eydz_cuda(z, dz, weight, bias, affine, eps); - } - } else { - return edz_eydz_cpu(z, dz, weight, bias, affine, eps); - } -} - -at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps) { - if (z.is_cuda()) { - if (z.type().scalarType() == at::ScalarType::Half) { - return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps); - } else { - return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps); - } - } else { - return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps); - } -} - -void leaky_relu_forward(at::Tensor z, float slope) { - at::leaky_relu_(z, slope); -} - -void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) { - if (z.is_cuda()) { - if (z.type().scalarType() == at::ScalarType::Half) { - return leaky_relu_backward_cuda_h(z, dz, slope); - } else { - return leaky_relu_backward_cuda(z, dz, slope); - } - } else { - return leaky_relu_backward_cpu(z, dz, slope); - } -} - -void elu_forward(at::Tensor z) { - at::elu_(z); -} - -void elu_backward(at::Tensor z, at::Tensor dz) { - if (z.is_cuda()) { - return elu_backward_cuda(z, dz); - } else { - return elu_backward_cpu(z, dz); - } -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("mean_var", &mean_var, "Mean and variance computation"); - m.def("forward", &forward, "In-place forward computation"); - m.def("edz_eydz", &edz_eydz, "First part of backward computation"); - m.def("backward", &backward, "Second part of backward computation"); - m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation"); - m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion"); - m.def("elu_forward", &elu_forward, "Elu forward computation"); - m.def("elu_backward", &elu_backward, "Elu backward computation and inversion"); -} diff --git a/preprocess/humanparsing/modules/src/inplace_abn.h b/preprocess/humanparsing/modules/src/inplace_abn.h deleted file mode 100644 index 17afd11..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn.h +++ /dev/null @@ -1,88 +0,0 @@ -#pragma once - -#include - -#include - -std::vector mean_var_cpu(at::Tensor x); -std::vector mean_var_cuda(at::Tensor x); -std::vector mean_var_cuda_h(at::Tensor x); - -at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps); -at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps); -at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps); - -std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps); -std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps); -std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps); - -at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps); -at::Tensor 
backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps); -at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps); - -void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope); -void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope); -void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope); - -void elu_backward_cpu(at::Tensor z, at::Tensor dz); -void elu_backward_cuda(at::Tensor z, at::Tensor dz); - -static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) { - num = x.size(0); - chn = x.size(1); - sp = 1; - for (int64_t i = 2; i < x.ndimension(); ++i) - sp *= x.size(i); -} - -/* - * Specialized CUDA reduction functions for BN - */ -#ifdef __CUDACC__ - -#include "utils/cuda.cuh" - -template -__device__ T reduce(Op op, int plane, int N, int S) { - T sum = (T)0; - for (int batch = 0; batch < N; ++batch) { - for (int x = threadIdx.x; x < S; x += blockDim.x) { - sum += op(batch, plane, x); - } - } - - // sum over NumThreads within a warp - sum = warpSum(sum); - - // 'transpose', and reduce within warp again - __shared__ T shared[32]; - __syncthreads(); - if (threadIdx.x % WARP_SIZE == 0) { - shared[threadIdx.x / WARP_SIZE] = sum; - } - if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { - // zero out the other entries in shared - shared[threadIdx.x] = (T)0; - } - __syncthreads(); - if (threadIdx.x / WARP_SIZE == 0) { - sum = warpSum(shared[threadIdx.x]); - if (threadIdx.x == 0) { - shared[0] = sum; - } - } - __syncthreads(); - - // Everyone picks it up, should be broadcast into the whole gradInput - return shared[0]; -} -#endif diff --git a/preprocess/humanparsing/modules/src/inplace_abn_cpu.cpp b/preprocess/humanparsing/modules/src/inplace_abn_cpu.cpp deleted file mode 100644 index ffc6d38..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn_cpu.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include - -#include - -#include "utils/checks.h" -#include "inplace_abn.h" - -at::Tensor reduce_sum(at::Tensor x) { - if (x.ndimension() == 2) { - return x.sum(0); - } else { - auto x_view = x.view({x.size(0), x.size(1), -1}); - return x_view.sum(-1).sum(0); - } -} - -at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { - if (x.ndimension() == 2) { - return v; - } else { - std::vector broadcast_size = {1, -1}; - for (int64_t i = 2; i < x.ndimension(); ++i) - broadcast_size.push_back(1); - - return v.view(broadcast_size); - } -} - -int64_t count(at::Tensor x) { - int64_t count = x.size(0); - for (int64_t i = 2; i < x.ndimension(); ++i) - count *= x.size(i); - - return count; -} - -at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) { - if (affine) { - return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z); - } else { - return z; - } -} - -std::vector mean_var_cpu(at::Tensor x) { - auto num = count(x); - auto mean = reduce_sum(x) / num; - auto diff = x - broadcast_to(mean, x); - auto var = reduce_sum(diff.pow(2)) / num; - - return {mean, var}; -} - -at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - auto gamma = affine ? 
at::abs(weight) + eps : at::ones_like(var); - auto mul = at::rsqrt(var + eps) * gamma; - - x.sub_(broadcast_to(mean, x)); - x.mul_(broadcast_to(mul, x)); - if (affine) x.add_(broadcast_to(bias, x)); - - return x; -} - -std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - auto edz = reduce_sum(dz); - auto y = invert_affine(z, weight, bias, affine, eps); - auto eydz = reduce_sum(y * dz); - - return {edz, eydz}; -} - -at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps) { - auto y = invert_affine(z, weight, bias, affine, eps); - auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps); - - auto num = count(z); - auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz); - return dx; -} - -void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) { - CHECK_CPU_INPUT(z); - CHECK_CPU_INPUT(dz); - - AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] { - int64_t count = z.numel(); - auto *_z = z.data(); - auto *_dz = dz.data(); - - for (int64_t i = 0; i < count; ++i) { - if (_z[i] < 0) { - _z[i] *= 1 / slope; - _dz[i] *= slope; - } - } - })); -} - -void elu_backward_cpu(at::Tensor z, at::Tensor dz) { - CHECK_CPU_INPUT(z); - CHECK_CPU_INPUT(dz); - - AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] { - int64_t count = z.numel(); - auto *_z = z.data(); - auto *_dz = dz.data(); - - for (int64_t i = 0; i < count; ++i) { - if (_z[i] < 0) { - _z[i] = log1p(_z[i]); - _dz[i] *= (_z[i] + 1.f); - } - } - })); -} diff --git a/preprocess/humanparsing/modules/src/inplace_abn_cuda.cu b/preprocess/humanparsing/modules/src/inplace_abn_cuda.cu deleted file mode 100644 index b157b06..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn_cuda.cu +++ /dev/null @@ -1,333 +0,0 @@ -#include - -#include -#include - -#include - -#include "utils/checks.h" -#include "utils/cuda.cuh" -#include "inplace_abn.h" - -#include - -// Operations for reduce -template -struct SumOp { - __device__ SumOp(const T *t, int c, int s) - : tensor(t), chn(c), sp(s) {} - __device__ __forceinline__ T operator()(int batch, int plane, int n) { - return tensor[(batch * chn + plane) * sp + n]; - } - const T *tensor; - const int chn; - const int sp; -}; - -template -struct VarOp { - __device__ VarOp(T m, const T *t, int c, int s) - : mean(m), tensor(t), chn(c), sp(s) {} - __device__ __forceinline__ T operator()(int batch, int plane, int n) { - T val = tensor[(batch * chn + plane) * sp + n]; - return (val - mean) * (val - mean); - } - const T mean; - const T *tensor; - const int chn; - const int sp; -}; - -template -struct GradOp { - __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s) - : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} - __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { - T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight; - T _dz = dz[(batch * chn + plane) * sp + n]; - return Pair(_dz, _y * _dz); - } - const T weight; - const T bias; - const T *z; - const T *dz; - const int chn; - const int sp; -}; - -/*********** - * mean_var - ***********/ - -template -__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) { - int plane = blockIdx.x; - T norm = T(1) / T(num * sp); - - T _mean = reduce>(SumOp(x, chn, sp), plane, num, sp) * 
norm; - __syncthreads(); - T _var = reduce>(VarOp(_mean, x, chn, sp), plane, num, sp) * norm; - - if (threadIdx.x == 0) { - mean[plane] = _mean; - var[plane] = _var; - } -} - -std::vector mean_var_cuda(at::Tensor x) { - CHECK_CUDA_INPUT(x); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(x, num, chn, sp); - - // Prepare output tensors - auto mean = at::empty({chn}, x.options()); - auto var = at::empty({chn}, x.options()); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] { - mean_var_kernel<<>>( - x.data(), - mean.data(), - var.data(), - num, chn, sp); - })); - - return {mean, var}; -} - -/********** - * forward - **********/ - -template -__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias, - bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - T _mean = mean[plane]; - T _var = var[plane]; - T _weight = affine ? abs(weight[plane]) + eps : T(1); - T _bias = affine ? bias[plane] : T(0); - - T mul = rsqrt(_var + eps) * _weight; - - for (int batch = 0; batch < num; ++batch) { - for (int n = threadIdx.x; n < sp; n += blockDim.x) { - T _x = x[(batch * chn + plane) * sp + n]; - T _y = (_x - _mean) * mul + _bias; - - x[(batch * chn + plane) * sp + n] = _y; - } - } -} - -at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - CHECK_CUDA_INPUT(x); - CHECK_CUDA_INPUT(mean); - CHECK_CUDA_INPUT(var); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(x, num, chn, sp); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] { - forward_kernel<<>>( - x.data(), - mean.data(), - var.data(), - weight.data(), - bias.data(), - affine, eps, num, chn, sp); - })); - - return x; -} - -/*********** - * edz_eydz - ***********/ - -template -__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias, - T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - T _weight = affine ? abs(weight[plane]) + eps : 1.f; - T _bias = affine ? 
bias[plane] : 0.f; - - Pair res = reduce, GradOp>(GradOp(_weight, _bias, z, dz, chn, sp), plane, num, sp); - __syncthreads(); - - if (threadIdx.x == 0) { - edz[plane] = res.v1; - eydz[plane] = res.v2; - } -} - -std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(z, num, chn, sp); - - auto edz = at::empty({chn}, z.options()); - auto eydz = at::empty({chn}, z.options()); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] { - edz_eydz_kernel<<>>( - z.data(), - dz.data(), - weight.data(), - bias.data(), - edz.data(), - eydz.data(), - affine, eps, num, chn, sp); - })); - - return {edz, eydz}; -} - -/*********** - * backward - ***********/ - -template -__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz, - const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - T _weight = affine ? abs(weight[plane]) + eps : 1.f; - T _bias = affine ? bias[plane] : 0.f; - T _var = var[plane]; - T _edz = edz[plane]; - T _eydz = eydz[plane]; - - T _mul = _weight * rsqrt(_var + eps); - T count = T(num * sp); - - for (int batch = 0; batch < num; ++batch) { - for (int n = threadIdx.x; n < sp; n += blockDim.x) { - T _dz = dz[(batch * chn + plane) * sp + n]; - T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight; - - dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul; - } - } -} - -at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - CHECK_CUDA_INPUT(var); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - CHECK_CUDA_INPUT(edz); - CHECK_CUDA_INPUT(eydz); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(z, num, chn, sp); - - auto dx = at::zeros_like(z); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] { - backward_kernel<<>>( - z.data(), - dz.data(), - var.data(), - weight.data(), - bias.data(), - edz.data(), - eydz.data(), - dx.data(), - affine, eps, num, chn, sp); - })); - - return dx; -} - -/************** - * activations - **************/ - -template -inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) { - // Create thrust pointers - thrust::device_ptr th_z = thrust::device_pointer_cast(z); - thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); - - auto stream = at::cuda::getCurrentCUDAStream(); - thrust::transform_if(thrust::cuda::par.on(stream), - th_dz, th_dz + count, th_z, th_dz, - [slope] __device__ (const T& dz) { return dz * slope; }, - [] __device__ (const T& z) { return z < 0; }); - thrust::transform_if(thrust::cuda::par.on(stream), - th_z, th_z + count, th_z, - [slope] __device__ (const T& z) { return z / slope; }, - [] __device__ (const T& z) { return z < 0; }); -} - -void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - - int64_t count = z.numel(); - - AT_DISPATCH_FLOATING_TYPES(z.type(), 
"leaky_relu_backward_cuda", ([&] { - leaky_relu_backward_impl(z.data(), dz.data(), slope, count); - })); -} - -template -inline void elu_backward_impl(T *z, T *dz, int64_t count) { - // Create thrust pointers - thrust::device_ptr th_z = thrust::device_pointer_cast(z); - thrust::device_ptr th_dz = thrust::device_pointer_cast(dz); - - auto stream = at::cuda::getCurrentCUDAStream(); - thrust::transform_if(thrust::cuda::par.on(stream), - th_dz, th_dz + count, th_z, th_z, th_dz, - [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); }, - [] __device__ (const T& z) { return z < 0; }); - thrust::transform_if(thrust::cuda::par.on(stream), - th_z, th_z + count, th_z, - [] __device__ (const T& z) { return log1p(z); }, - [] __device__ (const T& z) { return z < 0; }); -} - -void elu_backward_cuda(at::Tensor z, at::Tensor dz) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - - int64_t count = z.numel(); - - AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] { - elu_backward_impl(z.data(), dz.data(), count); - })); -} diff --git a/preprocess/humanparsing/modules/src/inplace_abn_cuda_half.cu b/preprocess/humanparsing/modules/src/inplace_abn_cuda_half.cu deleted file mode 100644 index bb63e73..0000000 --- a/preprocess/humanparsing/modules/src/inplace_abn_cuda_half.cu +++ /dev/null @@ -1,275 +0,0 @@ -#include - -#include - -#include - -#include "utils/checks.h" -#include "utils/cuda.cuh" -#include "inplace_abn.h" - -#include - -// Operations for reduce -struct SumOpH { - __device__ SumOpH(const half *t, int c, int s) - : tensor(t), chn(c), sp(s) {} - __device__ __forceinline__ float operator()(int batch, int plane, int n) { - return __half2float(tensor[(batch * chn + plane) * sp + n]); - } - const half *tensor; - const int chn; - const int sp; -}; - -struct VarOpH { - __device__ VarOpH(float m, const half *t, int c, int s) - : mean(m), tensor(t), chn(c), sp(s) {} - __device__ __forceinline__ float operator()(int batch, int plane, int n) { - const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]); - return (t - mean) * (t - mean); - } - const float mean; - const half *tensor; - const int chn; - const int sp; -}; - -struct GradOpH { - __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s) - : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {} - __device__ __forceinline__ Pair operator()(int batch, int plane, int n) { - float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight; - float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); - return Pair(_dz, _y * _dz); - } - const float weight; - const float bias; - const half *z; - const half *dz; - const int chn; - const int sp; -}; - -/*********** - * mean_var - ***********/ - -__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) { - int plane = blockIdx.x; - float norm = 1.f / static_cast(num * sp); - - float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm; - __syncthreads(); - float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm; - - if (threadIdx.x == 0) { - mean[plane] = _mean; - var[plane] = _var; - } -} - -std::vector mean_var_cuda_h(at::Tensor x) { - CHECK_CUDA_INPUT(x); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(x, num, chn, sp); - - // Prepare output tensors - auto mean = at::empty({chn},x.options().dtype(at::kFloat)); - auto var = at::empty({chn},x.options().dtype(at::kFloat)); - - // Run kernel - dim3 blocks(chn); - dim3 
threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - mean_var_kernel_h<<>>( - reinterpret_cast(x.data()), - mean.data(), - var.data(), - num, chn, sp); - - return {mean, var}; -} - -/********** - * forward - **********/ - -__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias, - bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - const float _mean = mean[plane]; - const float _var = var[plane]; - const float _weight = affine ? abs(weight[plane]) + eps : 1.f; - const float _bias = affine ? bias[plane] : 0.f; - - const float mul = rsqrt(_var + eps) * _weight; - - for (int batch = 0; batch < num; ++batch) { - for (int n = threadIdx.x; n < sp; n += blockDim.x) { - half *x_ptr = x + (batch * chn + plane) * sp + n; - float _x = __half2float(*x_ptr); - float _y = (_x - _mean) * mul + _bias; - - *x_ptr = __float2half(_y); - } - } -} - -at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - CHECK_CUDA_INPUT(x); - CHECK_CUDA_INPUT(mean); - CHECK_CUDA_INPUT(var); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(x, num, chn, sp); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - forward_kernel_h<<>>( - reinterpret_cast(x.data()), - mean.data(), - var.data(), - weight.data(), - bias.data(), - affine, eps, num, chn, sp); - - return x; -} - -__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias, - float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - float _weight = affine ? abs(weight[plane]) + eps : 1.f; - float _bias = affine ? bias[plane] : 0.f; - - Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp); - __syncthreads(); - - if (threadIdx.x == 0) { - edz[plane] = res.v1; - eydz[plane] = res.v2; - } -} - -std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias, - bool affine, float eps) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(z, num, chn, sp); - - auto edz = at::empty({chn},z.options().dtype(at::kFloat)); - auto eydz = at::empty({chn},z.options().dtype(at::kFloat)); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - edz_eydz_kernel_h<<>>( - reinterpret_cast(z.data()), - reinterpret_cast(dz.data()), - weight.data(), - bias.data(), - edz.data(), - eydz.data(), - affine, eps, num, chn, sp); - - return {edz, eydz}; -} - -__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz, - const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) { - int plane = blockIdx.x; - - float _weight = affine ? abs(weight[plane]) + eps : 1.f; - float _bias = affine ? 
bias[plane] : 0.f; - float _var = var[plane]; - float _edz = edz[plane]; - float _eydz = eydz[plane]; - - float _mul = _weight * rsqrt(_var + eps); - float count = float(num * sp); - - for (int batch = 0; batch < num; ++batch) { - for (int n = threadIdx.x; n < sp; n += blockDim.x) { - float _dz = __half2float(dz[(batch * chn + plane) * sp + n]); - float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight; - - dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul); - } - } -} - -at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias, - at::Tensor edz, at::Tensor eydz, bool affine, float eps) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - CHECK_CUDA_INPUT(var); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(bias); - CHECK_CUDA_INPUT(edz); - CHECK_CUDA_INPUT(eydz); - - // Extract dimensions - int64_t num, chn, sp; - get_dims(z, num, chn, sp); - - auto dx = at::zeros_like(z); - - // Run kernel - dim3 blocks(chn); - dim3 threads(getNumThreads(sp)); - auto stream = at::cuda::getCurrentCUDAStream(); - backward_kernel_h<<>>( - reinterpret_cast(z.data()), - reinterpret_cast(dz.data()), - var.data(), - weight.data(), - bias.data(), - edz.data(), - eydz.data(), - reinterpret_cast(dx.data()), - affine, eps, num, chn, sp); - - return dx; -} - -__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) { - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < count; i += blockDim.x * gridDim.x){ - float _z = __half2float(z[i]); - if (_z < 0) { - dz[i] = __float2half(__half2float(dz[i]) * slope); - z[i] = __float2half(_z / slope); - } - } -} - -void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) { - CHECK_CUDA_INPUT(z); - CHECK_CUDA_INPUT(dz); - - int64_t count = z.numel(); - dim3 threads(getNumThreads(count)); - dim3 blocks = (count + threads.x - 1) / threads.x; - auto stream = at::cuda::getCurrentCUDAStream(); - leaky_relu_backward_impl_h<<>>( - reinterpret_cast(z.data()), - reinterpret_cast(dz.data()), - slope, count); -} - diff --git a/preprocess/humanparsing/modules/src/utils/checks.h b/preprocess/humanparsing/modules/src/utils/checks.h deleted file mode 100644 index e761a6f..0000000 --- a/preprocess/humanparsing/modules/src/utils/checks.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT -#ifndef AT_CHECK -#define AT_CHECK AT_ASSERT -#endif - -#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") -#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") -#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous") - -#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) -#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) \ No newline at end of file diff --git a/preprocess/humanparsing/modules/src/utils/common.h b/preprocess/humanparsing/modules/src/utils/common.h deleted file mode 100644 index e8403ee..0000000 --- a/preprocess/humanparsing/modules/src/utils/common.h +++ /dev/null @@ -1,49 +0,0 @@ -#pragma once - -#include - -/* - * Functions to share code between CPU and GPU - */ - -#ifdef __CUDACC__ -// CUDA versions - -#define HOST_DEVICE __host__ __device__ -#define INLINE_HOST_DEVICE __host__ __device__ inline -#define FLOOR(x) floor(x) - -#if __CUDA_ARCH__ >= 600 -// Recent compute capabilities have block-level atomicAdd for all 
data types, so we use that -#define ACCUM(x,y) atomicAdd_block(&(x),(y)) -#else -// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float -// and use the known atomicCAS-based implementation for double -template -__device__ inline data_t atomic_add(data_t *address, data_t val) { - return atomicAdd(address, val); -} - -template<> -__device__ inline double atomic_add(double *address, double val) { - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); - return __longlong_as_double(old); -} - -#define ACCUM(x,y) atomic_add(&(x),(y)) -#endif // #if __CUDA_ARCH__ >= 600 - -#else -// CPU versions - -#define HOST_DEVICE -#define INLINE_HOST_DEVICE inline -#define FLOOR(x) std::floor(x) -#define ACCUM(x,y) (x) += (y) - -#endif // #ifdef __CUDACC__ \ No newline at end of file diff --git a/preprocess/humanparsing/modules/src/utils/cuda.cuh b/preprocess/humanparsing/modules/src/utils/cuda.cuh deleted file mode 100644 index 60c0023..0000000 --- a/preprocess/humanparsing/modules/src/utils/cuda.cuh +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once - -/* - * General settings and functions - */ -const int WARP_SIZE = 32; -const int MAX_BLOCK_SIZE = 1024; - -static int getNumThreads(int nElem) { - int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; - for (int i = 0; i < 6; ++i) { - if (nElem <= threadSizes[i]) { - return threadSizes[i]; - } - } - return MAX_BLOCK_SIZE; -} - -/* - * Reduction utilities - */ -template -__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, - unsigned int mask = 0xffffffff) { -#if CUDART_VERSION >= 9000 - return __shfl_xor_sync(mask, value, laneMask, width); -#else - return __shfl_xor(value, laneMask, width); -#endif -} - -__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); } - -template -struct Pair { - T v1, v2; - __device__ Pair() {} - __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {} - __device__ Pair(T v) : v1(v), v2(v) {} - __device__ Pair(int v) : v1(v), v2(v) {} - __device__ Pair &operator+=(const Pair &a) { - v1 += a.v1; - v2 += a.v2; - return *this; - } -}; - -template -static __device__ __forceinline__ T warpSum(T val) { -#if __CUDA_ARCH__ >= 300 - for (int i = 0; i < getMSB(WARP_SIZE); ++i) { - val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); - } -#else - __shared__ T values[MAX_BLOCK_SIZE]; - values[threadIdx.x] = val; - __threadfence_block(); - const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; - for (int i = 1; i < WARP_SIZE; i++) { - val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; - } -#endif - return val; -} - -template -static __device__ __forceinline__ Pair warpSum(Pair value) { - value.v1 = warpSum(value.v1); - value.v2 = warpSum(value.v2); - return value; -} \ No newline at end of file diff --git a/preprocess/humanparsing/networks/AugmentCE2P.py b/preprocess/humanparsing/networks/AugmentCE2P.py deleted file mode 100644 index ce32f78..0000000 --- a/preprocess/humanparsing/networks/AugmentCE2P.py +++ /dev/null @@ -1,388 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : AugmentCE2P.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory 
of this source tree. -""" - -import functools -import pdb - -import torch -import torch.nn as nn -from torch.nn import functional as F -# Note here we adopt the InplaceABNSync implementation from https://github.com/mapillary/inplace_abn -# By default, the InplaceABNSync module contains a BatchNorm Layer and a LeakyReLu layer -from modules import InPlaceABNSync -import numpy as np - -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - -affine_par = True - -pretrained_settings = { - 'resnet101': { - 'imagenet': { - 'input_space': 'BGR', - 'input_size': [3, 224, 224], - 'input_range': [0, 1], - 'mean': [0.406, 0.456, 0.485], - 'std': [0.225, 0.224, 0.229], - 'num_classes': 1000 - } - }, -} - - -def conv3x3(in_planes, out_planes, stride=1): - "3x3 convolution with padding" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, fist_dilation=1, multi_grid=1): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False) - self.bn2 = BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=False) - self.relu_inplace = nn.ReLU(inplace=True) - self.downsample = downsample - self.dilation = dilation - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out = out + residual - out = self.relu_inplace(out) - - return out - - -class CostomAdaptiveAvgPool2D(nn.Module): - - def __init__(self, output_size): - - super(CostomAdaptiveAvgPool2D, self).__init__() - - self.output_size = output_size - - def forward(self, x): - - H_in, W_in = x.shape[-2:] - H_out, W_out = self.output_size - - out_i = [] - for i in range(H_out): - out_j = [] - for j in range(W_out): - hs = int(np.floor(i * H_in / H_out)) - he = int(np.ceil((i + 1) * H_in / H_out)) - - ws = int(np.floor(j * W_in / W_out)) - we = int(np.ceil((j + 1) * W_in / W_out)) - - # print(hs, he, ws, we) - kernel_size = [he - hs, we - ws] - - out = F.avg_pool2d(x[:, :, hs:he, ws:we], kernel_size) - out_j.append(out) - - out_j = torch.concat(out_j, -1) - out_i.append(out_j) - - out_i = torch.concat(out_i, -2) - return out_i - - -class PSPModule(nn.Module): - """ - Reference: - Zhao, Hengshuang, et al. 
*"Pyramid scene parsing network."* - """ - - def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): - super(PSPModule, self).__init__() - - self.stages = [] - tmp = [] - for size in sizes: - if size == 3 or size == 6: - tmp.append(self._make_stage_custom(features, out_features, size)) - else: - tmp.append(self._make_stage(features, out_features, size)) - self.stages = nn.ModuleList(tmp) - # self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes]) - self.bottleneck = nn.Sequential( - nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1, - bias=False), - InPlaceABNSync(out_features), - ) - - def _make_stage(self, features, out_features, size): - prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) - conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) - bn = InPlaceABNSync(out_features) - return nn.Sequential(prior, conv, bn) - - def _make_stage_custom(self, features, out_features, size): - prior = CostomAdaptiveAvgPool2D(output_size=(size, size)) - conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) - bn = InPlaceABNSync(out_features) - return nn.Sequential(prior, conv, bn) - - def forward(self, feats): - h, w = feats.size(2), feats.size(3) - priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in - self.stages] + [feats] - bottle = self.bottleneck(torch.cat(priors, 1)) - return bottle - - -class ASPPModule(nn.Module): - """ - Reference: - Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."* - """ - - def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)): - super(ASPPModule, self).__init__() - - self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), - nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, - bias=False), - InPlaceABNSync(inner_features)) - self.conv2 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(inner_features)) - self.conv3 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), - InPlaceABNSync(inner_features)) - self.conv4 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), - InPlaceABNSync(inner_features)) - self.conv5 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), - InPlaceABNSync(inner_features)) - - self.bottleneck = nn.Sequential( - nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(out_features), - nn.Dropout2d(0.1) - ) - - def forward(self, x): - _, _, h, w = x.size() - - feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) - - feat2 = self.conv2(x) - feat3 = self.conv3(x) - feat4 = self.conv4(x) - feat5 = self.conv5(x) - out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) - - bottle = self.bottleneck(out) - return bottle - - -class Edge_Module(nn.Module): - """ - Edge Learning Branch - """ - - def __init__(self, in_fea=[256, 512, 1024], mid_fea=256, out_fea=2): - super(Edge_Module, self).__init__() - - self.conv1 = nn.Sequential( - nn.Conv2d(in_fea[0], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(mid_fea) - ) - self.conv2 = nn.Sequential( - nn.Conv2d(in_fea[1], 
mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(mid_fea) - ) - self.conv3 = nn.Sequential( - nn.Conv2d(in_fea[2], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(mid_fea) - ) - self.conv4 = nn.Conv2d(mid_fea, out_fea, kernel_size=3, padding=1, dilation=1, bias=True) - self.conv5 = nn.Conv2d(out_fea * 3, out_fea, kernel_size=1, padding=0, dilation=1, bias=True) - - def forward(self, x1, x2, x3): - _, _, h, w = x1.size() - - edge1_fea = self.conv1(x1) - edge1 = self.conv4(edge1_fea) - edge2_fea = self.conv2(x2) - edge2 = self.conv4(edge2_fea) - edge3_fea = self.conv3(x3) - edge3 = self.conv4(edge3_fea) - - edge2_fea = F.interpolate(edge2_fea, size=(h, w), mode='bilinear', align_corners=True) - edge3_fea = F.interpolate(edge3_fea, size=(h, w), mode='bilinear', align_corners=True) - edge2 = F.interpolate(edge2, size=(h, w), mode='bilinear', align_corners=True) - edge3 = F.interpolate(edge3, size=(h, w), mode='bilinear', align_corners=True) - - edge = torch.cat([edge1, edge2, edge3], dim=1) - edge_fea = torch.cat([edge1_fea, edge2_fea, edge3_fea], dim=1) - edge = self.conv5(edge) - - return edge, edge_fea - - -class Decoder_Module(nn.Module): - """ - Parsing Branch Decoder Module. - """ - - def __init__(self, num_classes): - super(Decoder_Module, self).__init__() - self.conv1 = nn.Sequential( - nn.Conv2d(512, 256, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(256) - ) - self.conv2 = nn.Sequential( - nn.Conv2d(256, 48, kernel_size=1, stride=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(48) - ) - self.conv3 = nn.Sequential( - nn.Conv2d(304, 256, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(256), - nn.Conv2d(256, 256, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(256) - ) - - self.conv4 = nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True) - - def forward(self, xt, xl): - _, _, h, w = xl.size() - xt = F.interpolate(self.conv1(xt), size=(h, w), mode='bilinear', align_corners=True) - xl = self.conv2(xl) - x = torch.cat([xt, xl], dim=1) - x = self.conv3(x) - seg = self.conv4(x) - return seg, x - - -class ResNet(nn.Module): - def __init__(self, block, layers, num_classes): - self.inplanes = 128 - super(ResNet, self).__init__() - self.conv1 = conv3x3(3, 64, stride=2) - self.bn1 = BatchNorm2d(64) - self.relu1 = nn.ReLU(inplace=False) - self.conv2 = conv3x3(64, 64) - self.bn2 = BatchNorm2d(64) - self.relu2 = nn.ReLU(inplace=False) - self.conv3 = conv3x3(64, 128) - self.bn3 = BatchNorm2d(128) - self.relu3 = nn.ReLU(inplace=False) - - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, multi_grid=(1, 1, 1)) - - self.context_encoding = PSPModule(2048, 512) - - self.edge = Edge_Module() - self.decoder = Decoder_Module(num_classes) - - self.fushion = nn.Sequential( - nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(256), - nn.Dropout2d(0.1), - nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True) - ) - - def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - 
nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm2d(planes * block.expansion, affine=affine_par)) - - layers = [] - generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1 - layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample, - multi_grid=generate_multi_grid(0, multi_grid))) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append( - block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid))) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.relu1(self.bn1(self.conv1(x))) - x = self.relu2(self.bn2(self.conv2(x))) - x = self.relu3(self.bn3(self.conv3(x))) - x = self.maxpool(x) - x2 = self.layer1(x) - x3 = self.layer2(x2) - x4 = self.layer3(x3) - x5 = self.layer4(x4) - x = self.context_encoding(x5) - parsing_result, parsing_fea = self.decoder(x, x2) - # Edge Branch - edge_result, edge_fea = self.edge(x2, x3, x4) - # Fusion Branch - x = torch.cat([parsing_fea, edge_fea], dim=1) - fusion_result = self.fushion(x) - return [[parsing_result, fusion_result], edge_result] - - -def initialize_pretrained_model(model, settings, pretrained='./models/resnet101-imagenet.pth'): - model.input_space = settings['input_space'] - model.input_size = settings['input_size'] - model.input_range = settings['input_range'] - model.mean = settings['mean'] - model.std = settings['std'] - - if pretrained is not None: - saved_state_dict = torch.load(pretrained) - new_params = model.state_dict().copy() - for i in saved_state_dict: - i_parts = i.split('.') - if not i_parts[0] == 'fc': - new_params['.'.join(i_parts[0:])] = saved_state_dict[i] - model.load_state_dict(new_params) - - -def resnet101(num_classes=20, pretrained='./models/resnet101-imagenet.pth'): - model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes) - settings = pretrained_settings['resnet101']['imagenet'] - initialize_pretrained_model(model, settings, pretrained) - return model diff --git a/preprocess/humanparsing/networks/__init__.py b/preprocess/humanparsing/networks/__init__.py deleted file mode 100644 index 3d5d384..0000000 --- a/preprocess/humanparsing/networks/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import -from networks.AugmentCE2P import resnet101 - -__factory = { - 'resnet101': resnet101, -} - - -def init_model(name, *args, **kwargs): - if name not in __factory.keys(): - raise KeyError("Unknown model arch: {}".format(name)) - return __factory[name](*args, **kwargs) \ No newline at end of file diff --git a/preprocess/humanparsing/networks/backbone/mobilenetv2.py b/preprocess/humanparsing/networks/backbone/mobilenetv2.py deleted file mode 100644 index 6f2fe34..0000000 --- a/preprocess/humanparsing/networks/backbone/mobilenetv2.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : mobilenetv2.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
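For orientation, a minimal usage sketch of the init_model factory above. This is an illustration, not code from the repository: it assumes the humanparsing package root is on sys.path, the InPlaceABN CUDA extension under modules/ has been built, and a GPU is available; pretrained=None is passed to skip the ./models/resnet101-imagenet.pth checkpoint.

# Hypothetical driver for the factory in networks/__init__.py (assumptions noted above).
import torch
from networks import init_model

model = init_model('resnet101', num_classes=20, pretrained=None).cuda().eval()

with torch.no_grad():
    x = torch.randn(1, 3, 473, 473, device='cuda')   # LIP-style crop size
    (parsing, fusion), edge = model(x)               # forward returns [[parsing, fusion], edge]

# All three outputs come out at 1/4 of the input resolution.
print(parsing.shape, fusion.shape, edge.shape)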
-""" - -import torch.nn as nn -import math -import functools - -from modules import InPlaceABN, InPlaceABNSync - -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - -__all__ = ['mobilenetv2'] - - -def conv_bn(inp, oup, stride): - return nn.Sequential( - nn.Conv2d(inp, oup, 3, stride, 1, bias=False), - BatchNorm2d(oup), - nn.ReLU6(inplace=True) - ) - - -def conv_1x1_bn(inp, oup): - return nn.Sequential( - nn.Conv2d(inp, oup, 1, 1, 0, bias=False), - BatchNorm2d(oup), - nn.ReLU6(inplace=True) - ) - - -class InvertedResidual(nn.Module): - def __init__(self, inp, oup, stride, expand_ratio): - super(InvertedResidual, self).__init__() - self.stride = stride - assert stride in [1, 2] - - hidden_dim = round(inp * expand_ratio) - self.use_res_connect = self.stride == 1 and inp == oup - - if expand_ratio == 1: - self.conv = nn.Sequential( - # dw - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), - BatchNorm2d(hidden_dim), - nn.ReLU6(inplace=True), - # pw-linear - nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), - BatchNorm2d(oup), - ) - else: - self.conv = nn.Sequential( - # pw - nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), - BatchNorm2d(hidden_dim), - nn.ReLU6(inplace=True), - # dw - nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), - BatchNorm2d(hidden_dim), - nn.ReLU6(inplace=True), - # pw-linear - nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), - BatchNorm2d(oup), - ) - - def forward(self, x): - if self.use_res_connect: - return x + self.conv(x) - else: - return self.conv(x) - - -class MobileNetV2(nn.Module): - def __init__(self, n_class=1000, input_size=224, width_mult=1.): - super(MobileNetV2, self).__init__() - block = InvertedResidual - input_channel = 32 - last_channel = 1280 - interverted_residual_setting = [ - # t, c, n, s - [1, 16, 1, 1], - [6, 24, 2, 2], # layer 2 - [6, 32, 3, 2], # layer 3 - [6, 64, 4, 2], - [6, 96, 3, 1], # layer 4 - [6, 160, 3, 2], - [6, 320, 1, 1], # layer 5 - ] - - # building first layer - assert input_size % 32 == 0 - input_channel = int(input_channel * width_mult) - self.last_channel = int(last_channel * width_mult) if width_mult > 1.0 else last_channel - self.features = [conv_bn(3, input_channel, 2)] - # building inverted residual blocks - for t, c, n, s in interverted_residual_setting: - output_channel = int(c * width_mult) - for i in range(n): - if i == 0: - self.features.append(block(input_channel, output_channel, s, expand_ratio=t)) - else: - self.features.append(block(input_channel, output_channel, 1, expand_ratio=t)) - input_channel = output_channel - # building last several layers - self.features.append(conv_1x1_bn(input_channel, self.last_channel)) - # make it nn.Sequential - self.features = nn.Sequential(*self.features) - - # building classifier - self.classifier = nn.Sequential( - nn.Dropout(0.2), - nn.Linear(self.last_channel, n_class), - ) - - self._initialize_weights() - - def forward(self, x): - x = self.features(x) - x = x.mean(3).mean(2) - x = self.classifier(x) - return x - - def _initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - n = m.weight.size(1) - m.weight.data.normal_(0, 0.01) - m.bias.data.zero_() - - -def mobilenetv2(pretrained=False, **kwargs): - """Constructs a MobileNet_V2 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = MobileNetV2(n_class=1000, **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['mobilenetv2']), strict=False) - return model diff --git a/preprocess/humanparsing/networks/backbone/resnet.py b/preprocess/humanparsing/networks/backbone/resnet.py deleted file mode 100644 index 88d6f73..0000000 --- a/preprocess/humanparsing/networks/backbone/resnet.py +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : resnet.py -@Time : 8/4/19 3:35 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import functools -import torch.nn as nn -import math -from torch.utils.model_zoo import load_url - -from modules import InPlaceABNSync - -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - -__all__ = ['ResNet', 'resnet18', 'resnet50', 'resnet101'] # resnet101 is coming soon! - -model_urls = { - 'resnet18': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet18-imagenet.pth', - 'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth', - 'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth' -} - - -def conv3x3(in_planes, out_planes, stride=1): - "3x3 convolution with padding" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, bias=False) - self.bn2 = BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class 
ResNet(nn.Module): - - def __init__(self, block, layers, num_classes=1000): - self.inplanes = 128 - super(ResNet, self).__init__() - self.conv1 = conv3x3(3, 64, stride=2) - self.bn1 = BatchNorm2d(64) - self.relu1 = nn.ReLU(inplace=True) - self.conv2 = conv3x3(64, 64) - self.bn2 = BatchNorm2d(64) - self.relu2 = nn.ReLU(inplace=True) - self.conv3 = conv3x3(64, 128) - self.bn3 = BatchNorm2d(128) - self.relu3 = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.avgpool = nn.AvgPool2d(7, stride=1) - self.fc = nn.Linear(512 * block.expansion, num_classes) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.relu1(self.bn1(self.conv1(x))) - x = self.relu2(self.bn2(self.conv2(x))) - x = self.relu3(self.bn3(self.conv3(x))) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x - - -def resnet18(pretrained=False, **kwargs): - """Constructs a ResNet-18 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['resnet18'])) - return model - - -def resnet50(pretrained=False, **kwargs): - """Constructs a ResNet-50 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['resnet50']), strict=False) - return model - - -def resnet101(pretrained=False, **kwargs): - """Constructs a ResNet-101 model. - Args: - pretrained (bool): If True, returns a model pre-trained on ImageNet - """ - model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['resnet101']), strict=False) - return model diff --git a/preprocess/humanparsing/networks/backbone/resnext.py b/preprocess/humanparsing/networks/backbone/resnext.py deleted file mode 100644 index 96adb54..0000000 --- a/preprocess/humanparsing/networks/backbone/resnext.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : resnext.py.py -@Time : 8/11/19 8:58 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
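The resnet18/50/101 constructors above are plain ImageNet classifiers that later serve as parsing backbones. A small stand-alone sketch follows; it is illustrative only: pretrained=False avoids the sceneparsing.csail.mit.edu download, and the InPlaceABNSync-backed BatchNorm2d must be importable with a GPU present.

# Hypothetical smoke test for networks/backbone/resnet.py under the assumptions above.
import torch
from networks.backbone.resnet import resnet50

net = resnet50(pretrained=False).cuda().eval()
with torch.no_grad():
    logits = net(torch.randn(2, 3, 224, 224, device='cuda'))

# The deep 3x3 stem + maxpool give 1/4 resolution and layer2-layer4 reach 1/32,
# so a 224x224 input is 7x7 before the fixed AvgPool2d(7) and the classifier.
print(logits.shape)   # torch.Size([2, 1000])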
-""" -import functools -import torch.nn as nn -import math -from torch.utils.model_zoo import load_url - -from modules import InPlaceABNSync - -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - -__all__ = ['ResNeXt', 'resnext101'] # support resnext 101 - -model_urls = { - 'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth', - 'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth' -} - - -def conv3x3(in_planes, out_planes, stride=1): - "3x3 convolution with padding" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - padding=1, bias=False) - - -class GroupBottleneck(nn.Module): - expansion = 2 - - def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None): - super(GroupBottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, - padding=1, groups=groups, bias=False) - self.bn2 = BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=False) - self.bn3 = BatchNorm2d(planes * 2) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class ResNeXt(nn.Module): - - def __init__(self, block, layers, groups=32, num_classes=1000): - self.inplanes = 128 - super(ResNeXt, self).__init__() - self.conv1 = conv3x3(3, 64, stride=2) - self.bn1 = BatchNorm2d(64) - self.relu1 = nn.ReLU(inplace=True) - self.conv2 = conv3x3(64, 64) - self.bn2 = BatchNorm2d(64) - self.relu2 = nn.ReLU(inplace=True) - self.conv3 = conv3x3(64, 128) - self.bn3 = BatchNorm2d(128) - self.relu3 = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - - self.layer1 = self._make_layer(block, 128, layers[0], groups=groups) - self.layer2 = self._make_layer(block, 256, layers[1], stride=2, groups=groups) - self.layer3 = self._make_layer(block, 512, layers[2], stride=2, groups=groups) - self.layer4 = self._make_layer(block, 1024, layers[3], stride=2, groups=groups) - self.avgpool = nn.AvgPool2d(7, stride=1) - self.fc = nn.Linear(1024 * block.expansion, num_classes) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels // m.groups - m.weight.data.normal_(0, math.sqrt(2. 
/ n)) - elif isinstance(m, BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1, groups=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, groups, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes, groups=groups)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.relu1(self.bn1(self.conv1(x))) - x = self.relu2(self.bn2(self.conv2(x))) - x = self.relu3(self.bn3(self.conv3(x))) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x - - -def resnext101(pretrained=False, **kwargs): - """Constructs a ResNet-101 model. - Args: - pretrained (bool): If True, returns a model pre-trained on Places - """ - model = ResNeXt(GroupBottleneck, [3, 4, 23, 3], **kwargs) - if pretrained: - model.load_state_dict(load_url(model_urls['resnext101']), strict=False) - return model diff --git a/preprocess/humanparsing/networks/context_encoding/aspp.py b/preprocess/humanparsing/networks/context_encoding/aspp.py deleted file mode 100644 index d0ba531..0000000 --- a/preprocess/humanparsing/networks/context_encoding/aspp.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : aspp.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import torch -import torch.nn as nn -from torch.nn import functional as F - -from modules import InPlaceABNSync - - -class ASPPModule(nn.Module): - """ - Reference: - Chen, Liang-Chieh, et al. 
*"Rethinking Atrous Convolution for Semantic Image Segmentation."* - """ - def __init__(self, features, out_features=512, inner_features=256, dilations=(12, 24, 36)): - super(ASPPModule, self).__init__() - - self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)), - nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, - bias=False), - InPlaceABNSync(inner_features)) - self.conv2 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(inner_features)) - self.conv3 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), - InPlaceABNSync(inner_features)) - self.conv4 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), - InPlaceABNSync(inner_features)) - self.conv5 = nn.Sequential( - nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), - InPlaceABNSync(inner_features)) - - self.bottleneck = nn.Sequential( - nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(out_features), - nn.Dropout2d(0.1) - ) - - def forward(self, x): - _, _, h, w = x.size() - - feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True) - - feat2 = self.conv2(x) - feat3 = self.conv3(x) - feat4 = self.conv4(x) - feat5 = self.conv5(x) - out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) - - bottle = self.bottleneck(out) - return bottle \ No newline at end of file diff --git a/preprocess/humanparsing/networks/context_encoding/ocnet.py b/preprocess/humanparsing/networks/context_encoding/ocnet.py deleted file mode 100644 index ac43ebf..0000000 --- a/preprocess/humanparsing/networks/context_encoding/ocnet.py +++ /dev/null @@ -1,226 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : ocnet.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
-""" - -import functools - -import torch -import torch.nn as nn -from torch.autograd import Variable -from torch.nn import functional as F - -from modules import InPlaceABNSync -BatchNorm2d = functools.partial(InPlaceABNSync, activation='none') - - -class _SelfAttentionBlock(nn.Module): - ''' - The basic implementation for self-attention block/non-local block - Input: - N X C X H X W - Parameters: - in_channels : the dimension of the input feature map - key_channels : the dimension after the key/query transform - value_channels : the dimension after the value transform - scale : choose the scale to downsample the input feature maps (save memory cost) - Return: - N X C X H X W - position-aware context features.(w/o concate or add with the input) - ''' - - def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1): - super(_SelfAttentionBlock, self).__init__() - self.scale = scale - self.in_channels = in_channels - self.out_channels = out_channels - self.key_channels = key_channels - self.value_channels = value_channels - if out_channels == None: - self.out_channels = in_channels - self.pool = nn.MaxPool2d(kernel_size=(scale, scale)) - self.f_key = nn.Sequential( - nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, - kernel_size=1, stride=1, padding=0), - InPlaceABNSync(self.key_channels), - ) - self.f_query = self.f_key - self.f_value = nn.Conv2d(in_channels=self.in_channels, out_channels=self.value_channels, - kernel_size=1, stride=1, padding=0) - self.W = nn.Conv2d(in_channels=self.value_channels, out_channels=self.out_channels, - kernel_size=1, stride=1, padding=0) - nn.init.constant(self.W.weight, 0) - nn.init.constant(self.W.bias, 0) - - def forward(self, x): - batch_size, h, w = x.size(0), x.size(2), x.size(3) - if self.scale > 1: - x = self.pool(x) - - value = self.f_value(x).view(batch_size, self.value_channels, -1) - value = value.permute(0, 2, 1) - query = self.f_query(x).view(batch_size, self.key_channels, -1) - query = query.permute(0, 2, 1) - key = self.f_key(x).view(batch_size, self.key_channels, -1) - - sim_map = torch.matmul(query, key) - sim_map = (self.key_channels ** -.5) * sim_map - sim_map = F.softmax(sim_map, dim=-1) - - context = torch.matmul(sim_map, value) - context = context.permute(0, 2, 1).contiguous() - context = context.view(batch_size, self.value_channels, *x.size()[2:]) - context = self.W(context) - if self.scale > 1: - context = F.upsample(input=context, size=(h, w), mode='bilinear', align_corners=True) - return context - - -class SelfAttentionBlock2D(_SelfAttentionBlock): - def __init__(self, in_channels, key_channels, value_channels, out_channels=None, scale=1): - super(SelfAttentionBlock2D, self).__init__(in_channels, - key_channels, - value_channels, - out_channels, - scale) - - -class BaseOC_Module(nn.Module): - """ - Implementation of the BaseOC module - Parameters: - in_features / out_features: the channels of the input / output feature maps. - dropout: we choose 0.05 as the default value. - size: you can apply multiple sizes. Here we only use one size. - Return: - features fused with Object context information. 
- """ - - def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])): - super(BaseOC_Module, self).__init__() - self.stages = [] - self.stages = nn.ModuleList( - [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes]) - self.conv_bn_dropout = nn.Sequential( - nn.Conv2d(2 * in_channels, out_channels, kernel_size=1, padding=0), - InPlaceABNSync(out_channels), - nn.Dropout2d(dropout) - ) - - def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size): - return SelfAttentionBlock2D(in_channels, - key_channels, - value_channels, - output_channels, - size) - - def forward(self, feats): - priors = [stage(feats) for stage in self.stages] - context = priors[0] - for i in range(1, len(priors)): - context += priors[i] - output = self.conv_bn_dropout(torch.cat([context, feats], 1)) - return output - - -class BaseOC_Context_Module(nn.Module): - """ - Output only the context features. - Parameters: - in_features / out_features: the channels of the input / output feature maps. - dropout: specify the dropout ratio - fusion: We provide two different fusion method, "concat" or "add" - size: we find that directly learn the attention weights on even 1/8 feature maps is hard. - Return: - features after "concat" or "add" - """ - - def __init__(self, in_channels, out_channels, key_channels, value_channels, dropout, sizes=([1])): - super(BaseOC_Context_Module, self).__init__() - self.stages = [] - self.stages = nn.ModuleList( - [self._make_stage(in_channels, out_channels, key_channels, value_channels, size) for size in sizes]) - self.conv_bn_dropout = nn.Sequential( - nn.Conv2d(in_channels, out_channels, kernel_size=1, padding=0), - InPlaceABNSync(out_channels), - ) - - def _make_stage(self, in_channels, output_channels, key_channels, value_channels, size): - return SelfAttentionBlock2D(in_channels, - key_channels, - value_channels, - output_channels, - size) - - def forward(self, feats): - priors = [stage(feats) for stage in self.stages] - context = priors[0] - for i in range(1, len(priors)): - context += priors[i] - output = self.conv_bn_dropout(context) - return output - - -class ASP_OC_Module(nn.Module): - def __init__(self, features, out_features=256, dilations=(12, 24, 36)): - super(ASP_OC_Module, self).__init__() - self.context = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=3, padding=1, dilation=1, bias=True), - InPlaceABNSync(out_features), - BaseOC_Context_Module(in_channels=out_features, out_channels=out_features, - key_channels=out_features // 2, value_channels=out_features, - dropout=0, sizes=([2]))) - self.conv2 = nn.Sequential(nn.Conv2d(features, out_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(out_features)) - self.conv3 = nn.Sequential( - nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False), - InPlaceABNSync(out_features)) - self.conv4 = nn.Sequential( - nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False), - InPlaceABNSync(out_features)) - self.conv5 = nn.Sequential( - nn.Conv2d(features, out_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False), - InPlaceABNSync(out_features)) - - self.conv_bn_dropout = nn.Sequential( - nn.Conv2d(out_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False), - InPlaceABNSync(out_features), - nn.Dropout2d(0.1) - ) - - def _cat_each(self, feat1, feat2, 
feat3, feat4, feat5): - assert (len(feat1) == len(feat2)) - z = [] - for i in range(len(feat1)): - z.append(torch.cat((feat1[i], feat2[i], feat3[i], feat4[i], feat5[i]), 1)) - return z - - def forward(self, x): - if isinstance(x, Variable): - _, _, h, w = x.size() - elif isinstance(x, tuple) or isinstance(x, list): - _, _, h, w = x[0].size() - else: - raise RuntimeError('unknown input type') - - feat1 = self.context(x) - feat2 = self.conv2(x) - feat3 = self.conv3(x) - feat4 = self.conv4(x) - feat5 = self.conv5(x) - - if isinstance(x, Variable): - out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1) - elif isinstance(x, tuple) or isinstance(x, list): - out = self._cat_each(feat1, feat2, feat3, feat4, feat5) - else: - raise RuntimeError('unknown input type') - output = self.conv_bn_dropout(out) - return output diff --git a/preprocess/humanparsing/networks/context_encoding/psp.py b/preprocess/humanparsing/networks/context_encoding/psp.py deleted file mode 100644 index 47181dc..0000000 --- a/preprocess/humanparsing/networks/context_encoding/psp.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : psp.py -@Time : 8/4/19 3:36 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import torch -import torch.nn as nn -from torch.nn import functional as F - -from modules import InPlaceABNSync - - -class PSPModule(nn.Module): - """ - Reference: - Zhao, Hengshuang, et al. *"Pyramid scene parsing network."* - """ - def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)): - super(PSPModule, self).__init__() - - self.stages = [] - self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes]) - self.bottleneck = nn.Sequential( - nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1, - bias=False), - InPlaceABNSync(out_features), - ) - - def _make_stage(self, features, out_features, size): - prior = nn.AdaptiveAvgPool2d(output_size=(size, size)) - conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False) - bn = InPlaceABNSync(out_features) - return nn.Sequential(prior, conv, bn) - - def forward(self, feats): - h, w = feats.size(2), feats.size(3) - priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in - self.stages] + [feats] - bottle = self.bottleneck(torch.cat(priors, 1)) - return bottle \ No newline at end of file diff --git a/preprocess/humanparsing/utils/consistency_loss.py b/preprocess/humanparsing/utils/consistency_loss.py deleted file mode 100644 index b872fdc..0000000 --- a/preprocess/humanparsing/utils/consistency_loss.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : kl_loss.py -@Time : 7/23/19 4:02 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. 
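Before moving on to the losses, a CPU-runnable skeleton of the pyramid pooling performed by PSPModule above, with the 1x1 convolutions and InPlaceABNSync stages stripped out so only the pool-and-upsample shapes remain; the 2048x30x30 input is illustrative.

# Pool the feature map to 1x1, 2x2, 3x3 and 6x6 grids, upsample each back, and concatenate.
import torch
import torch.nn.functional as F

feats = torch.randn(1, 2048, 30, 30)
priors = []
for size in (1, 2, 3, 6):
    pooled = F.adaptive_avg_pool2d(feats, output_size=(size, size))   # (1, 2048, size, size)
    priors.append(F.interpolate(pooled, size=feats.shape[-2:],
                                mode='bilinear', align_corners=True))
stacked = torch.cat(priors + [feats], dim=1)
print(stacked.shape)   # torch.Size([1, 10240, 30, 30]); the real module first projects each
                       # prior to out_features=512, so its bottleneck sees 2048 + 4*512 = 4096 channels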
-""" -import torch -import torch.nn.functional as F -from torch import nn -from datasets.target_generation import generate_edge_tensor - - -class ConsistencyLoss(nn.Module): - def __init__(self, ignore_index=255): - super(ConsistencyLoss, self).__init__() - self.ignore_index=ignore_index - - def forward(self, parsing, edge, label): - parsing_pre = torch.argmax(parsing, dim=1) - parsing_pre[label==self.ignore_index]=self.ignore_index - generated_edge = generate_edge_tensor(parsing_pre) - edge_pre = torch.argmax(edge, dim=1) - v_generate_edge = generated_edge[label!=255] - v_edge_pre = edge_pre[label!=255] - v_edge_pre = v_edge_pre.type(torch.cuda.FloatTensor) - positive_union = (v_generate_edge==1)&(v_edge_pre==1) # only the positive values count - return F.smooth_l1_loss(v_generate_edge[positive_union].squeeze(0), v_edge_pre[positive_union].squeeze(0)) diff --git a/preprocess/humanparsing/utils/criterion.py b/preprocess/humanparsing/utils/criterion.py deleted file mode 100644 index 9688943..0000000 --- a/preprocess/humanparsing/utils/criterion.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : criterion.py -@Time : 8/30/19 8:59 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import torch.nn as nn -import torch -import numpy as np -from torch.nn import functional as F -from .lovasz_softmax import LovaszSoftmax -from .kl_loss import KLDivergenceLoss -from .consistency_loss import ConsistencyLoss - -NUM_CLASSES = 20 - - -class CriterionAll(nn.Module): - def __init__(self, use_class_weight=False, ignore_index=255, lambda_1=1, lambda_2=1, lambda_3=1, - num_classes=20): - super(CriterionAll, self).__init__() - self.ignore_index = ignore_index - self.use_class_weight = use_class_weight - self.criterion = torch.nn.CrossEntropyLoss(ignore_index=ignore_index) - self.lovasz = LovaszSoftmax(ignore_index=ignore_index) - self.kldiv = KLDivergenceLoss(ignore_index=ignore_index) - self.reg = ConsistencyLoss(ignore_index=ignore_index) - self.lamda_1 = lambda_1 - self.lamda_2 = lambda_2 - self.lamda_3 = lambda_3 - self.num_classes = num_classes - - def parsing_loss(self, preds, target, cycle_n=None): - """ - Loss function definition. - - Args: - preds: [[parsing result1, parsing result2],[edge result]] - target: [parsing label, egde label] - soft_preds: [[parsing result1, parsing result2],[edge result]] - Returns: - Calculated Loss. 
- """ - h, w = target[0].size(1), target[0].size(2) - - pos_num = torch.sum(target[1] == 1, dtype=torch.float) - neg_num = torch.sum(target[1] == 0, dtype=torch.float) - - weight_pos = neg_num / (pos_num + neg_num) - weight_neg = pos_num / (pos_num + neg_num) - weights = torch.tensor([weight_neg, weight_pos]) # edge loss weight - - loss = 0 - - # loss for segmentation - preds_parsing = preds[0] - for pred_parsing in preds_parsing: - scale_pred = F.interpolate(input=pred_parsing, size=(h, w), - mode='bilinear', align_corners=True) - - loss += 0.5 * self.lamda_1 * self.lovasz(scale_pred, target[0]) - if target[2] is None: - loss += 0.5 * self.lamda_1 * self.criterion(scale_pred, target[0]) - else: - soft_scale_pred = F.interpolate(input=target[2], size=(h, w), - mode='bilinear', align_corners=True) - soft_scale_pred = moving_average(soft_scale_pred, to_one_hot(target[0], num_cls=self.num_classes), - 1.0 / (cycle_n + 1.0)) - loss += 0.5 * self.lamda_1 * self.kldiv(scale_pred, soft_scale_pred, target[0]) - - # loss for edge - preds_edge = preds[1] - for pred_edge in preds_edge: - scale_pred = F.interpolate(input=pred_edge, size=(h, w), - mode='bilinear', align_corners=True) - if target[3] is None: - loss += self.lamda_2 * F.cross_entropy(scale_pred, target[1], - weights.cuda(), ignore_index=self.ignore_index) - else: - soft_scale_edge = F.interpolate(input=target[3], size=(h, w), - mode='bilinear', align_corners=True) - soft_scale_edge = moving_average(soft_scale_edge, to_one_hot(target[1], num_cls=2), - 1.0 / (cycle_n + 1.0)) - loss += self.lamda_2 * self.kldiv(scale_pred, soft_scale_edge, target[0]) - - # consistency regularization - preds_parsing = preds[0] - preds_edge = preds[1] - for pred_parsing in preds_parsing: - scale_pred = F.interpolate(input=pred_parsing, size=(h, w), - mode='bilinear', align_corners=True) - scale_edge = F.interpolate(input=preds_edge[0], size=(h, w), - mode='bilinear', align_corners=True) - loss += self.lamda_3 * self.reg(scale_pred, scale_edge, target[0]) - - return loss - - def forward(self, preds, target, cycle_n=None): - loss = self.parsing_loss(preds, target, cycle_n) - return loss - - def _generate_weights(self, masks, num_classes): - """ - masks: torch.Tensor with shape [B, H, W] - """ - masks_label = masks.data.cpu().numpy().astype(np.int64) - pixel_nums = [] - tot_pixels = 0 - for i in range(num_classes): - pixel_num_of_cls_i = np.sum(masks_label == i).astype(np.float) - pixel_nums.append(pixel_num_of_cls_i) - tot_pixels += pixel_num_of_cls_i - weights = [] - for i in range(num_classes): - weights.append( - (tot_pixels - pixel_nums[i]) / tot_pixels / (num_classes - 1) - ) - weights = np.array(weights, dtype=np.float) - # weights = torch.from_numpy(weights).float().to(masks.device) - return weights - - -def moving_average(target1, target2, alpha=1.0): - target = 0 - target += (1.0 - alpha) * target1 - target += target2 * alpha - return target - - -def to_one_hot(tensor, num_cls, dim=1, ignore_index=255): - b, h, w = tensor.shape - tensor[tensor == ignore_index] = 0 - onehot_tensor = torch.zeros(b, num_cls, h, w).cuda() - onehot_tensor.scatter_(dim, tensor.unsqueeze(dim), 1) - return onehot_tensor diff --git a/preprocess/humanparsing/utils/encoding.py b/preprocess/humanparsing/utils/encoding.py deleted file mode 100644 index e865470..0000000 --- a/preprocess/humanparsing/utils/encoding.py +++ /dev/null @@ -1,188 +0,0 @@ -##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -## Created by: Hang Zhang -## ECE Department, Rutgers 
University -## Email: zhang.hang@rutgers.edu -## Copyright (c) 2017 -## -## This source code is licensed under the MIT-style license found in the -## LICENSE file in the root directory of this source tree -##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -"""Encoding Data Parallel""" -import threading -import functools -import torch -from torch.autograd import Variable, Function -import torch.cuda.comm as comm -from torch.nn.parallel.data_parallel import DataParallel -from torch.nn.parallel.parallel_apply import get_a_var -from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast - -torch_ver = torch.__version__[:3] - -__all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 'patch_replication_callback'] - -def allreduce(*inputs): - """Cross GPU all reduce autograd operation for calculate mean and - variance in SyncBN. - """ - return AllReduce.apply(*inputs) - -class AllReduce(Function): - @staticmethod - def forward(ctx, num_inputs, *inputs): - ctx.num_inputs = num_inputs - ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] - inputs = [inputs[i:i + num_inputs] - for i in range(0, len(inputs), num_inputs)] - # sort before reduce sum - inputs = sorted(inputs, key=lambda i: i[0].get_device()) - results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) - outputs = comm.broadcast_coalesced(results, ctx.target_gpus) - return tuple([t for tensors in outputs for t in tensors]) - - @staticmethod - def backward(ctx, *inputs): - inputs = [i.data for i in inputs] - inputs = [inputs[i:i + ctx.num_inputs] - for i in range(0, len(inputs), ctx.num_inputs)] - results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) - outputs = comm.broadcast_coalesced(results, ctx.target_gpus) - return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) - -class Reduce(Function): - @staticmethod - def forward(ctx, *inputs): - ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] - inputs = sorted(inputs, key=lambda i: i.get_device()) - return comm.reduce_add(inputs) - - @staticmethod - def backward(ctx, gradOutput): - return Broadcast.apply(ctx.target_gpus, gradOutput) - - -class DataParallelModel(DataParallel): - """Implements data parallelism at the module level. - - This container parallelizes the application of the given module by - splitting the input across the specified devices by chunking in the - batch dimension. - In the forward pass, the module is replicated on each device, - and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. - Note that the outputs are not gathered, please use compatible - :class:`encoding.parallel.DataParallelCriterion`. - - The batch size should be larger than the number of GPUs used. It should - also be an integer multiple of the number of GPUs so that each chunk is - the same size (so that each GPU processes the same number of samples). - - Args: - module: module to be parallelized - device_ids: CUDA devices (default: all devices) - - Reference: - Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, - Amit Agrawal. “Context Encoding for Semantic Segmentation. 
- *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* - - Example:: - - >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) - >>> y = net(x) - """ - def gather(self, outputs, output_device): - return outputs - - def replicate(self, module, device_ids): - modules = super(DataParallelModel, self).replicate(module, device_ids) - return modules - - -class DataParallelCriterion(DataParallel): - """ - Calculate loss in multiple-GPUs, which balance the memory usage for - Semantic Segmentation. - - The targets are splitted across the specified devices by chunking in - the batch dimension. Please use together with :class:`encoding.parallel.DataParallelModel`. - - Reference: - Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, - Amit Agrawal. “Context Encoding for Semantic Segmentation. - *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* - - Example:: - - >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) - >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) - >>> y = net(x) - >>> loss = criterion(y, target) - """ - def forward(self, inputs, *targets, **kwargs): - # input should be already scatterd - # scattering the targets instead - if not self.device_ids: - return self.module(inputs, *targets, **kwargs) - targets, kwargs = self.scatter(targets, kwargs, self.device_ids) - if len(self.device_ids) == 1: - return self.module(inputs, *targets[0], **kwargs[0]) - replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) - outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) - return Reduce.apply(*outputs) / len(outputs) - - -def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): - assert len(modules) == len(inputs) - assert len(targets) == len(inputs) - if kwargs_tup: - assert len(modules) == len(kwargs_tup) - else: - kwargs_tup = ({},) * len(modules) - if devices is not None: - assert len(modules) == len(devices) - else: - devices = [None] * len(modules) - - lock = threading.Lock() - results = {} - if torch_ver != "0.3": - grad_enabled = torch.is_grad_enabled() - - def _worker(i, module, input, target, kwargs, device=None): - if torch_ver != "0.3": - torch.set_grad_enabled(grad_enabled) - if device is None: - device = get_a_var(input).get_device() - try: - if not isinstance(input, tuple): - input = (input,) - with torch.cuda.device(device): - output = module(*(input + target), **kwargs) - with lock: - results[i] = output - except Exception as e: - with lock: - results[i] = e - - if len(modules) > 1: - threads = [threading.Thread(target=_worker, - args=(i, module, input, target, - kwargs, device),) - for i, (module, input, target, kwargs, device) in - enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] - - for thread in threads: - thread.start() - for thread in threads: - thread.join() - else: - _worker(0, modules[0], inputs[0], kwargs_tup[0], devices[0]) - - outputs = [] - for i in range(len(inputs)): - output = results[i] - if isinstance(output, Exception): - raise output - outputs.append(output) - return outputs diff --git a/preprocess/humanparsing/utils/kl_loss.py b/preprocess/humanparsing/utils/kl_loss.py deleted file mode 100644 index 9a685d9..0000000 --- a/preprocess/humanparsing/utils/kl_loss.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : kl_loss.py -@Time : 7/23/19 4:02 
PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" -import torch -import torch.nn.functional as F -from torch import nn - - -def flatten_probas(input, target, labels, ignore=255): - """ - Flattens predictions in the batch. - """ - B, C, H, W = input.size() - input = input.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C - target = target.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C - labels = labels.view(-1) - if ignore is None: - return input, target - valid = (labels != ignore) - vinput = input[valid.nonzero().squeeze()] - vtarget = target[valid.nonzero().squeeze()] - return vinput, vtarget - - -class KLDivergenceLoss(nn.Module): - def __init__(self, ignore_index=255, T=1): - super(KLDivergenceLoss, self).__init__() - self.ignore_index=ignore_index - self.T = T - - def forward(self, input, target, label): - log_input_prob = F.log_softmax(input / self.T, dim=1) - target_porb = F.softmax(target / self.T, dim=1) - loss = F.kl_div(*flatten_probas(log_input_prob, target_porb, label, ignore=self.ignore_index)) - return self.T*self.T*loss # balanced diff --git a/preprocess/humanparsing/utils/lovasz_softmax.py b/preprocess/humanparsing/utils/lovasz_softmax.py deleted file mode 100644 index b6e444f..0000000 --- a/preprocess/humanparsing/utils/lovasz_softmax.py +++ /dev/null @@ -1,279 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : lovasz_softmax.py -@Time : 8/30/19 7:12 PM -@Desc : Lovasz-Softmax and Jaccard hinge loss in PyTorch - Maxim Berman 2018 ESAT-PSI KU Leuven (MIT License) -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -from __future__ import print_function, division - -import torch -from torch.autograd import Variable -import torch.nn.functional as F -import numpy as np -from torch import nn - -try: - from itertools import ifilterfalse -except ImportError: # py3k - from itertools import filterfalse as ifilterfalse - - -def lovasz_grad(gt_sorted): - """ - Computes gradient of the Lovasz extension w.r.t sorted errors - See Alg. 1 in paper - """ - p = len(gt_sorted) - gts = gt_sorted.sum() - intersection = gts - gt_sorted.float().cumsum(0) - union = gts + (1 - gt_sorted).float().cumsum(0) - jaccard = 1. 
- intersection / union - if p > 1: # cover 1-pixel case - jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] - return jaccard - - -def iou_binary(preds, labels, EMPTY=1., ignore=None, per_image=True): - """ - IoU for foreground class - binary: 1 foreground, 0 background - """ - if not per_image: - preds, labels = (preds,), (labels,) - ious = [] - for pred, label in zip(preds, labels): - intersection = ((label == 1) & (pred == 1)).sum() - union = ((label == 1) | ((pred == 1) & (label != ignore))).sum() - if not union: - iou = EMPTY - else: - iou = float(intersection) / float(union) - ious.append(iou) - iou = mean(ious) # mean accross images if per_image - return 100 * iou - - -def iou(preds, labels, C, EMPTY=1., ignore=None, per_image=False): - """ - Array of IoU for each (non ignored) class - """ - if not per_image: - preds, labels = (preds,), (labels,) - ious = [] - for pred, label in zip(preds, labels): - iou = [] - for i in range(C): - if i != ignore: # The ignored label is sometimes among predicted classes (ENet - CityScapes) - intersection = ((label == i) & (pred == i)).sum() - union = ((label == i) | ((pred == i) & (label != ignore))).sum() - if not union: - iou.append(EMPTY) - else: - iou.append(float(intersection) / float(union)) - ious.append(iou) - ious = [mean(iou) for iou in zip(*ious)] # mean accross images if per_image - return 100 * np.array(ious) - - -# --------------------------- BINARY LOSSES --------------------------- - - -def lovasz_hinge(logits, labels, per_image=True, ignore=None): - """ - Binary Lovasz hinge loss - logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) - labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) - per_image: compute the loss per image instead of per batch - ignore: void class id - """ - if per_image: - loss = mean(lovasz_hinge_flat(*flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore)) - for log, lab in zip(logits, labels)) - else: - loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) - return loss - - -def lovasz_hinge_flat(logits, labels): - """ - Binary Lovasz hinge loss - logits: [P] Variable, logits at each prediction (between -\infty and +\infty) - labels: [P] Tensor, binary ground truth labels (0 or 1) - ignore: label to ignore - """ - if len(labels) == 0: - # only void pixels, the gradients should be 0 - return logits.sum() * 0. - signs = 2. * labels.float() - 1. - errors = (1. 
- logits * Variable(signs)) - errors_sorted, perm = torch.sort(errors, dim=0, descending=True) - perm = perm.data - gt_sorted = labels[perm] - grad = lovasz_grad(gt_sorted) - loss = torch.dot(F.relu(errors_sorted), Variable(grad)) - return loss - - -def flatten_binary_scores(scores, labels, ignore=None): - """ - Flattens predictions in the batch (binary case) - Remove labels equal to 'ignore' - """ - scores = scores.view(-1) - labels = labels.view(-1) - if ignore is None: - return scores, labels - valid = (labels != ignore) - vscores = scores[valid] - vlabels = labels[valid] - return vscores, vlabels - - -class StableBCELoss(torch.nn.modules.Module): - def __init__(self): - super(StableBCELoss, self).__init__() - - def forward(self, input, target): - neg_abs = - input.abs() - loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() - return loss.mean() - - -def binary_xloss(logits, labels, ignore=None): - """ - Binary Cross entropy loss - logits: [B, H, W] Variable, logits at each pixel (between -\infty and +\infty) - labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) - ignore: void class id - """ - logits, labels = flatten_binary_scores(logits, labels, ignore) - loss = StableBCELoss()(logits, Variable(labels.float())) - return loss - - -# --------------------------- MULTICLASS LOSSES --------------------------- - - -def lovasz_softmax(probas, labels, classes='present', per_image=False, ignore=255, weighted=None): - """ - Multi-class Lovasz-Softmax loss - probas: [B, C, H, W] Variable, class probabilities at each prediction (between 0 and 1). - Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. - labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) - classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. - per_image: compute the loss per image instead of per batch - ignore: void class labels - """ - if per_image: - loss = mean(lovasz_softmax_flat(*flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), classes=classes, weighted=weighted) - for prob, lab in zip(probas, labels)) - else: - loss = lovasz_softmax_flat(*flatten_probas(probas, labels, ignore), classes=classes, weighted=weighted ) - return loss - - -def lovasz_softmax_flat(probas, labels, classes='present', weighted=None): - """ - Multi-class Lovasz-Softmax loss - probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) - labels: [P] Tensor, ground truth labels (between 0 and C - 1) - classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. - """ - if probas.numel() == 0: - # only void pixels, the gradients should be 0 - return probas * 0. 
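# A self-contained toy sketch of the Lovasz construction this module builds on
# (illustrative names and values, not part of the original file): sort the
# per-pixel errors in decreasing order, turn the sorted ground truth into
# per-position Jaccard increments via the lovasz_grad recurrence, then take the
# dot product of the two.
import torch

def toy_lovasz_grad(gt_sorted):
    # same algebra as lovasz_grad above, restated for a 1-D toy tensor
    gts = gt_sorted.sum()
    intersection = gts - gt_sorted.cumsum(0)
    union = gts + (1 - gt_sorted).cumsum(0)
    jaccard = 1. - intersection / union
    if len(gt_sorted) > 1:
        jaccard[1:] = jaccard[1:] - jaccard[:-1]
    return jaccard

fg = torch.tensor([1., 0., 1., 0.])         # foreground mask for one class
pred = torch.tensor([0.9, 0.8, 0.3, 0.1])   # predicted class probabilities
errors = (fg - pred).abs()
errors_sorted, perm = torch.sort(errors, 0, descending=True)
loss = torch.dot(errors_sorted, toy_lovasz_grad(fg[perm]))  # surrogate for 1 - IoU
print(float(loss))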
- C = probas.size(1) - losses = [] - class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes - for c in class_to_sum: - fg = (labels == c).float() # foreground for class c - if (classes is 'present' and fg.sum() == 0): - continue - if C == 1: - if len(classes) > 1: - raise ValueError('Sigmoid output possible only with 1 class') - class_pred = probas[:, 0] - else: - class_pred = probas[:, c] - errors = (Variable(fg) - class_pred).abs() - errors_sorted, perm = torch.sort(errors, 0, descending=True) - perm = perm.data - fg_sorted = fg[perm] - if weighted is not None: - losses.append(weighted[c]*torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) - else: - losses.append(torch.dot(errors_sorted, Variable(lovasz_grad(fg_sorted)))) - return mean(losses) - - -def flatten_probas(probas, labels, ignore=None): - """ - Flattens predictions in the batch - """ - if probas.dim() == 3: - # assumes output of a sigmoid layer - B, H, W = probas.size() - probas = probas.view(B, 1, H, W) - B, C, H, W = probas.size() - probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C - labels = labels.view(-1) - if ignore is None: - return probas, labels - valid = (labels != ignore) - vprobas = probas[valid.nonzero().squeeze()] - vlabels = labels[valid] - return vprobas, vlabels - - -def xloss(logits, labels, ignore=None): - """ - Cross entropy loss - """ - return F.cross_entropy(logits, Variable(labels), ignore_index=255) - - -# --------------------------- HELPER FUNCTIONS --------------------------- -def isnan(x): - return x != x - - -def mean(l, ignore_nan=False, empty=0): - """ - nanmean compatible with generators. - """ - l = iter(l) - if ignore_nan: - l = ifilterfalse(isnan, l) - try: - n = 1 - acc = next(l) - except StopIteration: - if empty == 'raise': - raise ValueError('Empty mean') - return empty - for n, v in enumerate(l, 2): - acc += v - if n == 1: - return acc - return acc / n - -# --------------------------- Class --------------------------- -class LovaszSoftmax(nn.Module): - def __init__(self, per_image=False, ignore_index=255, weighted=None): - super(LovaszSoftmax, self).__init__() - self.lovasz_softmax = lovasz_softmax - self.per_image = per_image - self.ignore_index=ignore_index - self.weighted = weighted - - def forward(self, pred, label): - pred = F.softmax(pred, dim=1) - return self.lovasz_softmax(pred, label, per_image=self.per_image, ignore=self.ignore_index, weighted=self.weighted) \ No newline at end of file diff --git a/preprocess/humanparsing/utils/miou.py b/preprocess/humanparsing/utils/miou.py deleted file mode 100644 index 51a2cc9..0000000 --- a/preprocess/humanparsing/utils/miou.py +++ /dev/null @@ -1,155 +0,0 @@ -import cv2 -import os -import numpy as np - -from collections import OrderedDict -from PIL import Image as PILImage -from utils.transforms import transform_parsing - -LABELS = ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat', \ - 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm', 'Left-leg', - 'Right-leg', 'Left-shoe', 'Right-shoe'] - - -# LABELS = ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'] - -def get_palette(num_cls): - """ Returns the color map for visualizing the segmentation mask. 
- Args: - num_cls: Number of classes - Returns: - The color map - """ - - n = num_cls - palette = [0] * (n * 3) - for j in range(0, n): - lab = j - palette[j * 3 + 0] = 0 - palette[j * 3 + 1] = 0 - palette[j * 3 + 2] = 0 - i = 0 - while lab: - palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i)) - palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i)) - palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i)) - i += 1 - lab >>= 3 - return palette - - -def get_confusion_matrix(gt_label, pred_label, num_classes): - """ - Calcute the confusion matrix by given label and pred - :param gt_label: the ground truth label - :param pred_label: the pred label - :param num_classes: the nunber of class - :return: the confusion matrix - """ - index = (gt_label * num_classes + pred_label).astype('int32') - label_count = np.bincount(index) - confusion_matrix = np.zeros((num_classes, num_classes)) - - for i_label in range(num_classes): - for i_pred_label in range(num_classes): - cur_index = i_label * num_classes + i_pred_label - if cur_index < len(label_count): - confusion_matrix[i_label, i_pred_label] = label_count[cur_index] - - return confusion_matrix - - -def compute_mean_ioU(preds, scales, centers, num_classes, datadir, input_size=[473, 473], dataset='val'): - val_file = os.path.join(datadir, dataset + '_id.txt') - val_id = [i_id.strip() for i_id in open(val_file)] - - confusion_matrix = np.zeros((num_classes, num_classes)) - - for i, pred_out in enumerate(preds): - im_name = val_id[i] - gt_path = os.path.join(datadir, dataset + '_segmentations', im_name + '.png') - gt = np.array(PILImage.open(gt_path)) - h, w = gt.shape - s = scales[i] - c = centers[i] - pred = transform_parsing(pred_out, c, s, w, h, input_size) - - gt = np.asarray(gt, dtype=np.int32) - pred = np.asarray(pred, dtype=np.int32) - - ignore_index = gt != 255 - - gt = gt[ignore_index] - pred = pred[ignore_index] - - confusion_matrix += get_confusion_matrix(gt, pred, num_classes) - - pos = confusion_matrix.sum(1) - res = confusion_matrix.sum(0) - tp = np.diag(confusion_matrix) - - pixel_accuracy = (tp.sum() / pos.sum()) * 100 - mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100 - IoU_array = (tp / np.maximum(1.0, pos + res - tp)) - IoU_array = IoU_array * 100 - mean_IoU = IoU_array.mean() - print('Pixel accuracy: %f \n' % pixel_accuracy) - print('Mean accuracy: %f \n' % mean_accuracy) - print('Mean IU: %f \n' % mean_IoU) - name_value = [] - - for i, (label, iou) in enumerate(zip(LABELS, IoU_array)): - name_value.append((label, iou)) - - name_value.append(('Pixel accuracy', pixel_accuracy)) - name_value.append(('Mean accuracy', mean_accuracy)) - name_value.append(('Mean IU', mean_IoU)) - name_value = OrderedDict(name_value) - return name_value - - -def compute_mean_ioU_file(preds_dir, num_classes, datadir, dataset='val'): - list_path = os.path.join(datadir, dataset + '_id.txt') - val_id = [i_id.strip() for i_id in open(list_path)] - - confusion_matrix = np.zeros((num_classes, num_classes)) - - for i, im_name in enumerate(val_id): - gt_path = os.path.join(datadir, 'segmentations', im_name + '.png') - gt = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE) - - pred_path = os.path.join(preds_dir, im_name + '.png') - pred = np.asarray(PILImage.open(pred_path)) - - gt = np.asarray(gt, dtype=np.int32) - pred = np.asarray(pred, dtype=np.int32) - - ignore_index = gt != 255 - - gt = gt[ignore_index] - pred = pred[ignore_index] - - confusion_matrix += get_confusion_matrix(gt, pred, num_classes) - - pos = confusion_matrix.sum(1) - res = confusion_matrix.sum(0) - 
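# A minimal, self-contained illustration (toy arrays, not part of the original
# file) of how the confusion-matrix bookkeeping here becomes pixel accuracy and
# mean IoU: rows index ground-truth classes, columns index predictions, pos and
# res are the row/column sums, and tp is the diagonal.
import numpy as np

num_classes = 3
gt = np.array([0, 0, 1, 1, 2, 2, 2, 1], dtype=np.int32)
pred = np.array([0, 1, 1, 1, 2, 0, 2, 2], dtype=np.int32)

index = gt * num_classes + pred              # same indexing trick as get_confusion_matrix
label_count = np.bincount(index, minlength=num_classes ** 2)
confusion_matrix = label_count.reshape(num_classes, num_classes).astype(np.float64)

pos = confusion_matrix.sum(1)                # ground-truth pixels per class
res = confusion_matrix.sum(0)                # predicted pixels per class
tp = np.diag(confusion_matrix)               # correctly classified pixels

pixel_accuracy = tp.sum() / pos.sum() * 100
IoU_array = tp / np.maximum(1.0, pos + res - tp) * 100
print(pixel_accuracy, IoU_array.mean())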
tp = np.diag(confusion_matrix) - - pixel_accuracy = (tp.sum() / pos.sum()) * 100 - mean_accuracy = ((tp / np.maximum(1.0, pos)).mean()) * 100 - IoU_array = (tp / np.maximum(1.0, pos + res - tp)) - IoU_array = IoU_array * 100 - mean_IoU = IoU_array.mean() - print('Pixel accuracy: %f \n' % pixel_accuracy) - print('Mean accuracy: %f \n' % mean_accuracy) - print('Mean IU: %f \n' % mean_IoU) - name_value = [] - - for i, (label, iou) in enumerate(zip(LABELS, IoU_array)): - name_value.append((label, iou)) - - name_value.append(('Pixel accuracy', pixel_accuracy)) - name_value.append(('Mean accuracy', mean_accuracy)) - name_value.append(('Mean IU', mean_IoU)) - name_value = OrderedDict(name_value) - return name_value diff --git a/preprocess/humanparsing/utils/schp.py b/preprocess/humanparsing/utils/schp.py deleted file mode 100644 index f574704..0000000 --- a/preprocess/humanparsing/utils/schp.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : schp.py -@Time : 4/8/19 2:11 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import os -import torch -import modules - -def moving_average(net1, net2, alpha=1): - for param1, param2 in zip(net1.parameters(), net2.parameters()): - param1.data *= (1.0 - alpha) - param1.data += param2.data * alpha - - -def _check_bn(module, flag): - if issubclass(module.__class__, modules.bn.InPlaceABNSync): - flag[0] = True - - -def check_bn(model): - flag = [False] - model.apply(lambda module: _check_bn(module, flag)) - return flag[0] - - -def reset_bn(module): - if issubclass(module.__class__, modules.bn.InPlaceABNSync): - module.running_mean = torch.zeros_like(module.running_mean) - module.running_var = torch.ones_like(module.running_var) - - -def _get_momenta(module, momenta): - if issubclass(module.__class__, modules.bn.InPlaceABNSync): - momenta[module] = module.momentum - - -def _set_momenta(module, momenta): - if issubclass(module.__class__, modules.bn.InPlaceABNSync): - module.momentum = momenta[module] - - -def bn_re_estimate(loader, model): - if not check_bn(model): - print('No batch norm layer detected') - return - model.train() - momenta = {} - model.apply(reset_bn) - model.apply(lambda module: _get_momenta(module, momenta)) - n = 0 - for i_iter, batch in enumerate(loader): - images, labels, _ = batch - b = images.data.size(0) - momentum = b / (n + b) - for module in momenta.keys(): - module.momentum = momentum - model(images) - n += b - model.apply(lambda module: _set_momenta(module, momenta)) - - -def save_schp_checkpoint(states, is_best_parsing, output_dir, filename='schp_checkpoint.pth.tar'): - save_path = os.path.join(output_dir, filename) - if os.path.exists(save_path): - os.remove(save_path) - torch.save(states, save_path) - if is_best_parsing and 'state_dict' in states: - best_save_path = os.path.join(output_dir, 'model_parsing_best.pth.tar') - if os.path.exists(best_save_path): - os.remove(best_save_path) - torch.save(states, best_save_path) diff --git a/preprocess/humanparsing/utils/soft_dice_loss.py b/preprocess/humanparsing/utils/soft_dice_loss.py deleted file mode 100644 index cb5895f..0000000 --- a/preprocess/humanparsing/utils/soft_dice_loss.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : soft_dice_loss.py -@Time : 8/13/19 5:09 PM -@Desc : -@License 
: This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -from __future__ import print_function, division - -import torch -import torch.nn.functional as F -from torch import nn - -try: - from itertools import ifilterfalse -except ImportError: # py3k - from itertools import filterfalse as ifilterfalse - - -def tversky_loss(probas, labels, alpha=0.5, beta=0.5, epsilon=1e-6): - ''' - Tversky loss function. - probas: [P, C] Variable, class probabilities at each prediction (between 0 and 1) - labels: [P] Tensor, ground truth labels (between 0 and C - 1) - - Same as soft dice loss when alpha=beta=0.5. - Same as Jaccord loss when alpha=beta=1.0. - See `Tversky loss function for image segmentation using 3D fully convolutional deep networks` - https://arxiv.org/pdf/1706.05721.pdf - ''' - C = probas.size(1) - losses = [] - for c in list(range(C)): - fg = (labels == c).float() - if fg.sum() == 0: - continue - class_pred = probas[:, c] - p0 = class_pred - p1 = 1 - class_pred - g0 = fg - g1 = 1 - fg - numerator = torch.sum(p0 * g0) - denominator = numerator + alpha * torch.sum(p0 * g1) + beta * torch.sum(p1 * g0) - losses.append(1 - ((numerator) / (denominator + epsilon))) - return mean(losses) - - -def flatten_probas(probas, labels, ignore=255): - """ - Flattens predictions in the batch - """ - B, C, H, W = probas.size() - probas = probas.permute(0, 2, 3, 1).contiguous().view(-1, C) # B * H * W, C = P, C - labels = labels.view(-1) - if ignore is None: - return probas, labels - valid = (labels != ignore) - vprobas = probas[valid.nonzero().squeeze()] - vlabels = labels[valid] - return vprobas, vlabels - - -def isnan(x): - return x != x - - -def mean(l, ignore_nan=False, empty=0): - """ - nanmean compatible with generators. - """ - l = iter(l) - if ignore_nan: - l = ifilterfalse(isnan, l) - try: - n = 1 - acc = next(l) - except StopIteration: - if empty == 'raise': - raise ValueError('Empty mean') - return empty - for n, v in enumerate(l, 2): - acc += v - if n == 1: - return acc - return acc / n - - -class SoftDiceLoss(nn.Module): - def __init__(self, ignore_index=255): - super(SoftDiceLoss, self).__init__() - self.ignore_index = ignore_index - - def forward(self, pred, label): - pred = F.softmax(pred, dim=1) - return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=0.5, beta=0.5) - - -class SoftJaccordLoss(nn.Module): - def __init__(self, ignore_index=255): - super(SoftJaccordLoss, self).__init__() - self.ignore_index = ignore_index - - def forward(self, pred, label): - pred = F.softmax(pred, dim=1) - return tversky_loss(*flatten_probas(pred, label, ignore=self.ignore_index), alpha=1.0, beta=1.0) diff --git a/preprocess/humanparsing/utils/warmup_scheduler.py b/preprocess/humanparsing/utils/warmup_scheduler.py deleted file mode 100644 index 2528a9c..0000000 --- a/preprocess/humanparsing/utils/warmup_scheduler.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -*- - -""" -@Author : Peike Li -@Contact : peike.li@yahoo.com -@File : warmup_scheduler.py -@Time : 3/28/19 2:24 PM -@Desc : -@License : This source code is licensed under the license found in the - LICENSE file in the root directory of this source tree. -""" - -import math -from torch.optim.lr_scheduler import _LRScheduler - - -class GradualWarmupScheduler(_LRScheduler): - """ Gradually warm-up learning rate with cosine annealing in optimizer. - Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. 
- """ - - def __init__(self, optimizer, total_epoch, eta_min=0, warmup_epoch=10, last_epoch=-1): - self.total_epoch = total_epoch - self.eta_min = eta_min - self.warmup_epoch = warmup_epoch - super(GradualWarmupScheduler, self).__init__(optimizer, last_epoch) - - def get_lr(self): - if self.last_epoch <= self.warmup_epoch: - return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs] - else: - return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.total_epoch-self.warmup_epoch))) / 2 for base_lr in self.base_lrs] - - -class SGDRScheduler(_LRScheduler): - """ Consine annealing with warm up and restarts. - Proposed in `SGDR: Stochastic Gradient Descent with Warm Restarts`. - """ - def __init__(self, optimizer, total_epoch=150, start_cyclical=100, cyclical_base_lr=7e-4, cyclical_epoch=10, eta_min=0, warmup_epoch=10, last_epoch=-1): - self.total_epoch = total_epoch - self.start_cyclical = start_cyclical - self.cyclical_epoch = cyclical_epoch - self.cyclical_base_lr = cyclical_base_lr - self.eta_min = eta_min - self.warmup_epoch = warmup_epoch - super(SGDRScheduler, self).__init__(optimizer, last_epoch) - - def get_lr(self): - if self.last_epoch < self.warmup_epoch: - return [self.eta_min + self.last_epoch*(base_lr - self.eta_min)/self.warmup_epoch for base_lr in self.base_lrs] - elif self.last_epoch < self.start_cyclical: - return [self.eta_min + (base_lr-self.eta_min)*(1+math.cos(math.pi*(self.last_epoch-self.warmup_epoch)/(self.start_cyclical-self.warmup_epoch))) / 2 for base_lr in self.base_lrs] - else: - return [self.eta_min + (self.cyclical_base_lr-self.eta_min)*(1+math.cos(math.pi* ((self.last_epoch-self.start_cyclical)% self.cyclical_epoch)/self.cyclical_epoch)) / 2 for base_lr in self.base_lrs] - - -if __name__ == '__main__': - import matplotlib.pyplot as plt - import torch - model = torch.nn.Linear(10, 2) - optimizer = torch.optim.SGD(params=model.parameters(), lr=7e-3, momentum=0.9, weight_decay=5e-4) - scheduler_warmup = SGDRScheduler(optimizer, total_epoch=150, eta_min=7e-5, warmup_epoch=10, start_cyclical=100, cyclical_base_lr=3.5e-3, cyclical_epoch=10) - lr = [] - for epoch in range(0,150): - scheduler_warmup.step(epoch) - lr.append(scheduler_warmup.get_lr()) - plt.style.use('ggplot') - plt.plot(list(range(0,150)), lr) - plt.show() -